/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#if defined(_MSC_VER)
// Enable non-standard math constants (e.g. M_2_SQRTPI, M_SQRT1_2).
#define _USE_MATH_DEFINES
#endif

#include "BackendTestUtils.h"

#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Exporter/ONNXModelWriter.h"
#include "glow/Flags/Flags.h"
#include "glow/Graph/Graph.h"
#include "glow/IR/IR.h"
#include "glow/IR/IRBuilder.h"
#include "glow/IR/Instrs.h"
#include "glow/Importer/ONNXModelLoader.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
#include "glow/Quantization/Base/Base.h"

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

#include <cmath>
#include <functional>
#include <numeric>

using namespace glow;

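/// Parameterized fixture for stateless operator tests; these are used below
/// with compareAgainstInterpreter to check a backend against the Interpreter
/// reference.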
class OperatorStatelessTest : public BackendStatelessTest {};

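/// Parameterized fixture for operator tests. TearDown additionally
/// round-trips the built Function through the ONNX exporter and loader and
/// checks that the reloaded Function is identical and produces bitwise-equal
/// results.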
class OperatorTest : public BackendTest {
protected:
  PlaceholderBindings bindings_;
  /// Use this for storing tensors that are unowned, i.e. that would normally
  /// be stack-local and so could not be read in TearDown.
  std::vector<Tensor> unownedTensors_;
  virtual void SetUp() override {
    glow::nnpi::flags::EnableCustomIAKernels = true;
    glow::nnpi::flags::EnableCustomDSPKernels = true;

    // Skip stripping the module so that we can inspect Constants after
    // compilation.
    EE_.setSkipModuleStrip(true);
  }

  virtual void TearDown() override {
    if (::testing::Test::IsSkipped()) {
      return;
    }

    EXPECT_TRUE(F_->getNodes().size() != 0)
        << "Functions should have nodes at the end of the test.";

    ASSERT_TRUE(F_->verify(&EE_.getBackend()))
        << "Function must pass verification.";

    // If the function contains custom kernels then skip the serialization.
#ifdef GLOW_WITH_NNPI
    bool hasCustomKernels = false;
    for (auto &node : F_->getNodes()) {
      if (node.getKind() == Kinded::Kind::NNPICustomDSPNodeKind ||
          node.getKind() == Kinded::Kind::NNPICustomIANodeKind) {
        hasCustomKernels = true;
        break;
      }
    }
    if (hasCustomKernels) {
      return;
    }
#endif

    // Now export the model to later import it back in.
    llvm::SmallString<64> path;
    auto tempFileRes =
        llvm::sys::fs::createTemporaryFile("exporter", "output.onnxtxt", path);
    ASSERT_EQ(tempFileRes.value(), 0)
        << "Failed to create temp file to write into.";
    std::string pathToModel(path.c_str());

    Error err = Error::empty();
    ONNXModelWriter onnxWR(pathToModel, *F_, 7, 9, &err,
                           /* textMode */ true, /* zipMode */ false,
                           /* useGlowCustomOps */ true);
    ASSERT_FALSE(ERR_TO_BOOL(std::move(err))) << "Error exporting model";

    // Now that we've exported, load it back into a new module/function, run
    // it, and compare results from the original run.
    PlaceholderBindings loadedBindings;
    ExecutionEngine loadedEE{getBackendName()};
    Module &loadedMod = loadedEE.getModule();
    Function *loadedF = loadedMod.createFunction(F_->getName());
    {
      Error err = Error::empty();
      // Note: We disable constant folding here because we only need it to
      // calculate shapes that are the result of constant compute in the
      // proto, but this won't be the case when using useGlowCustomOps
      // exporting.
      ONNXModelLoader onnxLD(pathToModel, {}, {}, *loadedF, &err,
                             /* zipMode */ false, /* perNodeOpts */ nullptr,
                             /* disableConstFoldInLoader */ true,
                             /* loadIntoExistingModule */ false,
                             &loadedEE.getBackend());
      if (ERR_TO_BOOL(std::move(err))) {
        llvm::sys::fs::remove(pathToModel);
        FAIL() << "Error loading exported model";
      }
    }

    // Note that we use the backend for verification here, because the
    // function is post optimization pipeline and so has backend-specific
    // requirements built in, e.g. for required layout.
    ASSERT_TRUE(loadedF->verify(&loadedEE.getBackend()))
        << "Loaded Function must pass verification";

    // String representations of original and loaded functions must be the
    // same. Note that we skip printing users for Storage because some tests
    // have other Functions sharing Storage for testing purposes.
    EXPECT_EQ(F_->toString(/* skipUsersForStorage */ true),
              loadedF->toString(/* skipUsersForStorage */ true));

    // Copy over inputs from previous bindings to newly loaded bindings. We
    // have new Placeholders so can't reuse the bindings from before.
    for (const auto &p : bindings_.pairs()) {
      if (!isInput(p.first, *F_)) {
        continue;
      }

      // Look for an input PH by the same name as the original Function.
      Placeholder *inputPH =
          loadedMod.getPlaceholderByNameSlow(p.first->getName());
      ASSERT_TRUE(inputPH);
      loadedBindings.insert(inputPH, p.second.getUnowned(inputPH->dims()));
    }

    // Allocate all other PHs/tensors that need it (i.e. result PHs/tensors).
    loadedBindings.allocate(loadedF->findPlaceholders());

    // Skip the optimization pipeline for loadedF (via onlyLowerFuns), as we
    // already passed it through the optimization pipeline before exporting
    // it.
    CompilationContext cctx;
    cctx.optimizationOpts.onlyLowerFuns.insert(loadedF);
    loadedEE.compile(cctx);
    loadedEE.run(loadedBindings);

    // Now bitwise-equal compare result tensors from before and after.
    for (const auto &p : bindings_.pairs()) {
      const Placeholder *resultPH = p.first;
      if (!isOutput(resultPH, *F_)) {
        continue;
      }
      const Tensor &resultT = p.second;

      // Find the result PH by the same name in the loaded Function.
      Placeholder *loadedResultPH =
          loadedMod.getPlaceholderByNameSlow(resultPH->getName());
      ASSERT_TRUE(loadedResultPH);
      const Tensor *loadedResultT = loadedBindings.get(loadedResultPH);

      EXPECT_TRUE(resultT.isBitwiseEqual(*loadedResultT, /* verbose */ true));
    }

    llvm::sys::fs::remove(pathToModel);
  }
};

/// Helper to create a Placeholder; if \p T is quantized, then it will include
/// a dummy scale and offset, otherwise it will not.
static Placeholder *createPlaceholderConditionallyQuantized(
    Module &mod, ElemKind T, llvm::ArrayRef<dim_t> dims, llvm::StringRef name,
    bool isTrainable, llvm::StringRef layout = ANY_LAYOUT) {
  return isQuantizedElemKind(T)
             ? mod.createPlaceholder(T, dims, 1.0, 0, name.str(), isTrainable,
                                     layout.str())
             : mod.createPlaceholder(T, dims, name.str(), isTrainable,
                                     layout.str());
}

/// Helper to get a unique Type; if \p T is quantized, then it will include a
/// dummy scale and offset, otherwise it will not.
static TypeRef uniqueTypeConditionallyQuantized(Module &mod, ElemKind T,
                                                llvm::ArrayRef<dim_t> dims) {
  return isQuantizedElemKind(T) ? mod.uniqueType(T, dims, 1.0, 0)
                                : mod.uniqueType(T, dims);
}

/// Helper to create a Tensor; if \p T is quantized, then it will include a
/// dummy scale and offset, otherwise it will not.
static Tensor createTensorConditionallyQuantized(ElemKind T,
                                                 llvm::ArrayRef<dim_t> dims) {
  return isQuantizedElemKind(T) ? Tensor(T, dims, 1.0, 0) : Tensor(T, dims);
}

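/// Helper to build, compile, and run a broadcasting CmpLT graph comparing
/// \p xValues against \p yValues (with shapes \p xDims and \p yDims) using
/// element kind \p DTy. Returns a handle to the resulting boolean tensor.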
template <typename DataType>
glow::Handle<bool>
lessHelper(glow::PlaceholderBindings &bindings, glow::Module &mod,
           glow::Function *F, glow::ExecutionEngine &EE, ElemKind DTy,
           llvm::ArrayRef<DataType> xValues, llvm::ArrayRef<DataType> yValues,
           llvm::ArrayRef<dim_t> xDims, llvm::ArrayRef<dim_t> yDims) {
  auto *X = createPlaceholderConditionallyQuantized(mod, DTy, xDims, "X",
                                                    /* isTrainable */ false);

  auto *Y = createPlaceholderConditionallyQuantized(mod, DTy, yDims, "Y",
                                                    /* isTrainable */ false);

  bindings.allocate(llvm::dyn_cast<Placeholder>(X))->getHandle<DataType>() =
      xValues;

  bindings.allocate(llvm::dyn_cast<Placeholder>(Y))->getHandle<DataType>() =
      yValues;

  auto *cmpr =
      F->createNodeWithBroadcast<CmpLTNode>("cmpLT", /* axis */ -1, X, Y);

  auto *save = F->createSave("save", cmpr);
  auto *saveAlloc = bindings.allocate(save->getPlaceholder());

  EE.compile(CompilationMode::Infer);
  EE.run(bindings);

  return saveAlloc->getHandle<bool>();
}

TEST_P(OperatorTest, less_int8) {
  CHECK_IF_ENABLED();

  int8_t xValues[] = {3, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1,

                      1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1,

                      1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1,

                      1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1};

  int8_t yValues[] = {3, 4, 5, 7, 2, 5, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6,

                      3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6,

                      3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6,

                      3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6};

  dim_t xDims[] = {2, 2, 4, 4};
  dim_t yDims[] = {2, 2, 4, 4};

  Handle<bool> saveH =
      lessHelper<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, xValues,
                         yValues, xDims, yDims);

  bool refResults[] = {
      false, true,  true,  true,  false, false, false, true,
      false, false, false, true,  true,  true,  false, true,

      true,  true,  true,  true,  false, false, false, true,
      false, false, false, true,  true,  true,  false, true,

      true,  true,  true,  true,  false, false, false, true,
      false, false, false, true,  true,  true,  false, true,

      true,  true,  true,  true,  false, false, false, true,
      false, false, false, true,  true,  true,  false, true,
  };

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    for (dim_t j = 0; j < saveH.dims()[1]; ++j) {
      for (dim_t k = 0; k < saveH.dims()[2]; ++k) {
        for (dim_t f = 0; f < saveH.dims()[3]; ++f) {
          EXPECT_FLOAT_EQ(refResults[counter++], saveH.at({i, j, k, f}));
        }
      }
    }
  }
}

TEST_P(OperatorTest, less_floatCases) {
  CHECK_IF_ENABLED();

  float xValues[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};

  float yValues[] = {5.0f, 4.0f, 3.0f, 2.0f, 1.0f};

  dim_t xDims[] = {5};
  dim_t yDims[] = {5};

  Handle<bool> saveH =
      lessHelper<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, xValues,
                        yValues, xDims, yDims);

  bool refResults[] = {true, true, false, false, false};

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    EXPECT_FLOAT_EQ(refResults[counter++], saveH.at({i}));
  }
}

TEST_P(OperatorTest, less_float16Cases) {
  CHECK_IF_ENABLED();

  float16 xValues[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};

  float16 yValues[] = {5.0f, 4.0f, 3.0f, 2.0f, 1.0f};

  dim_t xDims[] = {5};
  dim_t yDims[] = {5};

  Handle<bool> saveH =
      lessHelper<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty,
                            xValues, yValues, xDims, yDims);

  bool refResults[] = {true, true, false, false, false};

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    EXPECT_FLOAT_EQ(refResults[counter++], saveH.at({i}));
  }
}

TEST_P(OperatorTest, less_bfloat16Cases) {
  CHECK_IF_ENABLED();

  bfloat16 xValues[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f};

  bfloat16 yValues[] = {5.0f, 4.0f, 3.0f, 2.0f, 1.0f};

  dim_t xDims[] = {5};
  dim_t yDims[] = {5};

  Handle<bool> saveH =
      lessHelper<bfloat16>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty,
                           xValues, yValues, xDims, yDims);

  bool refResults[] = {true, true, false, false, false};

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    EXPECT_FLOAT_EQ(refResults[counter++], saveH.at({i}));
  }
}

TEST_P(OperatorTest, less_int64Cases) {
  CHECK_IF_ENABLED();

  int64_t xValues[] = {1, 2, 3, 4, 5};

  int64_t yValues[] = {5, 4, 3, 2, 1};

  dim_t xDims[] = {5};
  dim_t yDims[] = {5};

  Handle<bool> saveH =
      lessHelper<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy,
                          xValues, yValues, xDims, yDims);

  bool refResults[] = {true, true, false, false, false};

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    EXPECT_FLOAT_EQ(refResults[counter++], saveH.at({i}));
  }
}

TEST_P(OperatorTest, less_int16Cases) {
  CHECK_IF_ENABLED();

  int16_t xValues[] = {1, 2, 3, 4, 5};

  int16_t yValues[] = {5, 4, 3, 2, 1};

  dim_t xDims[] = {5};
  dim_t yDims[] = {5};

  Handle<bool> saveH =
      lessHelper<int16_t>(bindings_, mod_, F_, EE_, ElemKind::Int16QTy,
                          xValues, yValues, xDims, yDims);

  bool refResults[] = {true, true, false, false, false};

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    EXPECT_TRUE(refResults[counter++] == saveH.at({i}));
  }
}

TEST_P(OperatorTest, less_float) {
  CHECK_IF_ENABLED();

  float xValues[] = {1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
                     7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

                     1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
                     7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

                     1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
                     7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

                     1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
                     7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f};

  float yValues[] = {3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
                     4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f,

                     3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
                     4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f,

                     3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
                     4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f,

                     3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
                     4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f};

  dim_t xDims[] = {2, 2, 4, 4};
  dim_t yDims[] = {2, 2, 4, 4};

  Handle<bool> saveH =
      lessHelper<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, xValues,
                        yValues, xDims, yDims);

  bool refResults[] = {
      true,  true,  true,  true,  false, false, false, true,
      false, false, false, true,  true,  true,  false, true,

      true,  true,  true,  true,  false, false, false, true,
      false, false, false, true,  true,  true,  false, true,

      true,  true,  true,  true,  false, false, false, true,
      false, false, false, true,  true,  true,  false, true,

      true,  true,  true,  true,  false, false, false, true,
      false, false, false, true,  true,  true,  false, true,
  };

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    for (dim_t j = 0; j < saveH.dims()[1]; ++j) {
      for (dim_t k = 0; k < saveH.dims()[2]; ++k) {
        for (dim_t f = 0; f < saveH.dims()[3]; ++f) {
          EXPECT_FLOAT_EQ(refResults[counter++], saveH.at({i, j, k, f}));
        }
      }
    }
  }
}

TEST_P(OperatorTest, less_broadcast_float) {
  CHECK_IF_ENABLED();

  float xValues[] = {1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
                     7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

                     1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
                     7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

                     1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
                     7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

                     1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
                     7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f};

  float yValues[] = {3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
                     4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f,

                     3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
                     4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f};

  dim_t xDims[] = {2, 2, 4, 4};
  dim_t yDims[] = {1, 2, 4, 4};

  Handle<bool> saveH =
      lessHelper<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, xValues,
                        yValues, xDims, yDims);

  bool refResults[] = {true,  true,  true,  true,  false, false, false, true,
                       false, false, false, true,  true,  true,  false, true,

                       true,  true,  true,  true,  false, false, false, true,
                       false, false, false, true,  true,  true,  false, true,

                       true,  true,  true,  true,  false, false, false, true,
                       false, false, false, true,  true,  true,  false, true,

                       true,  true,  true,  true,  false, false, false, true,
                       false, false, false, true,  true,  true,  false, true};

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    for (dim_t j = 0; j < saveH.dims()[1]; ++j) {
      for (dim_t k = 0; k < saveH.dims()[2]; ++k) {
        for (dim_t f = 0; f < saveH.dims()[3]; ++f) {
          EXPECT_FLOAT_EQ(refResults[counter++], saveH.at({i, j, k, f}));
        }
      }
    }
  }
}

TEST_P(OperatorTest, less_int32Cases) {
  CHECK_IF_ENABLED();

  int32_t xValues[] = {1, 2, 3, 4, 5};
  int32_t yValues[] = {5, 4, 3, 2, 1};

  dim_t xDims[] = {1, 1, 1, 5};
  dim_t yDims[] = {1, 1, 1, 5};

  Handle<bool> saveH =
      lessHelper<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy,
                          xValues, yValues, xDims, yDims);

  bool refResults[] = {true, true, false, false, false};

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    for (dim_t j = 0; j < saveH.dims()[1]; ++j) {
      for (dim_t k = 0; k < saveH.dims()[2]; ++k) {
        for (dim_t f = 0; f < saveH.dims()[3]; ++f) {
          EXPECT_FLOAT_EQ(refResults[counter++], saveH.at({i, j, k, f}));
        }
      }
    }
  }
}

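/// Helper to build, compile, and run a broadcasting Select ("where") graph
/// that picks elements from \p xValues or \p yValues according to the boolean
/// condition \p cValues. Returns a handle to the resulting tensor.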
template <typename DataType>
glow::Handle<DataType>
whereHelper(glow::PlaceholderBindings &bindings, glow::Module &mod,
            glow::Function *F, glow::ExecutionEngine &EE, ElemKind DTy,
            llvm::ArrayRef<DataType> xValues, llvm::ArrayRef<DataType> yValues,
            llvm::ArrayRef<bool> cValues, llvm::ArrayRef<dim_t> xDims,
            llvm::ArrayRef<dim_t> yDims, llvm::ArrayRef<dim_t> cDims) {
  auto *cond = createPlaceholderConditionallyQuantized(mod, ElemKind::BoolTy,
                                                       cDims, "cond", false);
  auto *X = createPlaceholderConditionallyQuantized(mod, DTy, xDims, "X",
                                                    DTy != ElemKind::FloatTy);

  auto *Y = createPlaceholderConditionallyQuantized(mod, DTy, yDims, "Y",
                                                    DTy != ElemKind::FloatTy);

  bindings.allocate(llvm::dyn_cast<Placeholder>(cond))->getHandle<bool>() =
      cValues;

  bindings.allocate(llvm::dyn_cast<Placeholder>(X))->getHandle<DataType>() =
      xValues;

  bindings.allocate(llvm::dyn_cast<Placeholder>(Y))->getHandle<DataType>() =
      yValues;

  auto *whr = F->createNodeWithBroadcast<SelectNode>("Select", /* axis */ -1,
                                                     cond, X, Y);

  auto *save = F->createSave("save", whr);
  auto *saveAlloc = bindings.allocate(save->getPlaceholder());

  EE.compile(CompilationMode::Infer);
  EE.run(bindings);

  return saveAlloc->getHandle<DataType>();
}

TEST_P(OperatorTest, where_2d_broadcast_x_y_i8) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<int8_t, 16> xValues = {3, 5, 7};

  llvm::SmallVector<int8_t, 16> yValues = {2, 4, 6};

  llvm::SmallVector<bool, 4> cValues = {1, 0, 1};

  llvm::SmallVector<dim_t, 4> condDims = {3, 1, 1};

  llvm::SmallVector<dim_t, 4> xDims = {1, 3, 1};
  llvm::SmallVector<dim_t, 4> yDims = {3, 1, 1};

  Handle<int8_t> saveH =
      whereHelper<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, xValues,
                          yValues, cValues, xDims, yDims, condDims);

  llvm::SmallVector<int8_t, 16> refResults = {3, 5, 7, 4, 4, 4, 3, 5, 7};

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    for (dim_t j = 0; j < saveH.dims()[1]; ++j) {
      for (dim_t k = 0; k < saveH.dims()[2]; ++k) {
        EXPECT_EQ(refResults[counter++], saveH.at({i, j, k}));
      }
    }
  }
}

TEST_P(OperatorTest, where_2d_wise_i8) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<int8_t, 16> xValues = {
      1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1,

      1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1,

      1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1,

      1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1};

  llvm::SmallVector<int8_t, 16> yValues = {
      3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6,

      3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6,

      3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6,

      3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6};

  llvm::SmallVector<bool, 4> cValues = {1, 0, 1, 0};

  llvm::SmallVector<dim_t, 4> condDims = {2, 2, 1, 1};

  llvm::SmallVector<dim_t, 4> xDims = {2, 2, 4, 4};
  llvm::SmallVector<dim_t, 4> yDims = {2, 2, 4, 4};

  Handle<int8_t> saveH =
      whereHelper<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, xValues,
                          yValues, cValues, xDims, yDims, condDims);

  llvm::SmallVector<int8_t, 16> refResults = {
      1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1,

      3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6,

      1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1,

      3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6};

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    for (dim_t j = 0; j < saveH.dims()[1]; ++j) {
      for (dim_t k = 0; k < saveH.dims()[2]; ++k) {
        for (dim_t f = 0; f < saveH.dims()[3]; ++f) {
          EXPECT_EQ(refResults[counter++], saveH.at({i, j, k, f}));
        }
      }
    }
  }
}

TEST_P(OperatorTest, where_2d_wise_float) {
  CHECK_IF_ENABLED();

  llvm::SmallVector<float, 16> xValues = {
      1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
      7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

      1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
      7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

      1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
      7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

      1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
      7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f};

  llvm::SmallVector<float, 16> yValues = {
      3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
      4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f,

      3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
      4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f,

      3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
      4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f,

      3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
      4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f};

  llvm::SmallVector<bool, 4> cValues = {1, 0, 1, 0};

  llvm::SmallVector<dim_t, 4> condDims = {2, 2, 1, 1};

  llvm::SmallVector<dim_t, 4> xDims = {2, 2, 4, 4};
  llvm::SmallVector<dim_t, 4> yDims = {2, 2, 4, 4};

  Handle<float> saveH =
      whereHelper<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, xValues,
                         yValues, cValues, xDims, yDims, condDims);

  llvm::SmallVector<float, 16> refResults = {
      1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
      7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

      3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
      4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f,

      1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
      7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

      3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
      4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f};

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    for (dim_t j = 0; j < saveH.dims()[1]; ++j) {
      for (dim_t k = 0; k < saveH.dims()[2]; ++k) {
        for (dim_t f = 0; f < saveH.dims()[3]; ++f) {
          EXPECT_FLOAT_EQ(refResults[counter++], saveH.at({i, j, k, f}));
        }
      }
    }
  }
}

TEST_P(OperatorTest, where_row_wise_float) {
  CHECK_IF_ENABLED();

  llvm::SmallVector<bool, 4> cValues = {1, 1, 1, 0, 0, 1, 0, 0};

  llvm::SmallVector<dim_t, 4> condDims = {2, 4, 1};

  llvm::SmallVector<dim_t, 4> xDims = {2, 4, 4};
  llvm::SmallVector<dim_t, 4> yDims = {2, 4, 4};

  llvm::SmallVector<float, 16> xValues = {
      1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
      7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f,

      1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
      7.0f, 8.0f, 9.0f, 2.0f, 3.0f, 5.0f, 7.0f, 1.0f};

  llvm::SmallVector<float, 16> yValues = {
      3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
      4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f,

      3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f, 0.0f, 6.0f,
      4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f};

  Handle<float> saveH =
      whereHelper<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, xValues,
                         yValues, cValues, xDims, yDims, condDims);

  llvm::SmallVector<float, 16> refResults = {
      1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f, 6.0f, 3.0f,
      7.0f, 8.0f, 9.0f, 2.0f, 5.0f, 9.0f, 2.0f, 6.0f,

      3.0f, 4.0f, 5.0f, 7.0f, 4.0f, 5.0f, 6.0f, 3.0f,
      4.0f, 2.0f, 1.0f, 8.0f, 5.0f, 9.0f, 2.0f, 6.0f,
  };

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    for (dim_t j = 0; j < saveH.dims()[1]; ++j) {
      for (dim_t k = 0; k < saveH.dims()[2]; ++k) {
        EXPECT_FLOAT_EQ(refResults[counter++], saveH.at({i, j, k}));
      }
    }
  }
}

TEST_P(OperatorTest, where_element_wise_float) {
  CHECK_IF_ENABLED();

  llvm::SmallVector<dim_t, 4> condDims = {1, 4, 4};

  llvm::SmallVector<dim_t, 4> xDims = {1, 4, 4};
  llvm::SmallVector<dim_t, 4> yDims = {1, 4, 4};

  llvm::SmallVector<bool, 4> cValues = {1, 1, 1, 0, 0, 1, 0, 0,
                                        0, 1, 0, 1, 1, 0, 1, 0};

  llvm::SmallVector<float, 16> xValues = {1.0f, 2.0f, 3.0f, 6.0f, 4.0f, 5.0f,
                                          6.0f, 3.0f, 7.0f, 8.0f, 9.0f, 2.0f,
                                          3.0f, 5.0f, 7.0f, 1.0f};

  llvm::SmallVector<float, 16> yValues = {3.0f, 4.0f, 5.0f, 7.0f, 2.0f, 1.0f,
                                          0.0f, 6.0f, 4.0f, 2.0f, 1.0f, 8.0f,
                                          5.0f, 9.0f, 2.0f, 6.0f};

  Handle<float> saveH =
      whereHelper<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, xValues,
                         yValues, cValues, xDims, yDims, condDims);

  llvm::SmallVector<float, 16> refResults = {1.0f, 2.0f, 3.0f, 7.0f, 2.0f,
                                             5.0f, 0.0f, 6.0f, 4.0f, 8.0f,
                                             1.0f, 2.0f, 3.0f, 9.0f, 7.0f,
                                             6.0f};

  int counter = 0;
  for (dim_t i = 0; i < saveH.dims()[0]; ++i) {
    for (dim_t j = 0; j < saveH.dims()[1]; ++j) {
      for (dim_t k = 0; k < saveH.dims()[2]; ++k) {
        EXPECT_FLOAT_EQ(refResults[counter++], saveH.at({i, j, k}));
      }
    }
  }
}

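/// Bundle of NonMaxSuppression attributes shared by the NMS tests below.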
struct NMSMetaData {
  int centerPoint{0};
  size_t maxOutputPerClass{0};
  float iouThreshold{0.0};
  float scoreThreshold{0.0};
};

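/// Expected (batchIndex, classIndex, boxIndex) triple for one selected box.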
struct SelectedBox {
  int batchIndex{0};
  int classIndex{0};
  int boxIndex{0};
};

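/// A box read back as four floats (fields x, y, h, w), matching the per-box
/// layout of the boxes tensor in the tests below.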
struct Box {
  float x;
  float y;
  float h;
  float w;
};

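/// Helper to build, compile, and run a NonMaxSuppression graph (TF V4 flavor
/// if \p isV4, otherwise ONNX flavor) over \p boxesData and \p classes, and
/// check the selected indices and the number of selected indices against
/// \p refResults and \p refNumSelected. Returns a handle to the indices.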
template <typename DataType, typename outType = int64_t>
static Handle<outType> testNonMaxSuppression(
    glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F,
    glow::ExecutionEngine &EE, ElemKind DTy, llvm::ArrayRef<dim_t> boxesDims,
    llvm::ArrayRef<dim_t> scoresDims, llvm::ArrayRef<DataType> boxesData,
    llvm::ArrayRef<DataType> classes, llvm::ArrayRef<SelectedBox> refResults,
    llvm::ArrayRef<int32_t> refNumSelected, const NMSMetaData &metaData,
    bool isV4) {

  // NHW
  auto *boxes = createPlaceholderConditionallyQuantized(mod, DTy, boxesDims,
                                                        "boxes", false);

  auto *scores = createPlaceholderConditionallyQuantized(mod, DTy, scoresDims,
                                                         "scores", false);

  NonMaxSuppressionNode *nms = nullptr;

  if (isV4) {
    nms = F->createNonMaxSuppressionV4(
        "NMS", boxes, scores, metaData.centerPoint, metaData.maxOutputPerClass,
        metaData.iouThreshold, metaData.scoreThreshold);
  } else {
    nms = F->createNonMaxSuppressionONNX(
        "NMS", boxes, scores, metaData.centerPoint, metaData.maxOutputPerClass,
        metaData.iouThreshold, metaData.scoreThreshold);
  }

  auto *saveIndices = F->createSave("save", nms->getIndices());
  auto *saveNumSelected =
      F->createSave("numSelected", nms->getNumberOfSelectedIndices());
  auto *result = bindings.allocate(saveIndices->getPlaceholder());
  auto *result2 = bindings.allocate(saveNumSelected->getPlaceholder());

  bindings.allocate(boxes)->getHandle<DataType>() = boxesData;
  bindings.allocate(scores)->getHandle<DataType>() = classes;

  CompilationContext cctx;
  cctx.compMode = CompilationMode::Infer;
  EE.compile(cctx);
  EE.run(bindings);

  Handle<outType> result2H = result2->getHandle<outType>();
  for (dim_t i = 0; i < (dim_t)refNumSelected.size(); ++i) {
    EXPECT_EQ(result2H.at({i}), refNumSelected[i]);
  }

  Handle<outType> resultH = result->getHandle<outType>();

  if (isV4) {
    for (dim_t i = 0; i < (dim_t)metaData.maxOutputPerClass; ++i) {
      EXPECT_EQ(refResults[i].boxIndex, resultH.at({i}));
    }
  } else {
    for (dim_t i = 0; i < (dim_t)metaData.maxOutputPerClass; ++i) {
      EXPECT_EQ(refResults[i].batchIndex, resultH.at({i, (dim_t)0}));
      EXPECT_EQ(refResults[i].classIndex, resultH.at({i, (dim_t)1}));
      EXPECT_EQ(refResults[i].boxIndex, resultH.at({i, (dim_t)2}));
    }
  }

  return resultH;
}

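/// Like testNonMaxSuppression above, but additionally Gathers the selected
/// boxes out of the boxes tensor and compares them against \p refBoxResults.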
template <typename DataType, typename outType = int64_t>
static Handle<float> testNonMaxSuppressionWithGather(
    glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F,
    glow::ExecutionEngine &EE, ElemKind DTy, llvm::ArrayRef<dim_t> boxesDims,
    llvm::ArrayRef<dim_t> scoresDims, llvm::ArrayRef<dim_t> boxIndicesDim,
    llvm::ArrayRef<DataType> boxesData, llvm::ArrayRef<DataType> classes,
    llvm::ArrayRef<int32_t> boxIndicesData, llvm::ArrayRef<Box> refBoxResults,
    llvm::ArrayRef<int32_t> refNumSelected, const NMSMetaData &metaData,
    bool isV4) {
  // NHW
  auto *boxes = createPlaceholderConditionallyQuantized(mod, DTy, boxesDims,
                                                        "boxes", false);

  auto *scores = createPlaceholderConditionallyQuantized(mod, DTy, scoresDims,
                                                         "scores", false);

  auto *boxIndices = createPlaceholderConditionallyQuantized(
      mod, ElemKind::Int32ITy, boxIndicesDim, "boxIndices", false);

  NonMaxSuppressionNode *nms = nullptr;

  unsigned axis = 1;
  if (isV4) {
    nms = F->createNonMaxSuppressionV4(
        "NMS", boxes, scores, metaData.centerPoint, metaData.maxOutputPerClass,
        metaData.iouThreshold, metaData.scoreThreshold);
    axis = 0;
  } else {
    nms = F->createNonMaxSuppressionONNX(
        "NMS", boxes, scores, metaData.centerPoint, metaData.maxOutputPerClass,
        metaData.iouThreshold, metaData.scoreThreshold);
  }

  // Extract all the box indices.
  auto *gthI =
      F->createGather("gatherBoxIndices", nms->getIndices(), boxIndices, axis);
  auto *gthB = F->createGather("gatherClassIndices", boxes, gthI, axis);
  Node *fltn2 = nullptr;

  if (isV4) {
    fltn2 = gthB;
  } else {
    fltn2 = F->createFlatten("flatten", gthB, 2);
  }

  auto *saveBoxes = F->createSave("saveBoxes", fltn2);
  auto *saveNumSelected =
      F->createSave("numSelected", nms->getNumberOfSelectedIndices());

  auto *result = bindings.allocate(saveBoxes->getPlaceholder());
  auto *result2 = bindings.allocate(saveNumSelected->getPlaceholder());

  bindings.allocate(boxes)->getHandle<DataType>() = boxesData;
  bindings.allocate(scores)->getHandle<DataType>() = classes;
  bindings.allocate(boxIndices)->getHandle<int32_t>() = boxIndicesData;

  CompilationContext cctx;
  cctx.compMode = CompilationMode::Infer;
  EE.compile(cctx);
  EE.run(bindings);

  Handle<outType> result2H = result2->getHandle<outType>();
  for (dim_t i = 0; i < (dim_t)refNumSelected.size(); ++i) {
    EXPECT_EQ(result2H.at({i}), refNumSelected[i]);
  }

  Handle<float> resultH = result->getHandle<float>();

  for (dim_t i = 0; i < (dim_t)refBoxResults.size(); ++i) {
    EXPECT_EQ(refBoxResults[i].x, resultH.at({i, (dim_t)0}));
    EXPECT_EQ(refBoxResults[i].y, resultH.at({i, (dim_t)1}));
    EXPECT_EQ(refBoxResults[i].h, resultH.at({i, (dim_t)2}));
    EXPECT_EQ(refBoxResults[i].w, resultH.at({i, (dim_t)3}));
  }

  return resultH;
}

TEST_P(OperatorTest, nms_center_point_box_with_gather_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {1, 6, 4};
  llvm::SmallVector<dim_t, 3> scoresDims = {1, 1, 6};
  llvm::SmallVector<dim_t, 1> boxIndexesDims = {1};

  llvm::SmallVector<float, 24> boxes = {
      0.5, 0.5, 1.0, 1.0, 0.5, 0.6, 1.0, 1.0, 0.5, 0.4, 1.0, 1.0,
      0.5, 10.5, 1.0, 1.0, 0.5, 10.6, 1.0, 1.0, 0.5, 100.5, 1.0, 1.0};

  llvm::SmallVector<float, 6> classes = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
  llvm::SmallVector<int32_t, 1> boxIndices = {2};
  llvm::SmallVector<Box, 3> refResults = {
      {0.5, 10.5, 1.0, 1.0}, {0.5, 0.5, 1.0, 1.0}, {0.5, 0.5, 1.0, 1.0}};
  NMSMetaData metaData = {1, 3, 0.5, 0.4};
  llvm::SmallVector<int32_t, 1> refNumSelected = {2};

  testNonMaxSuppressionWithGather<float>(
      bindings_, mod_, F_, EE_, ElemKind::FloatTy, boxesDims, scoresDims,
      boxIndexesDims, boxes, classes, boxIndices, refResults, refNumSelected,
      metaData, false);
}

TEST_P(OperatorTest, nms_v4_center_point_box_with_gather_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {6, 4};
  llvm::SmallVector<dim_t, 1> scoresDims = {6};
  llvm::SmallVector<dim_t, 1> boxIndexesDims = {3};

  llvm::SmallVector<float, 24> boxes = {
      0.5, 0.5, 1.0, 1.0, 0.5, 0.6, 1.0, 1.0, 0.5, 0.4, 1.0, 1.0,
      0.5, 10.5, 1.0, 1.0, 0.5, 10.6, 1.0, 1.0, 0.5, 100.5, 1.0, 1.0};

  llvm::SmallVector<float, 6> classes = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
  llvm::SmallVector<int32_t, 3> boxIndices = {0, 1, 2};
  llvm::SmallVector<Box, 3> refResults = {
      {0.5, 10.5, 1.0, 1.0}, {0.5, 0.5, 1.0, 1.0}, {0.5, 0.5, 1.0, 1.0}};
  NMSMetaData metaData = {1, 3, 0.5, 0.4};
  llvm::SmallVector<int32_t, 1> refNumSelected{2};

  testNonMaxSuppressionWithGather<float>(
      bindings_, mod_, F_, EE_, ElemKind::FloatTy, boxesDims, scoresDims,
      boxIndexesDims, boxes, classes, boxIndices, refResults, refNumSelected,
      metaData, true);
}

TEST_P(OperatorTest, nms_center_point_box_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {1, 6, 4};
  llvm::SmallVector<dim_t, 3> scoresDims = {1, 1, 6};
  llvm::SmallVector<float, 24> boxes = {
      0.5, 0.5, 1.0, 1.0, 0.5, 0.6, 1.0, 1.0, 0.5, 0.4, 1.0, 1.0,
      0.5, 10.5, 1.0, 1.0, 0.5, 10.6, 1.0, 1.0, 0.5, 100.5, 1.0, 1.0};
  llvm::SmallVector<float, 6> classes = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
  llvm::SmallVector<SelectedBox, 3> refResults = {
      {0, 0, 3}, {0, 0, 0}, {0, 0, 5}};
  NMSMetaData metaData = {1, 3, 0.5, 0.0};
  llvm::SmallVector<int32_t, 1> refNumSelected{3};

  testNonMaxSuppression<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy,
                               boxesDims, scoresDims, boxes, classes,
                               refResults, refNumSelected, metaData, false);
}

TEST_P(OperatorTest, nms_v4_center_point_box_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {6, 4};
  llvm::SmallVector<dim_t, 1> scoresDims = {6};
  llvm::SmallVector<float, 24> boxes = {
      0.5, 0.5, 1.0, 1.0, 0.5, 0.6, 1.0, 1.0, 0.5, 0.4, 1.0, 1.0,
      0.5, 10.5, 1.0, 1.0, 0.5, 10.6, 1.0, 1.0, 0.5, 100.5, 1.0, 1.0};
  llvm::SmallVector<float, 6> classes = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
  llvm::SmallVector<SelectedBox, 3> refResults = {
      {0, 0, 3}, {0, 0, 0}, {0, 0, 5}};
  NMSMetaData metaData = {1, 3, 0.5, 0.0};
  llvm::SmallVector<int32_t, 1> refNumSelected{3};

  testNonMaxSuppression<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy,
                               boxesDims, scoresDims, boxes, classes,
                               refResults, refNumSelected, metaData, true);
}

TEST_P(OperatorTest, nms_flipped_coordinates_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {1, 6, 4};
  llvm::SmallVector<dim_t, 3> scoresDims = {1, 1, 6};
  llvm::SmallVector<float, 24> boxes = {
      1.0, 1.0, 0.0, 0.0, 0.0, 0.1, 1.0, 1.1, 0.0, 0.9, 1.0, -0.1,
      0.0, 10.0, 1.0, 11.0, 1.0, 10.1, 0.0, 11.1, 1.0, 101.0, 0.0, 100.0};
  llvm::SmallVector<float, 6> classes = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
  llvm::SmallVector<SelectedBox, 3> refResults = {
      {0, 0, 3}, {0, 0, 0}, {0, 0, 5}};
  NMSMetaData metaData = {0, 3, 0.5, 0.0};
  llvm::SmallVector<int32_t, 1> refNumSelected{3};

  testNonMaxSuppression<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy,
                               boxesDims, scoresDims, boxes, classes,
                               refResults, refNumSelected, metaData, false);
}

TEST_P(OperatorTest, nms_identical_boxes_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {1, 10, 4};
  llvm::SmallVector<dim_t, 3> scoresDims = {1, 1, 10};
  llvm::SmallVector<float, 40> boxes = {
      0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0,
      1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0,
      0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0};
  llvm::SmallVector<float, 10> classes = {0.9, 0.9, 0.9, 0.9, 0.9,
                                          0.9, 0.9, 0.9, 0.9, 0.9};
  llvm::SmallVector<SelectedBox, 3> refResults = {{0, 0, 0}};
  NMSMetaData metaData = {0, 1, 0.5, 0.0};
  llvm::SmallVector<int32_t, 1> refNumSelected{1};

  testNonMaxSuppression<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy,
                               boxesDims, scoresDims, boxes, classes,
                               refResults, refNumSelected, metaData, false);
}

TEST_P(OperatorTest, nms_limit_output_size_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {1, 6, 4};
  llvm::SmallVector<dim_t, 3> scoresDims = {1, 1, 6};
  llvm::SmallVector<float, 24> boxes = {
      0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
      0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0};
  llvm::SmallVector<float, 6> classes = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
  llvm::SmallVector<SelectedBox, 2> refResults = {{0, 0, 3}, {0, 0, 0}};
  NMSMetaData metaData = {0, 2, 0.5, 0.0};
  llvm::SmallVector<int32_t, 1> refNumSelected{2};

  testNonMaxSuppression<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy,
                               boxesDims, scoresDims, boxes, classes,
                               refResults, refNumSelected, metaData, false);
}

TEST_P(OperatorTest, nms_single_box_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {1, 1, 4};
  llvm::SmallVector<dim_t, 3> scoresDims = {1, 1, 1};
  llvm::SmallVector<float, 4> boxes = {0.0, 0.0, 1.0, 1.0};
  llvm::SmallVector<float, 1> classes = {0.9};
  llvm::SmallVector<SelectedBox, 1> refResults = {
      {0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
  NMSMetaData metaData = {0, 3, 0.5, 0.0};
  llvm::SmallVector<int32_t, 1> refNumSelected{1};

  testNonMaxSuppression<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy,
                               boxesDims, scoresDims, boxes, classes,
                               refResults, refNumSelected, metaData, false);
}

TEST_P(OperatorTest, nms_by_iou_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {1, 6, 4};
  llvm::SmallVector<dim_t, 3> scoresDims = {1, 1, 6};
  llvm::SmallVector<float, 24> boxes = {
      0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
      0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0};
  llvm::SmallVector<float, 6> classes = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
  llvm::SmallVector<SelectedBox, 2> refResults = {
      {0, 0, 3}, {0, 0, 0}, {0, 0, 5}};
  NMSMetaData metaData = {0, 3, 0.5, 0.0};
  llvm::SmallVector<int32_t, 1> refNumSelected{3};

  testNonMaxSuppression<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy,
                               boxesDims, scoresDims, boxes, classes,
                               refResults, refNumSelected, metaData, false);
}

TEST_P(OperatorTest, nms_by_iou_and_scores_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {1, 6, 4};
  llvm::SmallVector<dim_t, 3> scoresDims = {1, 1, 6};
  llvm::SmallVector<float, 24> boxes = {
      0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
      0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0};
  llvm::SmallVector<float, 6> classes = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
  llvm::SmallVector<SelectedBox, 2> refResults = {{0, 0, 3}, {0, 0, 0}};
  NMSMetaData metaData = {0, 2, 0.5, 0.4};
  llvm::SmallVector<int32_t, 1> refNumSelected{2};

  testNonMaxSuppression<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy,
                               boxesDims, scoresDims, boxes, classes,
                               refResults, refNumSelected, metaData, false);
}

TEST_P(OperatorTest, nms_two_batches_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {2, 6, 4};
  llvm::SmallVector<dim_t, 3> scoresDims = {2, 1, 6};
  llvm::SmallVector<float, 48> boxes = {
      0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
      0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0,
      0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
      0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0};
  llvm::SmallVector<float, 12> classes = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3,
                                          0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
  llvm::SmallVector<SelectedBox, 4> refResults = {
      {0, 0, 3}, {0, 0, 0}, {1, 0, 3}, {1, 0, 0}};
  NMSMetaData metaData = {0, 2, 0.5, 0.0};
  llvm::SmallVector<int32_t, 2> refNumSelected{2, 2};

  testNonMaxSuppression<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy,
                               boxesDims, scoresDims, boxes, classes,
                               refResults, refNumSelected, metaData, false);
}

TEST_P(OperatorTest, nms_two_classes_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {1, 6, 4};
  llvm::SmallVector<dim_t, 3> scoresDims = {1, 2, 6};
  llvm::SmallVector<float, 24> boxes = {
      0.0, 0.0, 1.0, 1.0, 0.0, 0.1, 1.0, 1.1, 0.0, -0.1, 1.0, 0.9,
      0.0, 10.0, 1.0, 11.0, 0.0, 10.1, 1.0, 11.1, 0.0, 100.0, 1.0, 101.0};
  llvm::SmallVector<float, 12> classes = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3,
                                          0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
  llvm::SmallVector<SelectedBox, 4> refResults = {
      {0, 0, 3}, {0, 0, 0}, {0, 1, 3}, {0, 1, 0}};
  NMSMetaData metaData = {0, 2, 0.5, 0.4};
  llvm::SmallVector<int32_t, 1> refNumSelected{4};

  testNonMaxSuppression<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy,
                               boxesDims, scoresDims, boxes, classes,
                               refResults, refNumSelected, metaData, false);
}

TEST_P(OperatorTest, nms_two_boxes_float) {
  CHECK_IF_ENABLED();
  llvm::SmallVector<dim_t, 3> boxesDims = {1, 2, 4};
  llvm::SmallVector<dim_t, 3> scoresDims = {1, 1, 2};
  llvm::SmallVector<float, 4> boxes = {0.0, 0.0, 1.0, 1.0, 0.1, 0.1, 0.9, 0.9};
  llvm::SmallVector<float, 2> classes = {0.8, 0.9};
  llvm::SmallVector<SelectedBox, 1> refResults = {{0, 0, 1}};
  NMSMetaData metaData = {0, 1, 0.5, 0.0};
  llvm::SmallVector<int32_t, 1> refNumSelected{1};

  testNonMaxSuppression<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy,
                               boxesDims, scoresDims, boxes, classes,
                               refResults, refNumSelected, metaData, false);
}

/// Helper function to test AudioSpectrogram node.
template <size_t windowCount, size_t windowSize, bool magnitudeSquared>
static FunctionTensorPair
createAndInitBasicAudioSpectrogramTest(glow::PlaceholderBindings &bindings,
                                       glow::ExecutionEngine &EE) {
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  // Create random input audio signal.
  dim_t windowStride = 320;
  dim_t inputLength = windowSize + (windowCount - 1) * windowStride;
  auto *input = mod.createPlaceholder(ElemKind::FloatTy, {inputLength},
                                      "input", false /* isTrainable */);
  bindings.allocate(input)->getHandle().randomize(-1.0, 1.0, mod.getPRNG());

  // Create AudioSpectrogram node.
  auto *audioSpec = F->createAudioSpectrogram(
      "audio_spectrogram", input, windowSize, windowStride, magnitudeSquared);
  auto *res = F->createSave("save", audioSpec);
  auto *resultTensor = bindings.allocate(res->getPlaceholder());
  return std::make_pair(F, resultTensor);
}

#define TEST_AUDIO_SPECTROGRAM(WCOUNT, WSIZE, MSQUARED, TOL)                   \
  TEST_P(OperatorStatelessTest,                                                \
         AudioSpectrogram_##WCOUNT##x##WSIZE##_##MSQUARED##_Float) {           \
    ENABLED_BACKENDS("Interpreter", "CPU");                                    \
    compareAgainstInterpreter(                                                 \
        getBackendName(),                                                      \
        createAndInitBasicAudioSpectrogramTest<WCOUNT, WSIZE, MSQUARED>,       \
        ElemKind::FloatTy, ElemKind::FloatTy, TOL);                            \
  }

/// Test one window magnitude spectrograms.
TEST_AUDIO_SPECTROGRAM(1, 2, false, 1e-6)
TEST_AUDIO_SPECTROGRAM(1, 4, false, 1e-6)
TEST_AUDIO_SPECTROGRAM(1, 8, false, 1e-6)
TEST_AUDIO_SPECTROGRAM(1, 16, false, 1e-6)
TEST_AUDIO_SPECTROGRAM(1, 32, false, 1e-6)
TEST_AUDIO_SPECTROGRAM(1, 64, false, 5e-6)
TEST_AUDIO_SPECTROGRAM(1, 128, false, 5e-6)
TEST_AUDIO_SPECTROGRAM(1, 256, false, 1e-5)
TEST_AUDIO_SPECTROGRAM(1, 512, false, 5e-5)
TEST_AUDIO_SPECTROGRAM(1, 1024, false, 5e-5)

/// Test multiple window magnitude spectrograms.
TEST_AUDIO_SPECTROGRAM(2, 256, false, 1e-5)
TEST_AUDIO_SPECTROGRAM(3, 320, false, 1e-5)
TEST_AUDIO_SPECTROGRAM(4, 640, false, 5e-5)

/// Test multiple window power spectrograms.
TEST_AUDIO_SPECTROGRAM(2, 256, true, 5e-4)
TEST_AUDIO_SPECTROGRAM(3, 320, true, 5e-4)
TEST_AUDIO_SPECTROGRAM(4, 640, true, 1e-3)

/// Helper function to test MFCC node.
template <size_t winNum, size_t specLen>
static FunctionTensorPair
createAndInitBasicMFCCTest(glow::PlaceholderBindings &bindings,
                           glow::ExecutionEngine &EE) {
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  // Create random input spectrogram.
  auto *spectrogram =
      mod.createPlaceholder(ElemKind::FloatTy, {winNum, specLen},
                            "spectrogram", false /* isTrainable */);
  bindings.allocate(spectrogram)
      ->getHandle()
      .randomize(10.0, 100.0, mod.getPRNG());

  // Create MFCC node.
  float sampleRate = 16000.0;
  float lowerFrequency = 20.0;
  float upperFrequency = 4000.0;
  size_t filterBankCount = 40;
  size_t numCoefficients = 13;
  auto *mfcc = F->createMFCC("mfcc", spectrogram, sampleRate, lowerFrequency,
                             upperFrequency, filterBankCount, numCoefficients);
  auto *res = F->createSave("save", mfcc);
  auto *resultTensor = bindings.allocate(res->getPlaceholder());
  return std::make_pair(F, resultTensor);
}

#define TEST_MFCC(WNUM, SLEN, TOL)                                             \
  TEST_P(OperatorStatelessTest, MFCC_##WNUM##x##SLEN##_Float) {                \
    ENABLED_BACKENDS("Interpreter", "CPU");                                    \
    compareAgainstInterpreter(getBackendName(),                                \
                              createAndInitBasicMFCCTest<WNUM, SLEN>,          \
                              ElemKind::FloatTy, ElemKind::FloatTy, TOL);      \
  }

TEST_MFCC(1, 17, 2e-4)
TEST_MFCC(1, 33, 5e-5)
TEST_MFCC(1, 65, 2e-5)
TEST_MFCC(1, 129, 1e-5)
TEST_MFCC(2, 257, 1e-5)
TEST_MFCC(3, 513, 1e-5)
TEST_MFCC(3, 1025, 1e-5)

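/// Helper to build, compile, and run a ROIAlign graph over \p featureMap with
/// \p boxes and \p batchIndices, comparing the output against
/// \p expectedValues within \p comparisonThreshold.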
template <typename DataType>
static void testRoiAlign(
    PlaceholderBindings &bindings, Module &mod, Function &F,
    ExecutionEngine &EE, ElemKind ElemTy, llvm::ArrayRef<dim_t> featureMapDims,
    llvm::ArrayRef<DataType> featureMap, llvm::ArrayRef<dim_t> boxesDims,
    llvm::ArrayRef<DataType> boxes, llvm::ArrayRef<dim_t> batchIndicesDims,
    llvm::ArrayRef<int32_t> batchIndices, PoolingMode mode, dim_t outputHeight,
    dim_t outputWidth, uint32_t samplingRatio, float spatialScale, bool aligned,
    llvm::ArrayRef<DataType> expectedValues, float comparisonThreshold,
    bool rotated) {
  auto *featureMapT =
      mod.createPlaceholder(ElemTy, featureMapDims, "featureMap", false);
  bindings.allocate(featureMapT)->getHandle<DataType>() = featureMap;

  auto *boxesT = mod.createPlaceholder(ElemTy, boxesDims, "boxes", false);
  bindings.allocate(boxesT)->getHandle<DataType>() = boxes;

  auto *batchIndicesT = mod.createPlaceholder(
      ElemKind::Int32ITy, batchIndicesDims, "batchIndices", false);
  bindings.allocate(batchIndicesT)->getHandle<int32_t>() = batchIndices;

  auto *LN = F.createROIAlign("ROIAlign", featureMapT, boxesT, batchIndicesT,
                              outputHeight, outputWidth, samplingRatio,
                              spatialScale, aligned, rotated, mode);
  auto *save = F.createSave("save", LN);
  auto *savePlaceholder = save->getPlaceholder();
  bindings.allocate(savePlaceholder);

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);

  auto saveH = bindings.get(savePlaceholder)->getHandle<DataType>();

  for (dim_t i = 0; i < expectedValues.size(); i++) {
    EXPECT_NEAR(saveH.raw(i), expectedValues[i], comparisonThreshold);
  }
}

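/// Basic ROIAlign test: two boxes pooled from a two-batch feature map, with
/// batch indices given in a separate tensor.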
template <typename DataType>
static void roiAlignBasicTest(PlaceholderBindings &bindings, Module &mod,
                              Function &F, ExecutionEngine &EE, ElemKind ElemTy,
                              float comparisonThreshold) {
  llvm::SmallVector<dim_t, 4> featureMapDims = {2, 5, 5, 2};
  llvm::SmallVector<DataType, 100> featureMap = {
      1., 0., 1., 1., 1., 2., 1., 3., 1., 4., 1., 5., 1., 6., 1.,
      7., 1., 8., 1., 9., 1., 10., 1., 11., 1., 12., 1., 13., 1., 14.,
      1., 15., 1., 16., 1., 17., 1., 18., 1., 19., 1., 20., 1., 21., 1.,
      22., 1., 23., 1., 24., 0., 1., 1., 1., 2., 1., 3., 1., 4., 1.,
      5., 1., 6., 1., 7., 1., 8., 1., 9., 1., 10., 1., 11., 1., 12.,
      1., 13., 1., 14., 1., 15., 1., 16., 1., 17., 1., 18., 1., 19., 1.,
      20., 1., 21., 1., 22., 1., 23., 1., 24., 1.};

  llvm::SmallVector<dim_t, 2> boxesDims = {2, 4};
  llvm::SmallVector<DataType, 8> boxes = {1., 1., 3., 3., 1., 1., 3., 3.};

  llvm::SmallVector<dim_t, 1> batchIndicesDims = {2};
  llvm::SmallVector<int32_t, 2> batchIndices = {1, 0};

  llvm::SmallVector<DataType, 12> expectedValues = {
      9, 1, 10, 1, 14, 1, 15, 1, 1, 9, 1, 10, 1, 14, 1, 15.};

  testRoiAlign<DataType>(
      bindings, mod, F, EE, ElemTy, featureMapDims, featureMap, boxesDims,
      boxes, batchIndicesDims, batchIndices, PoolingMode::AVG, 2, 2, 2, 1,
      false, expectedValues, comparisonThreshold, /*rotated*/ false);
}

TEST_P(OperatorTest, RoiAlign) {
  CHECK_IF_ENABLED();
  roiAlignBasicTest<float>(bindings_, mod_, *F_, EE_, ElemKind::FloatTy, 1E-4);
}

TEST_P(OperatorTest, FP16RoiAlign) {
  CHECK_IF_ENABLED();
  roiAlignBasicTest<float16_t>(bindings_, mod_, *F_, EE_, ElemKind::Float16Ty,
                               1E-3);
}

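/// ROIAlign test with the `aligned` coordinate transformation enabled.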
1388 | template <typename DataType> |
1389 | static void |
1390 | roiAlignWithAlignedCoordinatesTest(PlaceholderBindings &bindings, Module &mod, |
1391 | Function &F, ExecutionEngine &EE, |
1392 | ElemKind ElemTy, float comparisonThreshold) { |
1393 | llvm::SmallVector<dim_t, 4> featureMapDims = {1, 5, 5, 1}; |
1394 | llvm::SmallVector<DataType, 25> featureMap = { |
1395 | 0.1, 0.2, 0.3, 0.4, 0.5, 0.1, 0.2, 0.3, 0.4, 0.5, 0.1, 0.2, 0.3, |
1396 | 0.4, 0.5, 0.1, 0.2, 0.3, 0.4, 0.5, 0.1, 0.2, 0.3, 0.4, 0.5}; |
1397 | |
1398 | llvm::SmallVector<dim_t, 2> boxesDims = {1, 4}; |
1399 | llvm::SmallVector<DataType, 5> boxes = {0.0, 0.4, 4.3, 2.9}; |
1400 | |
1401 | llvm::SmallVector<dim_t, 1> batchIndicesDims = {1}; |
1402 | llvm::SmallVector<int32_t, 1> batchIndices = {0}; |
1403 | |
1404 | llvm::SmallVector<DataType, 9> expectedValues = { |
1405 | 0.1287, 0.2650, 0.4083, 0.1288, 0.2650, 0.4083, 0.1287, 0.2650, 0.4083}; |
1406 | |
1407 | testRoiAlign<DataType>( |
1408 | bindings, mod, F, EE, ElemTy, featureMapDims, featureMap, boxesDims, |
1409 | boxes, batchIndicesDims, batchIndices, PoolingMode::AVG, 3, 3, 2, 1, true, |
1410 | expectedValues, comparisonThreshold, /*rotated*/ false); |
1411 | } |
1412 | |
1413 | TEST_P(OperatorTest, RoiAlignWithAlignedCoordinates) { |
1414 | CHECK_IF_ENABLED(); |
1415 | roiAlignWithAlignedCoordinatesTest<float>(bindings_, mod_, *F_, EE_, |
1416 | ElemKind::FloatTy, 1E-4); |
1417 | } |
1418 | |
1419 | TEST_P(OperatorTest, FP16RoiAlignWithAlignedCoordinates) { |
1420 | CHECK_IF_ENABLED(); |
1421 | roiAlignWithAlignedCoordinatesTest<float16_t>(bindings_, mod_, *F_, EE_, |
1422 | ElemKind::Float16Ty, 1E-3); |
1423 | } |
1424 | |
/// RoiAlign test where the batch index is given in the boxes tensor (caffe2
/// format), with batch_size == 1.
1426 | template <typename DataType> |
1427 | static void roiAlignBatchIndexInBoxesTensorTest(PlaceholderBindings &bindings, |
1428 | Module &mod, Function &F, |
1429 | ExecutionEngine &EE, |
1430 | ElemKind ElemTy, |
1431 | float comparisonThreshold) { |
1432 | llvm::SmallVector<dim_t, 4> featureMapDims = {1, 5, 5, 1}; |
1433 | llvm::SmallVector<DataType, 25> featureMap = { |
1434 | -1.2428743, -0.9784467, 0.33036363, 0.47368783, -0.81611377, |
1435 | -1.1874917, -1.6208626, -0.04190686, -0.5767553, 1.1949452, |
1436 | -2.1838918, 1.0099407, 0.6925469, 0.37020323, -0.3799704, |
1437 | -0.10355259, -0.64257944, -1.3108171, -1.5346326, -1.4158413, |
1438 | 0.65036285, -0.59222955, -1.560379, -0.33371264, 0.37395215, |
1439 | }; |
1440 | |
1441 | llvm::SmallVector<dim_t, 2> boxesDims = {2, 5}; |
  llvm::SmallVector<DataType, 10> boxes = {
1443 | 0., 1.1889961, 0.53260314, 3.1794803, 3.5056353, |
1444 | 0., 1.4748696, 2.4069107, 4.1870456, 4.6166725}; |
1445 | |
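  // Unused: the batch index comes from the boxes tensor (column 0).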
1446 | llvm::SmallVector<dim_t, 1> batchIndicesDims = {1}; |
1447 | llvm::SmallVector<int32_t, 1> batchIndices = {1}; |
1448 | |
1449 | llvm::SmallVector<DataType, 18> expectedValues = { |
1450 | -1.1747, -0.3246, 0.0591, -0.3049, 0.1516, 0.1917, |
1451 | 0.0270, -0.1727, -0.4240, 0.3784, 0.0435, -0.2741, |
1452 | -0.7801, -1.1925, -1.2289, -0.9860, -1.2124, -0.5044}; |
1453 | |
1454 | testRoiAlign<DataType>( |
1455 | bindings, mod, F, EE, ElemTy, featureMapDims, featureMap, boxesDims, |
1456 | boxes, batchIndicesDims, batchIndices, PoolingMode::AVG, 3, 3, 2, 1, true, |
1457 | expectedValues, comparisonThreshold, /*rotated*/ false); |
1458 | } |
1459 | |
1460 | TEST_P(OperatorTest, RoiAlignBatchIndexInBoxesTensor) { |
1461 | CHECK_IF_ENABLED(); |
1462 | roiAlignBatchIndexInBoxesTensorTest<float>(bindings_, mod_, *F_, EE_, |
1463 | ElemKind::FloatTy, 1E-4); |
1464 | } |
1465 | |
1466 | TEST_P(OperatorTest, FP16RoiAlignBatchIndexInBoxesTensor) { |
1467 | CHECK_IF_ENABLED(); |
1468 | |
  // A 1E-2 threshold is required because fp16 occasionally causes sampling
  // points to be shifted due to rounding, which results in a large maximum
  // difference from the reference.
1472 | roiAlignBatchIndexInBoxesTensorTest<float16_t>(bindings_, mod_, *F_, EE_, |
1473 | ElemKind::Float16Ty, 1E-2); |
1474 | } |
1475 | |
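/// Fill \p boxes with \p count random ROIs in caffe2 format
/// [batch_idx, x1, y1, x2, y2], with coordinates in [0, min(H, W)]. All
/// boxes are assigned to batch 0; \p N is currently unused.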
1476 | template <typename DataType> |
1477 | static void randRois(dim_t N, dim_t H, dim_t W, dim_t count, |
1478 | Handle<DataType> &boxes, Module &mod) { |
1479 | boxes.randomize(static_cast<DataType>(0), |
1480 | static_cast<DataType>(std::min(H, W)), mod.getPRNG()); |
1481 | |
  // Enforce format [batch_idx, x1, y1, x2, y2] where x2 >= x1 and y2 >= y1.
1483 | for (dim_t n = 0; n < count; ++n) { |
1484 | boxes.at({n, 0}) = 0; |
1485 | if (boxes.at({n, 1}) > boxes.at({n, 3})) { |
1486 | std::swap(boxes.at({n, 1}), boxes.at({n, 3})); |
1487 | } |
1488 | if (boxes.at({n, 2}) > boxes.at({n, 4})) { |
1489 | std::swap(boxes.at({n, 2}), boxes.at({n, 4})); |
1490 | } |
1491 | } |
1492 | } |
1493 | |
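/// Compare FP16 RoiAlign, with random ROIs and the batch index in the boxes
/// tensor, against the interpreter reference.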
1494 | TEST_P(OperatorStatelessTest, |
1495 | FP16RoiAlignBatchIndexInBoxesTensorCompareToInterpreter) { |
1496 | CHECK_IF_ENABLED(); |
1497 | |
1498 | compareAgainstInterpreter( |
1499 | getBackendName(), |
1500 | [](PlaceholderBindings &bindings, ExecutionEngine &EE) { |
1501 | Module &mod = EE.getModule(); |
        Function *F = mod.createFunction("main");
1503 | dim_t H = 50; |
1504 | dim_t W = 50; |
1505 | dim_t N = 1; |
1506 | dim_t C = 2; |
1507 | dim_t pooled_H = 6; |
1508 | dim_t pooled_W = 6; |
        uint32_t samplingRatio = 2;
1510 | float spatialScale = 0.0625; |
1511 | |
1512 | llvm::SmallVector<dim_t, 4> featureMapDims = {N, H, W, C}; |
        auto *featureMapT = mod.createPlaceholder(
            ElemKind::FloatTy, featureMapDims, "featureMap", false);
1515 | bindings.allocate(featureMapT) |
1516 | ->getHandle() |
1517 | .randomize(0.0f, 1.0f, mod.getPRNG()); |
1518 | |
1519 | dim_t count = 4; |
1520 | llvm::SmallVector<dim_t, 2> boxesDims = {count, 5}; |
        auto *boxesT =
            mod.createPlaceholder(ElemKind::FloatTy, boxesDims, "boxes",
                                  /*trainable*/ false);
1524 | Handle<float> boxesH = bindings.allocate(boxesT)->getHandle<float>(); |
1525 | randRois<float>(N, H / spatialScale, W / spatialScale, count, boxesH, |
1526 | mod); |
1527 | |
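        // Unused: the batch index comes from the boxes tensor (column 0).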
1528 | llvm::SmallVector<dim_t, 1> batchIndicesDims = {1}; |
1529 | llvm::SmallVector<int32_t, 1> batchIndices = {1}; |
        auto *batchIndicesT = mod.createPlaceholder(
            ElemKind::Int32ITy, batchIndicesDims, "batch_indices", false);
1532 | bindings.allocate(batchIndicesT)->getHandle<int32_t>() = batchIndices; |
1533 | |
        auto *R = F->createROIAlign(
            "roi_align", featureMapT, boxesT, batchIndicesT, pooled_H,
            pooled_W, samplingRatio, spatialScale, /*aligned*/ true,
            /*rotated*/ false, PoolingMode::AVG);
1538 | |
        SaveNode *save = F->createSave("save", R);
1540 | Tensor *saveTensor = bindings.allocate(save->getPlaceholder()); |
1541 | return std::make_pair(F, saveTensor); |
1542 | }, |
1543 | ElemKind::FloatTy, ElemKind::Float16Ty, 5E-2); |
1544 | } |
1545 | |
/// RoiAlign test where the batch index is given in the boxes tensor (caffe2
/// format), with batch_size == 4.
1547 | template <typename DataType> |
1548 | static void roiAlignC2BatchedTest(PlaceholderBindings &bindings, Module &mod, |
1549 | Function &F, ExecutionEngine &EE, |
1550 | ElemKind ElemTy, float comparisonThreshold) { |
1551 | llvm::SmallVector<dim_t, 4> featureMapDims = {4, 5, 5, 3}; |
1552 | llvm::SmallVector<DataType, 300> featureMap = { |
1553 | -1.4997481e-01, -9.8885156e-02, 1.2952483e+00, -4.4686830e-01, |
1554 | -1.9194591e+00, -1.0772421e+00, -1.1467551e-01, 8.9944112e-01, |
1555 | 6.4507586e-01, -9.8680484e-01, -2.4539863e-01, -1.3373662e+00, |
1556 | 6.3659292e-01, -3.1682998e-01, -8.7653893e-01, |
1557 | |
1558 | 4.5280015e-01, 2.7663174e-01, -1.0524951e+00, 1.1813318e+00, |
1559 | -1.2291962e+00, 1.2122868e+00, -7.5726169e-01, 1.7416600e+00, |
1560 | -1.4438627e+00, 2.2553526e-01, 1.4496186e+00, -9.8364061e-01, |
1561 | -1.7099962e+00, 1.7165806e+00, -4.2644852e-01, |
1562 | |
1563 | -2.2035122e+00, 1.2187438e+00, 4.5501122e-01, 1.1717483e+00, |
1564 | 9.8809980e-02, -6.9401674e-02, -4.0079719e-01, -5.2090770e-01, |
1565 | 9.7559446e-01, -1.5667720e+00, 5.5907667e-01, -4.5638707e-01, |
1566 | -2.3643453e-01, -2.2533321e+00, -5.2161014e-01, |
1567 | |
1568 | -1.9849734e-01, -1.5915425e+00, -1.2717092e-01, -1.1243403e+00, |
1569 | -2.0563929e+00, -1.5039265e-01, -4.4963720e-01, 4.2345795e-01, |
1570 | -1.8417383e-02, 1.3405696e+00, 1.9051230e-01, 1.0407910e+00, |
1571 | -9.9479568e-01, 6.3413751e-01, -1.4580569e+00, |
1572 | |
1573 | 7.1679175e-01, 1.4471674e-01, -1.3997192e+00, 7.0409644e-01, |
1574 | -1.6881183e+00, -6.0072118e-01, -7.1876746e-01, 4.7649837e-01, |
1575 | -1.1106577e+00, 1.3523364e+00, -6.4029312e-01, 1.4514278e+00, |
1576 | -1.0234021e+00, -1.7788823e+00, 7.7104000e-03, |
1577 | |
1578 | 4.2131311e-01, -1.1457406e+00, -5.8293420e-01, -3.2084238e-02, |
1579 | 4.8537293e-01, 3.2275200e-01, 1.2700356e+00, 1.2349664e+00, |
1580 | 5.8654165e-01, -1.2600404e+00, -1.3615701e+00, 2.0268664e-01, |
1581 | 4.8697135e-01, -9.3002540e-01, 1.3607346e+00, |
1582 | |
1583 | -1.8294290e-01, -1.5636250e-01, 2.7806088e-01, -5.8244568e-01, |
1584 | -5.2727741e-01, -7.8948897e-01, 1.4770951e+00, -5.6237417e-01, |
1585 | 9.7146934e-01, -8.4972686e-01, -3.5488096e-01, -7.3511235e-02, |
1586 | 1.6265751e+00, 4.1761816e-01, -8.4130716e-01, |
1587 | |
1588 | 2.1895346e-01, 3.3017102e-01, 1.0423416e-01, 2.3304439e-01, |
1589 | -5.4485726e-01, 4.6967003e-01, 2.2024193e+00, -1.0180294e-02, |
1590 | 5.8995700e-01, 3.0450410e-01, -1.3114309e+00, -8.7699980e-01, |
1591 | 1.5916479e-01, -6.3107949e-01, 3.6086974e-01, |
1592 | |
1593 | 5.7962316e-01, -2.0860515e+00, -1.7852426e+00, -9.4240969e-01, |
1594 | -2.5013718e-01, -9.6015137e-01, 1.5564002e-01, 8.7524027e-01, |
1595 | -1.7288256e+00, 8.9928240e-01, -5.8292085e-01, -2.0578516e+00, |
1596 | 9.3291610e-01, -3.1894284e-01, 1.4940295e-01, |
1597 | |
1598 | 4.7993332e-01, 8.8685113e-01, 1.5998088e-02, -3.0376071e-03, |
1599 | -9.1030812e-01, 2.5395685e-01, -7.3639840e-02, 1.5035777e+00, |
1600 | -1.3367783e+00, 4.4903034e-01, -1.9161012e-02, 4.5010322e-01, |
1601 | 6.9552845e-01, -2.0336145e-01, -1.4398783e-02, |
1602 | |
1603 | -1.1160702e+00, 1.0709391e+00, 8.5241461e-01, -1.6760592e+00, |
1604 | 1.8895254e-01, 7.5980502e-01, -2.2822763e-01, 2.5674531e-01, |
1605 | 8.5795867e-01, -4.2376343e-02, 3.5849747e-01, -7.0041668e-01, |
1606 | -1.1749506e+00, -7.6209731e-02, 9.3490142e-01, |
1607 | |
1608 | 8.4322268e-01, 6.0089475e-01, 1.2778026e+00, -5.2632529e-01, |
1609 | -7.7977139e-01, 1.3875870e+00, 7.0041299e-01, 1.3700093e+00, |
1610 | -1.3874733e+00, -5.7349408e-01, 6.6391379e-01, -1.5689260e+00, |
1611 | -1.6703378e-01, 1.0597401e-01, 5.8617592e-01, |
1612 | |
1613 | -2.6551807e-01, -1.6452628e+00, 3.4110144e-01, 3.6732164e-01, |
1614 | -7.0698965e-01, 4.8472685e-01, 5.7356831e-02, -1.3607574e+00, |
1615 | -1.5073760e-01, -7.4872303e-01, -9.2906094e-01, 9.0447372e-01, |
1616 | -4.5557413e-01, 2.2286782e-01, 1.0092977e+00, |
1617 | |
1618 | 2.8225061e-01, -1.3488407e+00, 1.5358961e+00, -9.0286934e-01, |
1619 | 8.1959856e-01, -5.3633952e-01, 8.8325459e-01, 4.3913189e-01, |
1620 | 1.8962466e+00, 1.0499959e-01, -1.7051783e+00, 1.1462390e+00, |
1621 | -1.9076254e+00, 7.9921043e-01, 1.8769097e-01, |
1622 | |
1623 | 8.6285615e-01, -7.5376606e-01, -2.7797452e-01, 8.2129729e-01, |
1624 | -1.1357613e+00, -1.0534587e+00, -1.6342834e+00, 1.5571175e+00, |
1625 | -2.9357672e-02, 5.0357723e-01, 1.7594602e+00, -4.1023266e-01, |
1626 | -3.8507235e-01, -1.4152279e+00, 1.3019496e+00, |
1627 | |
1628 | 5.5732393e-01, 1.6657623e+00, -6.0697760e-02, 1.1874427e+00, |
1629 | 1.5112163e+00, 4.2789158e-01, -4.8342901e-01, 1.0879853e+00, |
1630 | 2.5128168e-01, -7.4815375e-01, -7.0994526e-01, -8.1975794e-01, |
1631 | 2.4763657e-01, 5.3745079e-01, -7.0532227e-01, |
1632 | |
1633 | 1.9053514e-01, -3.1138790e-01, -1.8849430e+00, -7.2135782e-01, |
1634 | -2.2610760e-01, 1.1200874e+00, 5.8765519e-01, 1.7486675e-02, |
1635 | -1.8689735e+00, 1.0521593e+00, 1.0392075e+00, 2.2325387e+00, |
1636 | 7.4370694e-01, -4.3933296e-01, -1.8680326e+00, |
1637 | |
1638 | 7.8669429e-01, -1.7130607e+00, -1.8260387e+00, -1.6219904e+00, |
1639 | 2.6793033e-01, 5.6496286e-01, 5.2848613e-01, 1.0625128e-01, |
1640 | 3.5053259e-01, 1.9303731e+00, -1.1183808e+00, -1.9174458e+00, |
1641 | 2.2270663e-01, -1.0492816e+00, -2.3991664e-01, |
1642 | |
1643 | 5.4555202e-01, -1.1328123e+00, -4.7008261e-01, 8.3088994e-02, |
1644 | 8.6603612e-01, 5.3655165e-01, 5.4011714e-01, 2.0690429e+00, |
1645 | -1.6191018e-01, 9.0212280e-01, -9.0078294e-01, -5.3107500e-01, |
1646 | -5.6809604e-02, 1.3337183e+00, 6.3540235e-02, |
1647 | |
1648 | 5.9740990e-01, 3.1837901e-01, -8.6937255e-01, -1.4723153e-01, |
1649 | 8.5274154e-01, 4.3450969e-01, -6.7253810e-01, 3.8070625e-01, |
1650 | -1.4946671e+00, -4.9999154e-01, 2.2797520e+00, 3.7723225e-01, |
1651 | 5.4892421e-01, 5.7596415e-01, 1.2112036e+00}; |
1652 | |
1653 | llvm::SmallVector<dim_t, 2> boxesDims = {4, 5}; |
1654 | llvm::SmallVector<DataType, 20> boxes = { |
1655 | 2.0000000e+00, 2.3108411e+00, 3.2493637e+00, 3.3715181e+00, |
1656 | 4.5002527e+00, 1.0000000e+00, 3.2116971e+00, 9.6868110e-01, |
1657 | 4.9558969e+00, 3.4516301e+00, 0.0000000e+00, 2.7448869e-01, |
1658 | 3.3287115e+00, 3.6297052e+00, 4.4592605e+00, 1.0000000e+00, |
1659 | 1.2294500e+00, 1.8630254e+00, 2.9256778e+00, 3.1924551e+00}; |
1660 | |
1661 | llvm::SmallVector<dim_t, 1> batchIndicesDims = {4}; |
1662 | llvm::SmallVector<int32_t, 4> batchIndices = {2, 1, 0, 1}; |
1663 | |
  llvm::SmallVector<DataType, 108> expectedValues = {
1665 | -6.5894896e-01, 5.6539643e-01, 1.0041733e+00, |
1666 | -9.4539058e-01, 2.0993830e-01, 9.9824858e-01, |
1667 | -1.1638527e+00, -8.7358490e-02, 9.6341258e-01, |
1668 | |
1669 | -8.9801103e-02, 3.5700285e-01, 1.1669571e+00, |
1670 | -4.6619377e-01, -5.3864054e-02, 1.1835206e+00, |
1671 | -7.6861465e-01, -3.8029239e-01, 1.1398559e+00, |
1672 | |
1673 | 3.4802374e-01, 9.4746768e-02, 1.2450449e+00, |
1674 | -6.2197246e-02, -3.1529313e-01, 1.2807325e+00, |
1675 | -4.0000397e-01, -6.2870646e-01, 1.2343520e+00, |
1676 | |
1677 | 1.2194232e-01, -4.8879901e-01, -2.1927929e-01, |
1678 | -2.5108352e-02, -9.6720949e-02, -6.6829696e-02, |
1679 | -5.9729241e-02, 2.5984848e-01, 9.4225824e-02, |
1680 | |
1681 | -7.2876096e-02, -4.0418655e-01, -1.7393507e-01, |
1682 | -2.1393849e-01, -2.3455608e-01, -2.4073394e-01, |
1683 | -2.2880568e-01, -7.6615483e-02, -2.3102391e-01, |
1684 | |
1685 | -2.6769453e-01, -3.1957406e-01, -1.2859085e-01, |
1686 | -4.0276864e-01, -3.7239122e-01, -4.1463819e-01, |
1687 | -3.9788216e-01, -4.1307950e-01, -5.5627370e-01, |
1688 | |
1689 | 9.1947585e-02, -2.7115697e-01, 2.9882264e-01, |
1690 | 1.2106247e-01, -8.3870202e-01, 8.7205000e-02, |
1691 | 1.5017739e-01, -1.4062470e+00, -1.2441259e-01, |
1692 | |
1693 | 3.5570449e-01, -1.2669888e-01, -1.9961390e-01, |
1694 | 4.9875557e-01, -6.5927219e-01, 1.0402098e-01, |
1695 | 6.4180654e-01, -1.1918454e+00, 4.0765578e-01, |
1696 | |
1697 | 4.3482316e-01, 5.3905103e-02, -5.4277897e-01, |
1698 | 7.2550941e-01, -4.4221759e-01, 1.2174799e-01, |
1699 | 1.0161958e+00, -9.3834019e-01, 7.8627473e-01, |
1700 | |
1701 | 1.4355460e-01, -6.0647041e-01, -2.5467190e-01, |
1702 | -2.4918951e-03, -2.5169450e-01, -1.3898802e-01, |
1703 | -1.4853841e-01, 1.0308146e-01, -2.3304094e-02, |
1704 | |
1705 | -5.3489491e-02, -4.3918055e-01, -1.2783936e-01, |
1706 | -1.9354768e-01, -3.0685222e-01, -2.3140387e-01, |
1707 | -3.3360592e-01, -1.7452389e-01, -3.3496842e-01, |
1708 | |
1709 | -2.3242901e-01, -2.7417648e-01, -4.4064280e-03, |
1710 | -3.6451423e-01, -3.5603085e-01, -3.0842513e-01, |
1711 | -4.9659950e-01, -4.3788522e-01, -6.1244386e-01}; |
1712 | |
1713 | testRoiAlign<DataType>( |
1714 | bindings, mod, F, EE, ElemTy, featureMapDims, featureMap, boxesDims, |
1715 | boxes, batchIndicesDims, batchIndices, PoolingMode::AVG, 3, 3, 2, 0.0625, |
1716 | false, expectedValues, comparisonThreshold, /*rotated*/ false); |
1717 | } |
1718 | |
1719 | TEST_P(OperatorTest, RoiAlignC2Batched) { |
1720 | CHECK_IF_ENABLED(); |
1721 | roiAlignC2BatchedTest<float>(bindings_, mod_, *F_, EE_, ElemKind::FloatTy, |
1722 | 1E-4); |
1723 | } |
1724 | |
1725 | TEST_P(OperatorTest, FP16RoiAlignC2Batched) { |
1726 | CHECK_IF_ENABLED(); |
  // A 1E-2 threshold is required because fp16 occasionally causes sampling
  // points to be shifted due to rounding, which results in a large maximum
  // difference from the reference.
1730 | roiAlignC2BatchedTest<float16_t>(bindings_, mod_, *F_, EE_, |
1731 | ElemKind::Float16Ty, 1E-2); |
1732 | } |
1733 | |
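/// RoiAlign test for rotated boxes, with the batch index given in the boxes
/// tensor.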
1734 | template <typename DataType> |
1735 | static void roiAlignRotatedBatchIndexInBoxesTensorTest( |
1736 | PlaceholderBindings &bindings, Module &mod, Function &F, |
1737 | ExecutionEngine &EE, ElemKind ElemTy, float comparisonThreshold) { |
1738 | llvm::SmallVector<dim_t, 4> featureMapDims = {1, 5, 5, 3}; |
  llvm::SmallVector<DataType, 75> featureMap = {
1740 | -8.6497840881, -5.0528664589, -5.1990814209, -10.8463373184, |
1741 | -14.9225864410, 4.0806860924, 14.7214040756, -11.9505138397, |
1742 | 16.7156505585, -9.7665214539, -13.4883165359, 1.3252578974, |
1743 | -1.6687428951, 10.5697870255, -4.4617910385, 16.9429378510, |
1744 | 9.5267467499, 5.9925584793, 5.6118640900, 1.5372716188, |
1745 | 2.4355530739, -3.0808238983, 2.6959202290, -9.9537315369, |
1746 | -1.1652010679, 15.3153333664, 11.4361877441, 8.7219638824, |
1747 | 6.0323386192, -3.3185434341, -5.8790159225, -7.0839004517, |
1748 | 11.3739776611, -7.1884007454, 10.0514144897, -7.9980802536, |
1749 | 15.8880462646, -2.3542327881, -9.3197269440, -4.7869114876, |
1750 | 15.6589784622, -1.5917046070, -1.2245910168, 0.0595506988, |
1751 | 3.6575553417, 14.7897586823, 11.4384317398, -5.1155147552, |
1752 | 0.7425209880, 1.1070071459, 4.2300715446, -17.3323173523, |
1753 | -2.9571244717, -3.6389255524, -8.8692741394, 19.7417812347, |
1754 | 7.1416730881, 25.0613708496, 3.8868305683, -1.4834585190, |
1755 | 0.3542223871, 14.2146720886, -7.8964066505, 7.7495927811, |
1756 | 3.6963310242, 9.0857019424, -3.4129979610, -3.1457190514, |
1757 | -15.2861795425, 10.1850719452, -0.2935675085, 9.8417263031, |
1758 | 1.1156638861, -8.5692892075, -1.8766889572}; |
1759 | |
1760 | llvm::SmallVector<dim_t, 2> boxesDims = {4, 6}; |
  llvm::SmallVector<DataType, 24> boxes = {
1762 | 0.0000000000e+00, 3.7350432873e+00, 1.8349769115e+00, |
1763 | 2.2127370536e-01, 1.7214350700e+00, 6.7396400452e+01, |
1764 | |
1765 | 0.0000000000e+00, 2.5810198784e+00, 2.7632935047e+00, |
1766 | 4.5813250542e-01, 1.0615788698e+00, 5.9284824371e+01, |
1767 | |
1768 | 0.0000000000e+00, 1.4992059469e+00, 3.3264288902e+00, |
1769 | 5.8828938752e-02, 1.2860099971e-01, 1.7042655945e+02, |
1770 | |
1771 | 0.0000000000e+00, 1.6475434303e+00, 1.1158514023e+00, |
1772 | 6.0969877243e-01, 1.6949450970e+00, 5.7489040375e+01}; |
1773 | |
  // Unused: the batch index comes from the boxes tensor (column 0).
  llvm::SmallVector<dim_t, 1> batchIndicesDims = {4};
  llvm::SmallVector<int32_t, 4> batchIndices = {42, 42, 42, 42};
1777 | |
  llvm::SmallVector<DataType, 108> expectedValues = {
1779 | -1.2753072977e+00, 1.1022174835e+01, 2.8559112549e+00, |
1780 | -1.5445901155e+00, 1.1492666245e+01, 4.0045604706e+00, |
1781 | -1.6816796064e+00, 1.1780773163e+01, 5.1841292381e+00, |
1782 | -1.1537375450e+00, 1.2963508606e+01, 4.9455566406e+00, |
1783 | -5.9787964821e-01, 1.2705860138e+01, 5.3227939606e+00, |
1784 | -1.7603963614e-02, 1.2472600937e+01, 5.6228017807e+00, |
1785 | 9.0734308958e-01, 5.7471928596e+00, 1.7764383554e+00, |
1786 | 1.6517986059e+00, 5.6778922081e+00, 1.5571854115e+00, |
1787 | 2.4206719398e+00, 5.6329779625e+00, 1.2607033253e+00, |
1788 | 4.3689918518e+00, 7.3948031664e-01, -7.3034667969e+00, |
1789 | 8.6381378174e+00, 2.3455758393e-01, -8.4534435272e+00, |
1790 | 1.1591947556e+01, -4.2240649462e-01, -9.0010957718e+00, |
1791 | 2.1553003788e+00, -1.4560343027e+00, -6.5866470337e+00, |
1792 | 6.0744242668e+00, -8.3328241110e-01, -7.0825934410e+00, |
1793 | 8.9081802368e+00, 5.4210889339e-01, -7.2683048248e+00, |
1794 | -4.3445730209e+00, 3.6746215820e+00, -3.1289699078e+00, |
1795 | -1.7619293928e+00, 5.1320915222e+00, -3.2894101143e+00, |
1796 | 2.2393733263e-01, 6.4935913086e+00, -3.5123698711e+00, |
1797 | -4.1849538684e-01, 2.1935482025e+00, 2.5842363834e+00, |
1798 | -2.0057903230e-01, 2.3351111412e+00, 2.5799021721e+00, |
1799 | 1.1708274484e-02, 2.4918785095e+00, 2.4347753525e+00, |
1800 | -8.1277823448e-01, 2.5285022259e+00, 2.0223598480e+00, |
1801 | -6.2242215872e-01, 2.6641519070e+00, 2.0815916061e+00, |
1802 | -4.2229780555e-01, 2.7992806435e+00, 1.9814052582e+00, |
1803 | -1.1822580099e+00, 2.8584275246e+00, 1.4644309282e+00, |
1804 | -1.0267944336e+00, 3.0002222061e+00, 1.5492329597e+00, |
1805 | -8.4862631559e-01, 3.1232376099e+00, 1.5107266903e+00, |
1806 | -6.1683624983e-02, -3.0222876072e+00, 1.8380764723e+00, |
1807 | -4.1196775436e+00, -6.7160081863e+00, 1.7320134640e+00, |
1808 | -7.8356714249e+00, -1.0480127335e+01, 2.1065652370e+00, |
1809 | 2.1715850830e+00, -1.5094176531e+00, 2.0960900784e+00, |
1810 | -3.2094952464e-01, -4.5263018608e+00, 2.4609162807e+00, |
1811 | -1.1458464861e+00, -7.0648045540e+00, 3.5408535004e+00, |
1812 | 1.7010095119e+00, 2.0761563778e+00, -4.2401647568e+00, |
1813 | 4.8630356789e-01, 8.5567343235e-01, -3.9398088455e+00, |
1814 | 1.1503255367e+00, -1.4384213686e+00, -1.8096057177e+00}; |
1815 | |
1816 | testRoiAlign<DataType>(bindings, mod, F, EE, ElemTy, featureMapDims, |
1817 | featureMap, boxesDims, boxes, batchIndicesDims, |
1818 | batchIndices, PoolingMode::AVG, 3, 3, 2, 1, true, |
1819 | expectedValues, comparisonThreshold, /*rotated*/ true); |
1820 | } |
1821 | |
1822 | TEST_P(OperatorTest, RoiAlignRotatedBatchIndexInBoxesTensor) { |
1823 | CHECK_IF_ENABLED(); |
1824 | roiAlignRotatedBatchIndexInBoxesTensorTest<float>(bindings_, mod_, *F_, EE_, |
1825 | ElemKind::FloatTy, 1E-4); |
1826 | } |
1827 | |
1828 | TEST_P(OperatorTest, FP16RoiAlignRotatedBatchIndexInBoxesTensor) { |
1829 | CHECK_IF_ENABLED(); |
1830 | |
  // A 1E-1 threshold is required because fp16 occasionally causes sampling
  // points to be shifted due to rounding, which results in a large maximum
  // difference from the reference.
1834 | roiAlignRotatedBatchIndexInBoxesTensorTest<float16_t>( |
1835 | bindings_, mod_, *F_, EE_, ElemKind::Float16Ty, 1E-1); |
1836 | } |
1837 | |
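/// Test BBoxTransform for axis-aligned boxes: applies a fixed set of deltas
/// to a fixed set of rois, compares the transformed boxes against reference
/// values within \p absError, and checks the per-image ROI batch splits.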
1838 | template <typename DataType> |
1839 | static void testBBoxTransform(PlaceholderBindings &bindings, Module &mod, |
1840 | Function &F, ExecutionEngine &EE, ElemKind ElemTy, |
1841 | bool applyScale, bool legacyPlusOne, |
1842 | float absError) { |
1843 | llvm::SmallVector<dim_t, 2> roisDims = {5, 5}; |
1844 | llvm::SmallVector<DataType, 25> rois = { |
1845 | 0., 22.113754, 10.269318, 77.57481, 117.23254, |
1846 | 0., 89.73806, 46.060974, 125.824005, 96.2649, |
1847 | 1., 11.121593, 78.21209, 75.711426, 254.73167, |
1848 | 3., 0.9983631, 352.86606, 248.86679, 367.66916, |
1849 | 3., 221.1072, 136.93027, 413.82764, 211.13977}; |
1850 | |
1851 | llvm::SmallVector<dim_t, 2> deltasDims = {5, 8}; |
1852 | llvm::SmallVector<DataType, 40> deltas = { |
1853 | -0.30892685, -0.44120562, 1.7046866, -0.62745374, 1.1726723, |
1854 | -0.52569604, -0.14308402, 0.48242334, -1.3132329, -1.5958056, |
1855 | -0.81750935, 2.2151427, -0.73521894, -0.00737088, 2.3750482, |
1856 | -1.5794574, -0.48789233, 1.7873235, 0.6119284, -0.7701755, |
1857 | -0.41762614, -0.9074146, -0.7296619, -0.30050594, 0.58725464, |
1858 | 0.71989095, -0.8755994, -1.2122285, -0.5378105, -0.90247065, |
1859 | 1.3996177, -1.3575566, 0.6860114, -0.4028068, 0.15296046, |
1860 | -0.22815527, -2.4161322, -1.8008438, -0.92949533, 0.19269551}; |
1861 | |
1862 | llvm::SmallVector<dim_t, 2> imInfoDims = {4, 3}; |
1863 | llvm::SmallVector<DataType, 12> imInfo = {159., 159., 1., 328., 328., 1., |
1864 | 466., 466., 0.8, 414., 414., 0.625}; |
1865 | |
1866 | std::vector<float> weights = {10.0, 10.0, 5.0, 5.0}; |
1867 | |
1868 | std::vector<DataType> expectedValues = { |
1869 | 9.1345, 11.8575, 87.1274, 106.2058, 29.3998, 0.0000, 83.2963, |
1870 | 117.0268, 87.7207, 24.0571, 118.3636, 102.2456, 76.1143, 52.8231, |
1871 | 134.1416, 89.4287, 3.7658, 122.3617, 76.7646, 273.6816, 12.8093, |
1872 | 67.3427, 68.6289, 233.5658, 35.4638, 355.5252, 243.5137, 367.1413, |
1873 | 0.0000, 353.2900, 275.5705, 364.5733, 231.3346, 135.5961, 413.7500, |
1874 | 206.4955, 190.8902, 122.1084, 350.9171, 199.2337}; |
1875 | |
  auto *ROIS = mod.createPlaceholder(ElemTy, roisDims, "rois", false);
  bindings.allocate(ROIS)->getHandle<DataType>() = rois;

  auto *DELTAS = mod.createPlaceholder(ElemTy, deltasDims, "deltas", false);
  bindings.allocate(DELTAS)->getHandle<DataType>() = deltas;

  auto *IMINFO = mod.createPlaceholder(ElemTy, imInfoDims, "imInfo", false);
  bindings.allocate(IMINFO)->getHandle<DataType>() = imInfo;

  auto *BBTN = F.createBBoxTransform(
      "bboxTransform", ROIS, DELTAS, IMINFO, weights, applyScale,
      /* rotated */ false, /* angleBoundOn */ false, /* angleBoundLo */ 0,
      /* angleBoundHi */ 0, /* clipAngleThresh */ 0, legacyPlusOne);

  auto *save = F.createSave("save", BBTN->getBoxOut());
  auto *savePlaceholder = save->getPlaceholder();
  bindings.allocate(savePlaceholder);

  auto *saveSplits = F.createSave("save_splits", BBTN->getRoiBatchSplits());
  auto *saveSplitsPlaceholder = saveSplits->getPlaceholder();
  bindings.allocate(saveSplitsPlaceholder);
1897 | |
1898 | EE.compile(CompilationMode::Infer); |
1899 | |
1900 | EE.run(bindings); |
1901 | |
1902 | auto saveH = bindings.get(savePlaceholder)->getHandle<DataType>(); |
1903 | float maxDiff = 0.0f; |
1904 | for (dim_t i = 0; i < expectedValues.size(); i++) { |
1905 | EXPECT_NEAR(saveH.raw(i), expectedValues[i], absError); |
1906 | maxDiff = |
1907 | std::max(maxDiff, std::abs((float)(saveH.raw(i) - expectedValues[i]))); |
1908 | } |
1909 | VLOG(2) << "Max diff: " << maxDiff; |
1910 | |
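  // The rois are assigned to images {0, 0, 1, 3, 3} (see column 0 above), so
  // the expected per-image ROI counts are {2, 1, 0, 2}.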
1911 | std::vector<DataType> expectedSplitsValues = {2, 1, 0, 2}; |
1912 | auto saveSplitsH = bindings.get(saveSplitsPlaceholder)->getHandle<DataType>(); |
1913 | EXPECT_EQ(saveSplitsH.size(), expectedSplitsValues.size()); |
1914 | for (dim_t i = 0; i < expectedSplitsValues.size(); i++) { |
1915 | EXPECT_EQ(saveSplitsH.raw(i), expectedSplitsValues[i]); |
1916 | } |
1917 | } |
1918 | |
1919 | TEST_P(OperatorTest, BBoxTransform_Float) { |
1920 | CHECK_IF_ENABLED(); |
1921 | testBBoxTransform<float>(bindings_, mod_, *F_, EE_, ElemKind::FloatTy, |
1922 | /* applyScale */ true, |
1923 | /* legacyPlusOne */ false, /* absError */ 0.1); |
1924 | } |
1925 | |
1926 | TEST_P(OperatorTest, BBoxTransform_Float16) { |
1927 | CHECK_IF_ENABLED(); |
1928 | testBBoxTransform<float16_t>(bindings_, mod_, *F_, EE_, ElemKind::Float16Ty, |
1929 | /* applyScale */ true, |
1930 | /* legacyPlusOne */ false, /* absError */ 1.0); |
1931 | } |
1932 | |
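/// Test BBoxTransform for rotated boxes ([ctr_x, ctr_y, w, h, angle] after
/// the batch index), with angle bounding controlled by \p angleBoundOn,
/// \p angleBoundLo, and \p angleBoundHi.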
1933 | template <typename DataType> |
1934 | static void testBBoxTransformRotated(PlaceholderBindings &bindings, Module &mod, |
1935 | Function &F, ExecutionEngine &EE, |
1936 | ElemKind ElemTy, bool applyScale, |
1937 | bool angleBoundOn, int64_t angleBoundLo, |
1938 | int64_t angleBoundHi, |
1939 | float clipAngleThresh, bool legacyPlusOne, |
1940 | float absError) { |
1941 | llvm::SmallVector<dim_t, 2> roisDims = {2, 6}; |
1942 | llvm::SmallVector<DataType, 12> rois = { |
1943 | 0., 63.52861, 78.48322, 107.24573, 1.7388153, 72.550606, |
1944 | 1., 142.78809, 53.0654, 9.154373, 58.370438, 72.550606}; |
1945 | |
1946 | llvm::SmallVector<dim_t, 2> deltasDims = {2, 5}; |
1947 | llvm::SmallVector<DataType, 10> deltas = { |
1948 | -0.31072143, 1.9020474, 0.20086022, 0.49893576, -0.06181559, |
1949 | -0.6979074, -2.205989, -0.573434, -0.62059146, -0.50649583}; |
1950 | |
1951 | llvm::SmallVector<dim_t, 2> imInfoDims = {2, 3}; |
1952 | llvm::SmallVector<DataType, 6> imInfo = {263., 263., 0.7027847, |
1953 | 217., 217., 0.7027847}; |
1954 | |
1955 | std::vector<float> weights = {1.0, 1.0, 1.0, 1.0}; |
1956 | |
1957 | std::vector<DataType> expectedValues = {42.9791, 116.3806, 186.5478, 4.0749, |
1958 | 69.0088, 194.0839, -107.7131, 7.3412, |
1959 | 44.6531, 43.5305}; |
1960 | |
  auto *ROIS = mod.createPlaceholder(ElemTy, roisDims, "rois", false);
  bindings.allocate(ROIS)->getHandle<DataType>() = rois;

  auto *DELTAS = mod.createPlaceholder(ElemTy, deltasDims, "deltas", false);
  bindings.allocate(DELTAS)->getHandle<DataType>() = deltas;

  auto *IMINFO = mod.createPlaceholder(ElemTy, imInfoDims, "imInfo", false);
  bindings.allocate(IMINFO)->getHandle<DataType>() = imInfo;

  auto *BBTN = F.createBBoxTransform("bboxTransform", ROIS, DELTAS, IMINFO,
                                     weights, applyScale, /* rotated */ true,
                                     angleBoundOn, angleBoundLo, angleBoundHi,
                                     clipAngleThresh, legacyPlusOne);

  auto *save = F.createSave("save", BBTN->getBoxOut());
  auto *savePlaceholder = save->getPlaceholder();
  bindings.allocate(savePlaceholder);
1978 | |
1979 | EE.compile(CompilationMode::Infer); |
1980 | |
1981 | EE.run(bindings); |
1982 | |
1983 | auto saveH = bindings.get(savePlaceholder)->getHandle<DataType>(); |
1984 | float maxDiff = 0.0f; |
1985 | for (dim_t i = 0; i < expectedValues.size(); i++) { |
1986 | EXPECT_NEAR(saveH.raw(i), expectedValues[i], absError); |
1987 | maxDiff = |
1988 | std::max(maxDiff, std::abs((float)(saveH.raw(i) - expectedValues[i]))); |
1989 | } |
1990 | VLOG(2) << "Max diff: " << maxDiff; |
1991 | } |
1992 | |
1993 | TEST_P(OperatorTest, BBoxTransform_Rotated_Float) { |
1994 | CHECK_IF_ENABLED(); |
1995 | testBBoxTransformRotated<float>( |
1996 | bindings_, mod_, *F_, EE_, ElemKind::FloatTy, |
1997 | /* applyScale */ false, /* angleBoundOn */ false, /* angleBoundLo */ -90, |
1998 | /* angleBoundHi */ 90, /* clipAngleThresh */ 1.0, |
1999 | /* legacyPlusOne */ true, /* absError */ 0.1); |
2000 | } |
2001 | |
2002 | TEST_P(OperatorTest, BBoxTransform_Rotated_Float16) { |
2003 | CHECK_IF_ENABLED(); |
2004 | testBBoxTransformRotated<float16_t>( |
2005 | bindings_, mod_, *F_, EE_, ElemKind::Float16Ty, /* applyScale */ false, |
2006 | /* angleBoundOn */ false, /* angleBoundLo */ -90, |
2007 | /* angleBoundHi */ 90, /* clipAngleThresh */ 1.0, |
2008 | /* legacyPlusOne */ true, /* absError */ 1.0); |
2009 | } |
2010 | |
/// Helper to test SpaceToDepth with block size 3 using \p DTy.
2012 | template <typename DataType> |
2013 | static void testSpaceToDepthBlock3(glow::PlaceholderBindings &bindings, |
2014 | glow::Module &mod, glow::Function *F, |
2015 | glow::ExecutionEngine &EE, ElemKind DTy) { |
2016 | unsigned blockSize = 3; |
  auto *in = createPlaceholderConditionallyQuantized(mod, DTy, {1, 2, 6, 6},
                                                     "in", false, "NHWC");
  auto *tri = F->createTranspose("sptdTransposeIn", in, {0, 2, 3, 1}, "NHWC");
  auto *stdn = F->createSpaceToDepth("spacetodepth", tri, blockSize);
  auto *tro =
      F->createTranspose("sptdTransposeOut", stdn, {0, 3, 1, 2}, "NCHW");
  auto *save = F->createSave("save", tro);
2024 | auto *result = bindings.allocate(save->getPlaceholder()); |
2025 | |
2026 | /* |
2027 | Example for first batch. |
2028 | FROM: |
2029 | C0: C1: |
2030 | [0 1 2 3 16 17] [ 0 -1 -2 -3 -16 -17] |
2031 | [4 5 6 7 18 19] [-4 -5 -6 -7 -18 -19] |
2032 | [8 9 10 11 20 21] [-8 -9 -10 -11 -20 -21] |
2033 | [12 13 14 15 22 23] [-12 -13 -14 -15 -22 -23] |
2034 | [24 25 26 27 28 29] [-24 -25 -26 -27 -28 -29] |
2035 | [30 31 32 33 34 35] [-30 -31 -32 -33 -34 -35] |
2036 | |
2037 | TO: |
2038 | C = 0 |
2039 | [0,3] |
2040 | [12,15] |
2041 | |
2042 | C = 1 |
2043 | [0,-3] |
2044 | [-12,-15] |
2045 | |
2046 | C = 2 |
2047 | [1,16] |
2048 | [13,22] |
2049 | |
2050 | C = 3 |
2051 | [-1,-16] |
2052 | [-13,-22] |
2053 | |
2054 | C = 4 |
2055 | [2,17] |
2056 | [14,23] |
2057 | |
2058 | C = 5 |
2059 | [-2,-17] |
2060 | [-14,-23] |
2061 | |
2062 | C = 6 |
2063 | [4,7] |
2064 | [24,27] |
2065 | |
2066 | C = 7 |
2067 | [-4,-7] |
2068 | [-24,-27] |
2069 | |
2070 | C = 8 |
2071 | [5,18] |
2072 | [25,28] |
2073 | |
2074 | C = 9 |
2075 | [-5,-18] |
2076 | [-25,-28] |
2077 | |
2078 | C = 10 |
2079 | [6,19] |
2080 | [26,29] |
2081 | |
2082 | C = 11 |
2083 | [-6,-19] |
2084 | [-26,-29] |
2085 | |
2086 | C = 12 |
2087 | [8,11] |
2088 | [30,33] |
2089 | |
2090 | C = 13 |
2091 | [-8,-11] |
2092 | [-30,-33] |
2093 | |
2094 | C = 14 |
2095 | [9,20] |
2096 | [31,34] |
2097 | |
2098 | C = 15 |
2099 | [-9,-20] |
2100 | [-31,-34] |
2101 | |
2102 | C = 16 |
2103 | [10,21] |
2104 | [32,35] |
2105 | |
2106 | C = 17 |
2107 | [-10,-21] |
2108 | [-32,-35] |
2109 | */ |
2110 | |
2111 | bindings.allocate(in)->getHandle<DataType>() = { |
2112 | 0, 1, 2, 3, 16, 17, 4, 5, 6, 7, 18, 19, 8, 9, 10, |
2113 | 11, 20, 21, 12, 13, 14, 15, 22, 23, 24, 25, 26, 27, 28, 29, |
2114 | 30, 31, 32, 33, 34, 35, 0, -1, -2, -3, -16, -17, -4, -5, -6, |
2115 | -7, -18, -19, -8, -9, -10, -11, -20, -21, -12, -13, -14, -15, -22, -23, |
2116 | -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -35}; |
2117 | |
2118 | std::vector<DataType> refResult = { |
2119 | 0, 3, 12, 15, 0, -3, -12, -15, 1, 16, 13, 22, -1, -16, -13, |
2120 | -22, 2, 17, 14, 23, -2, -17, -14, -23, 4, 7, 24, 27, -4, -7, |
2121 | -24, -27, 5, 18, 25, 28, -5, -18, -25, -28, 6, 19, 26, 29, -6, |
2122 | -19, -26, -29, 8, 11, 30, 33, -8, -11, -30, -33, 9, 20, 31, 34, |
2123 | -9, -20, -31, -34, 10, 21, 32, 35, -10, -21, -32, -35}; |
2124 | |
2125 | EE.compile(CompilationMode::Infer); |
2126 | EE.run(bindings); |
2127 | |
2128 | Handle<DataType> resultH = result->getHandle<DataType>(); |
2129 | |
2130 | auto iDims = in->dims(); |
2131 | auto oDims = resultH.dims(); |
2132 | EXPECT_EQ(iDims[0], oDims[0]); |
2133 | EXPECT_EQ(iDims[1] * blockSize * blockSize, oDims[1]); |
2134 | EXPECT_EQ(iDims[2], oDims[2] * blockSize); |
2135 | EXPECT_EQ(iDims[3], oDims[3] * blockSize); |
2136 | |
2137 | // NCHW format |
2138 | dim_t resIndex = 0; |
2139 | for (dim_t on = 0; on < oDims[0]; ++on) { |
2140 | for (dim_t oc = 0; oc < oDims[1]; ++oc) { |
2141 | for (dim_t oh = 0; oh < oDims[2]; ++oh) { |
2142 | for (dim_t ow = 0; ow < oDims[3]; ++ow) { |
2143 | DataType resultVal = resultH.at({on, oc, oh, ow}); |
2144 | DataType refVal = refResult[resIndex++]; |
2145 | EXPECT_EQ(resultVal, refVal); |
2146 | } |
2147 | } |
2148 | } |
2149 | } |
2150 | } |
2151 | |
2152 | /// Verify that the SpaceToDepth operator works correctly for int8. Block |
2153 | /// Size 3. |
2154 | TEST_P(OperatorTest, spaceToDepth_block3_int8) { |
2155 | CHECK_IF_ENABLED(); |
2156 | testSpaceToDepthBlock3<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
2157 | } |
2158 | |
2159 | /// Verify that the SpaceToDepth operator works correctly for Float. Block |
2160 | /// Size 3. |
2161 | TEST_P(OperatorTest, spaceToDepth_block3_Float) { |
2162 | CHECK_IF_ENABLED(); |
2163 | testSpaceToDepthBlock3<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
2164 | } |
2165 | |
/// Helper to test SpaceToDepth with block size 2 using \p DTy.
2167 | template <typename DataType> |
2168 | static void testSpaceToDepth(glow::PlaceholderBindings &bindings, |
2169 | glow::Module &mod, glow::Function *F, |
2170 | glow::ExecutionEngine &EE, ElemKind DTy) { |
2171 | unsigned blockSize = 2; |
  auto *in = createPlaceholderConditionallyQuantized(mod, DTy, {2, 2, 4, 4},
                                                     "in", false, "NHWC");
  auto *tri = F->createTranspose("sptdTransposeIn", in, {0, 2, 3, 1}, "NHWC");
  auto *stdn = F->createSpaceToDepth("spacetodepth", tri, blockSize);
  auto *tro =
      F->createTranspose("sptdTransposeOut", stdn, {0, 3, 1, 2}, "NCHW");
  auto *save = F->createSave("save", tro);
2179 | auto *result = bindings.allocate(save->getPlaceholder()); |
2180 | |
2181 | /* |
2182 | Example for first batch. |
2183 | FROM: |
2184 | C0: C1: |
2185 | [0 1 2 3] [ 0 -1 -2 -3] |
2186 | [4 5 6 7] [-4 -5 -6 -7] |
2187 | [8 9 10 11] [-8 -9 -10 -11] |
2188 | [12 13 14 15] [-12 -13 -14 -15] |
2189 | |
2190 | TO: |
2191 | C0: |
2192 | [0, 2] |
2193 | [8, 10] |
2194 | |
2195 | C1: |
2196 | [ 0, -2] |
2197 | [-8, -10] |
2198 | |
2199 | C2: |
2200 | [1, 3] |
2201 | [9, 11] |
2202 | |
2203 | C3: |
2204 | [-1, -3] |
2205 | [-9, -11] |
2206 | |
2207 | C4: |
2208 | [4, 6] |
2209 | [12, 14] |
2210 | |
2211 | C5: |
2212 | [-4, -6] |
2213 | [-12, -14] |
2214 | |
2215 | C6: |
2216 | [5, 7] |
2217 | [13, 15] |
2218 | |
2219 | C7: |
2220 | [-5, -7] |
2221 | [-13, -15] |
2222 | */ |
2223 | |
2224 | bindings.allocate(in)->getHandle<DataType>() = { |
2225 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
2226 | 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, |
2227 | 0, 7, 9, 23, 24, 25, 26, 27, 8, 9, 10, 33, 12, 13, 14, 15, |
2228 | 0, -21, -22, -23, -4, -5, -26, -27, -8, -9, -10, -11, -12, -13, -14, -15}; |
2229 | |
2230 | std::vector<DataType> refResult = { |
2231 | 0, 2, 8, 10, 0, -2, -8, -10, 1, 3, 9, 11, -1, -3, -9, -11, |
2232 | 4, 6, 12, 14, -4, -6, -12, -14, 5, 7, 13, 15, -5, -7, -13, -15, |
2233 | 0, 9, 8, 10, 0, -22, -8, -10, 7, 23, 9, 33, -21, -23, -9, -11, |
2234 | 24, 26, 12, 14, -4, -26, -12, -14, 25, 27, 13, 15, -5, -27, -13, -15}; |
2235 | |
2236 | EE.compile(CompilationMode::Infer); |
2237 | EE.run(bindings); |
2238 | |
2239 | Handle<DataType> resultH = result->getHandle<DataType>(); |
2240 | |
2241 | auto iDims = in->dims(); |
2242 | auto oDims = resultH.dims(); |
2243 | EXPECT_EQ(iDims[0], oDims[0]); |
2244 | EXPECT_EQ(iDims[1] * blockSize * blockSize, oDims[1]); |
2245 | EXPECT_EQ(iDims[2], oDims[2] * blockSize); |
2246 | EXPECT_EQ(iDims[3], oDims[3] * blockSize); |
2247 | |
2248 | // NCHW format |
2249 | dim_t resIndex = 0; |
2250 | for (dim_t on = 0; on < oDims[0]; ++on) { |
2251 | for (dim_t oc = 0; oc < oDims[1]; ++oc) { |
2252 | for (dim_t oh = 0; oh < oDims[2]; ++oh) { |
2253 | for (dim_t ow = 0; ow < oDims[3]; ++ow) { |
2254 | DataType resultVal = resultH.at({on, oc, oh, ow}); |
2255 | DataType refVal = refResult[resIndex++]; |
2256 | EXPECT_EQ(resultVal, refVal); |
2257 | } |
2258 | } |
2259 | } |
2260 | } |
2261 | } |
2262 | |
2263 | /// Verify that the SpaceToDepth operator works correctly for int8. Block |
2264 | /// Size 2. |
2265 | TEST_P(OperatorTest, spaceToDepth_block2_int8) { |
2266 | CHECK_IF_ENABLED(); |
2267 | testSpaceToDepth<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
2268 | } |
2269 | |
2270 | /// Verify that the SpaceToDepth operator works correctly for Float. Block |
2271 | /// Size 2. |
2272 | TEST_P(OperatorTest, spaceToDepth_block2_Float) { |
2273 | CHECK_IF_ENABLED(); |
2274 | testSpaceToDepth<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
2275 | } |
2276 | |
2277 | /// Helper to test ResizeNearest using \p DTy. |
2278 | template <typename DataType> |
2279 | static void testResizeNearest(glow::PlaceholderBindings &bindings, |
2280 | glow::Module &mod, glow::Function *F, |
2281 | glow::ExecutionEngine &EE, ElemKind DTy, |
2282 | bool v11 = false) { |
  auto *input = createPlaceholderConditionallyQuantized(mod, DTy, {1, 2, 2, 1},
                                                        "input", false, "NHWC");
2285 | bindings.allocate(input)->getHandle<DataType>() = {2, 4, 8, 16}; |
2286 | |
2287 | ResizeNearestNode *resizeUp = nullptr; |
2288 | ResizeNearestNode *resizeDown = nullptr; |
2289 | |
2290 | std::vector<float> scaleUp = {1, 2.0f, 1.5f, 1}; |
2291 | |
2292 | if (v11) { |
2293 | dim_t newH = std::floor(2 * 2.0f); |
2294 | dim_t newW = std::floor(2 * 1.5f); |
2295 | auto outTy = |
2296 | mod.uniqueTypeWithNewShape(input->getType(), {1, newH, newW, 1}); |
    resizeUp = F->createResizeNearest("resizeUp", input, outTy);
  } else {
    resizeUp = F->createResizeNearest("resizeUp", input, scaleUp);
  }
  auto *saveUp = F->createSave("saveUp", resizeUp);
2302 | auto *resultUp = bindings.allocate(saveUp->getPlaceholder()); |
2303 | |
2304 | std::vector<float> scaleDown = {1, 0.9f, 0.6f, 1}; |
2305 | |
2306 | if (v11) { |
2307 | dim_t newH = std::floor(2 * 0.9f); |
2308 | dim_t newW = std::floor(2 * 0.6f); |
2309 | auto outTy = |
2310 | mod.uniqueTypeWithNewShape(input->getType(), {1, newH, newW, 1}); |
    resizeDown = F->createResizeNearest("resizeDown", input, outTy);
  } else {
    resizeDown = F->createResizeNearest("resizeDown", input, scaleDown);
  }

  auto *saveDown = F->createSave("saveDown", resizeDown);
2317 | auto *resultDown = bindings.allocate(saveDown->getPlaceholder()); |
2318 | |
2319 | ::glow::convertPlaceholdersToConstants( |
2320 | F, bindings, |
2321 | {input, saveUp->getPlaceholder(), saveDown->getPlaceholder()}); |
2322 | |
2323 | EE.compile(CompilationMode::Infer); |
2324 | EE.run(bindings); |
2325 | |
2326 | auto resultUpH = resultUp->getHandle<DataType>(); |
2327 | std::vector<dim_t> expectedDimsUp = {1, 4, 3, 1}; |
2328 | ASSERT_TRUE(resultUpH.dims().vec() == expectedDimsUp); |
2329 | |
2330 | EXPECT_EQ(resultUpH.at({0, 0, 0, 0}), static_cast<DataType>(2)); |
2331 | EXPECT_EQ(resultUpH.at({0, 0, 1, 0}), static_cast<DataType>(2)); |
2332 | EXPECT_EQ(resultUpH.at({0, 0, 2, 0}), static_cast<DataType>(4)); |
2333 | |
2334 | EXPECT_EQ(resultUpH.at({0, 1, 0, 0}), static_cast<DataType>(2)); |
2335 | EXPECT_EQ(resultUpH.at({0, 1, 1, 0}), static_cast<DataType>(2)); |
2336 | EXPECT_EQ(resultUpH.at({0, 1, 2, 0}), static_cast<DataType>(4)); |
2337 | |
2338 | EXPECT_EQ(resultUpH.at({0, 2, 0, 0}), static_cast<DataType>(8)); |
2339 | EXPECT_EQ(resultUpH.at({0, 2, 1, 0}), static_cast<DataType>(8)); |
2340 | EXPECT_EQ(resultUpH.at({0, 2, 2, 0}), static_cast<DataType>(16)); |
2341 | |
2342 | EXPECT_EQ(resultUpH.at({0, 3, 0, 0}), static_cast<DataType>(8)); |
2343 | EXPECT_EQ(resultUpH.at({0, 3, 1, 0}), static_cast<DataType>(8)); |
2344 | EXPECT_EQ(resultUpH.at({0, 3, 2, 0}), static_cast<DataType>(16)); |
2345 | |
2346 | auto resultDownH = resultDown->getHandle<DataType>(); |
2347 | std::vector<dim_t> expectedDimsDown = {1, 1, 1, 1}; |
2348 | ASSERT_TRUE(resultDownH.dims().vec() == expectedDimsDown); |
2349 | EXPECT_EQ(resultDownH.at({0, 0, 0, 0}), static_cast<DataType>(2)); |
2350 | } |
2351 | |
2352 | /// Verify that the ResizeNearest operator works correctly for Float. |
2353 | TEST_P(OperatorTest, ResizeNearest_Float) { |
2354 | CHECK_IF_ENABLED(); |
2355 | testResizeNearest<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
2356 | } |
2357 | |
2358 | /// Verify that the ResizeNearest operator works correctly for Float16. |
2359 | TEST_P(OperatorTest, ResizeNearest_Float16) { |
2360 | CHECK_IF_ENABLED(); |
2361 | testResizeNearest<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
2362 | } |
2363 | |
2364 | /// Verify that the ResizeNearest operator works correctly for BFloat16. |
2365 | TEST_P(OperatorTest, ResizeNearest_BFloat16) { |
2366 | CHECK_IF_ENABLED(); |
2367 | testResizeNearest<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
2368 | } |
2369 | |
2370 | /// Verify that the ResizeNearest operator works correctly for Int8Q. |
2371 | TEST_P(OperatorTest, ResizeNearest_Int8) { |
2372 | CHECK_IF_ENABLED(); |
2373 | testResizeNearest<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
2374 | } |
2375 | |
2376 | /// Verify that the ResizeNearest operator works correctly for Int16Q. |
2377 | TEST_P(OperatorTest, ResizeNearest_Int16) { |
2378 | CHECK_IF_ENABLED(); |
2379 | testResizeNearest<int16_t>(bindings_, mod_, F_, EE_, ElemKind::Int16QTy); |
2380 | } |
2381 | |
2382 | /// Verify that the ResizeNearest operator works correctly for Int32Q. |
2383 | TEST_P(OperatorTest, ResizeNearest_Int32) { |
2384 | CHECK_IF_ENABLED(); |
2385 | testResizeNearest<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32QTy); |
2386 | } |
2387 | |
2388 | TEST_P(OperatorTest, ResizeNearest_Float_outTy) { |
2389 | CHECK_IF_ENABLED(); |
2390 | testResizeNearest<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, true); |
2391 | } |
2392 | |
2393 | TEST_P(OperatorTest, ResizeNearest_Float16_outTy) { |
2394 | CHECK_IF_ENABLED(); |
2395 | testResizeNearest<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
2396 | true); |
2397 | } |
2398 | |
2399 | TEST_P(OperatorTest, ResizeNearest_BFloat16_outTy) { |
2400 | CHECK_IF_ENABLED(); |
2401 | testResizeNearest<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
2402 | true); |
2403 | } |
2404 | |
2405 | TEST_P(OperatorTest, ResizeNearest_Int8_outTy) { |
2406 | CHECK_IF_ENABLED(); |
2407 | testResizeNearest<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, true); |
2408 | } |
2409 | TEST_P(OperatorTest, ResizeNearest_Int16_outTy) { |
2410 | CHECK_IF_ENABLED(); |
2411 | testResizeNearest<int16_t>(bindings_, mod_, F_, EE_, ElemKind::Int16QTy, |
2412 | true); |
2413 | } |
2414 | TEST_P(OperatorTest, ResizeNearest_Int32_outTy) { |
2415 | CHECK_IF_ENABLED(); |
2416 | testResizeNearest<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32QTy, |
2417 | true); |
2418 | } |
2419 | |
/// Helper to test ResizeBilinear using \p DTy.
2421 | template <typename DataType> |
2422 | static void testResizeBilinear(glow::PlaceholderBindings &bindings, |
2423 | glow::Module &mod, glow::Function *F, |
2424 | glow::ExecutionEngine &EE, ElemKind DTy, |
2425 | bool v11 = false) { |
  auto *input = createPlaceholderConditionallyQuantized(mod, DTy, {1, 2, 2, 1},
                                                        "input", false, "NHWC");
2428 | bindings.allocate(input)->getHandle<DataType>() = {2, 4, 8, 16}; |
2429 | |
2430 | std::vector<float> scaleUp = {1, 2.0f, 1.5f, 1}; |
2431 | |
2432 | ResizeBilinearNode *resizeUp = nullptr; |
2433 | ResizeBilinearNode *resizeDown = nullptr; |
2434 | |
2435 | if (v11) { |
2436 | dim_t newH = std::floor(2 * 2.0f); |
2437 | dim_t newW = std::floor(2 * 1.5f); |
2438 | auto outTy = |
2439 | mod.uniqueTypeWithNewShape(input->getType(), {1, newH, newW, 1}); |
    resizeUp = F->createResizeBilinear("resizeUp", input, outTy);
  } else {
    resizeUp = F->createResizeBilinear("resizeUp", input, scaleUp);
  }

  auto *saveUp = F->createSave("saveUp", resizeUp);
2446 | auto *resultUp = bindings.allocate(saveUp->getPlaceholder()); |
2447 | |
2448 | std::vector<float> scaleDown = {1, 0.9f, 0.6f, 1}; |
2449 | |
2450 | if (v11) { |
2451 | dim_t newH = std::floor(2 * 0.9f); |
2452 | dim_t newW = std::floor(2 * 0.6f); |
2453 | auto outTy = |
2454 | mod.uniqueTypeWithNewShape(input->getType(), {1, newH, newW, 1}); |
    resizeDown = F->createResizeBilinear("resizeDown", input, outTy);
  } else {
    resizeDown = F->createResizeBilinear("resizeDown", input, scaleDown);
  }

  auto *saveDown = F->createSave("saveDown", resizeDown);
2461 | auto *resultDown = bindings.allocate(saveDown->getPlaceholder()); |
2462 | |
2463 | ::glow::convertPlaceholdersToConstants( |
2464 | F, bindings, |
2465 | {input, saveUp->getPlaceholder(), saveDown->getPlaceholder()}); |
2466 | |
2467 | EE.compile(CompilationMode::Infer); |
2468 | EE.run(bindings); |
2469 | |
2470 | auto resultUpH = resultUp->getHandle<DataType>(); |
2471 | std::vector<dim_t> expectedDimsUp = {1, 4, 3, 1}; |
2472 | ASSERT_TRUE(resultUpH.dims().vec() == expectedDimsUp); |
2473 | |
// Use EXPECT_FLOAT_EQ for floating-point types, otherwise EXPECT_EQ. The
// variadic parameter is currently unused.
#define EXPECT_EQF(a, b, ...)                                                  \
  if ((std::is_same<DataType, float>::value) ||                               \
      (std::is_same<DataType, float16_t>::value) ||                           \
      (std::is_same<DataType, bfloat16_t>::value)) {                          \
    EXPECT_FLOAT_EQ(a, b);                                                    \
  } else {                                                                    \
    EXPECT_EQ(a, b);                                                          \
  }
2484 | |
2485 | EXPECT_EQF(resultUpH.at({0, 0, 0, 0}), static_cast<DataType>(2)); |
2486 | EXPECT_EQF(resultUpH.at({0, 0, 1, 0}), static_cast<DataType>(3.333333)); |
2487 | EXPECT_EQF(resultUpH.at({0, 0, 2, 0}), static_cast<DataType>(4)); |
2488 | |
2489 | EXPECT_EQF(resultUpH.at({0, 1, 0, 0}), static_cast<DataType>(5)); |
2490 | EXPECT_EQF(resultUpH.at({0, 1, 1, 0}), static_cast<DataType>(8.333333)); |
2491 | EXPECT_EQF(resultUpH.at({0, 1, 2, 0}), static_cast<DataType>(10)); |
2492 | |
2493 | EXPECT_EQF(resultUpH.at({0, 2, 0, 0}), static_cast<DataType>(8)); |
2494 | EXPECT_EQF(resultUpH.at({0, 2, 1, 0}), static_cast<DataType>(13.33333)); |
2495 | EXPECT_EQF(resultUpH.at({0, 2, 2, 0}), static_cast<DataType>(16)); |
2496 | |
2497 | EXPECT_EQF(resultUpH.at({0, 3, 0, 0}), static_cast<DataType>(8)); |
2498 | EXPECT_EQF(resultUpH.at({0, 3, 1, 0}), static_cast<DataType>(13.33333)); |
2499 | EXPECT_EQF(resultUpH.at({0, 3, 2, 0}), static_cast<DataType>(16)); |
2500 | |
2501 | auto resultDownH = resultDown->getHandle<DataType>(); |
2502 | std::vector<dim_t> expectedDimsDown = {1, 1, 1, 1}; |
2503 | ASSERT_TRUE(resultDownH.dims().vec() == expectedDimsDown); |
2504 | EXPECT_EQF(resultDownH.at({0, 0, 0, 0}), static_cast<DataType>(2)); |
2505 | } |
2506 | |
/// Verify that the ResizeBilinear operator works correctly for Float.
2508 | TEST_P(OperatorTest, ResizeBilinear_Float) { |
2509 | CHECK_IF_ENABLED(); |
2510 | testResizeBilinear<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
2511 | } |
2512 | |
/// Verify that the ResizeBilinear operator works correctly for Float16.
2514 | TEST_P(OperatorTest, ResizeBilinear_Float16) { |
2515 | CHECK_IF_ENABLED(); |
2516 | testResizeBilinear<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
2517 | } |
2518 | |
/// Verify that the ResizeBilinear operator works correctly for BFloat16.
2520 | TEST_P(OperatorTest, ResizeBilinear_BFloat16) { |
2521 | CHECK_IF_ENABLED(); |
2522 | testResizeBilinear<bfloat16_t>(bindings_, mod_, F_, EE_, |
2523 | ElemKind::BFloat16Ty); |
2524 | } |
2525 | |
/// Verify that the ResizeBilinear operator works correctly for Int8Q.
2527 | TEST_P(OperatorTest, ResizeBilinear_Int8) { |
2528 | CHECK_IF_ENABLED(); |
2529 | testResizeBilinear<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
2530 | } |
2531 | |
/// Verify that the ResizeBilinear operator works correctly for Int16Q.
2533 | TEST_P(OperatorTest, ResizeBilinear_Int16) { |
2534 | CHECK_IF_ENABLED(); |
2535 | testResizeBilinear<int16_t>(bindings_, mod_, F_, EE_, ElemKind::Int16QTy); |
2536 | } |
2537 | |
/// Verify that the ResizeBilinear operator works correctly for Int32Q.
2539 | TEST_P(OperatorTest, ResizeBilinear_Int32) { |
2540 | CHECK_IF_ENABLED(); |
2541 | testResizeBilinear<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32QTy); |
2542 | } |
2543 | |
2544 | TEST_P(OperatorTest, ResizeBilinear_Float_outTy) { |
2545 | CHECK_IF_ENABLED(); |
2546 | testResizeBilinear<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, true); |
2547 | } |
2548 | TEST_P(OperatorTest, ResizeBilinear_Float16_outTy) { |
2549 | CHECK_IF_ENABLED(); |
2550 | testResizeBilinear<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
2551 | true); |
2552 | } |
2553 | TEST_P(OperatorTest, ResizeBilinear_BFloat16_outTy) { |
2554 | CHECK_IF_ENABLED(); |
2555 | testResizeBilinear<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
2556 | true); |
2557 | } |
2558 | TEST_P(OperatorTest, ResizeBilinear_Int8_outTy) { |
2559 | CHECK_IF_ENABLED(); |
2560 | testResizeBilinear<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, true); |
2561 | } |
2562 | TEST_P(OperatorTest, ResizeBilinear_Int16_outTy) { |
2563 | CHECK_IF_ENABLED(); |
2564 | testResizeBilinear<int16_t>(bindings_, mod_, F_, EE_, ElemKind::Int16QTy, |
2565 | true); |
2566 | } |
2567 | TEST_P(OperatorTest, ResizeBilinear_Int32_outTy) { |
2568 | CHECK_IF_ENABLED(); |
2569 | testResizeBilinear<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32QTy, |
2570 | true); |
2571 | } |
2572 | |
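/// Test elementwise Pow with a scalar exponent, a tensor exponent, and
/// quantized int8 operands.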
2573 | TEST_P(OperatorTest, pow) { |
2574 | CHECK_IF_ENABLED(); |
2575 | |
  auto *X = mod_.createPlaceholder(ElemKind::FloatTy, {1, 1, 3}, "X", false);
  auto *Y = mod_.createPlaceholder(ElemKind::FloatTy, {2}, "Y", false);
  auto *Exp = mod_.createPlaceholder(ElemKind::FloatTy, {2}, "Exp", false);
2579 | |
2580 | bindings_.allocate(X)->getHandle() = {5, 0.1f, -3}; |
2581 | bindings_.allocate(Y)->getHandle() = {2, 0.25}; |
2582 | bindings_.allocate(Exp)->getHandle() = {2, -0.5}; |
2583 | |
  auto *Pow1 = F_->createPow("Pow1", X, 2.0);
  auto *Pow2 = F_->createPow("Pow2", Y, 0.5);
  auto *Pow3 = F_->createPow("Pow3", Y, Exp);
2587 | |
  // Create quantized Pow.
  auto *quantY = F_->createQuantize(
      "Y_quant", Y, mod_.uniqueType(ElemKind::Int8QTy, {2}, 0.05, 0));
  auto *quantExp = F_->createQuantize(
      "exp_quant", Exp, mod_.uniqueType(ElemKind::Int8QTy, {2}, 0.1, 0));
  auto *Pow4 = F_->createPow("Pow4", quantY, quantExp);
2594 | |
  auto *save1 = F_->createSave("save", Pow1);
  auto *savePlaceholder1 = save1->getPlaceholder();

  auto *save2 = F_->createSave("save", Pow2);
  auto *savePlaceholder2 = save2->getPlaceholder();

  auto *save3 = F_->createSave("save", Pow3);
  auto *savePlaceholder3 = save3->getPlaceholder();

  auto *save4 = F_->createSave("save", Pow4);
  auto *savePlaceholder4 = save4->getPlaceholder();
2606 | |
2607 | bindings_.allocate(savePlaceholder1); |
2608 | bindings_.allocate(savePlaceholder2); |
2609 | bindings_.allocate(savePlaceholder3); |
2610 | bindings_.allocate(savePlaceholder4); |
2611 | |
2612 | EE_.compile(CompilationMode::Infer); |
2613 | |
2614 | EE_.run(bindings_); |
2615 | |
2616 | auto H_X = bindings_.get(savePlaceholder1)->getHandle(); |
2617 | EXPECT_NEAR(H_X.at({0, 0, 0}), 25, 1E-5); |
2618 | EXPECT_NEAR(H_X.at({0, 0, 1}), 0.01, 1E-5); |
2619 | EXPECT_NEAR(H_X.at({0, 0, 2}), 9, 1E-5); |
2620 | |
2621 | auto H_Y = bindings_.get(savePlaceholder2)->getHandle(); |
2622 | EXPECT_NEAR(H_Y.at({0}), sqrt(2.0), 1E-5); |
2623 | EXPECT_NEAR(H_Y.at({1}), 0.5, 1E-5); |
2624 | |
2625 | auto H_Z = bindings_.get(savePlaceholder3)->getHandle(); |
2626 | EXPECT_NEAR(H_Z.at({0}), 4, 1E-5); |
2627 | EXPECT_NEAR(H_Z.at({1}), 2, 1E-5); |
2628 | |
2629 | auto H_A = bindings_.get(savePlaceholder4)->getHandle<int8_t>(); |
2630 | EXPECT_NEAR(H_A.at({0}) * 0.05, 4, 1E-5); |
2631 | EXPECT_NEAR(H_A.at({1}) * 0.05, 2, 1E-5); |
2632 | } |
2633 | |
2634 | /// Helper to test ReplaceNaN using \p DTy. |
2635 | template <typename DataType> |
2636 | static void testReplaceNaN(glow::PlaceholderBindings &bindings, |
2637 | glow::Module &mod, glow::Function *F, |
2638 | glow::ExecutionEngine &EE, ElemKind DTy) { |
2639 | auto value = 1.0f; |
  auto *X = mod.createPlaceholder(DTy, {6}, "X", false);
  auto XH = bindings.allocate(X)->getHandle<DataType>();
  XH = {1, NAN, 2, NAN, 3, NAN};

  auto *RNN = F->createReplaceNaN("replaceNaN", X, value);

  auto *save = F->createSave("save", RNN);
2647 | auto *saveTensor = bindings.allocate(save->getPlaceholder()); |
2648 | |
2649 | EE.compile(CompilationMode::Infer); |
2650 | |
2651 | EE.run(bindings); |
2652 | |
2653 | auto saveH = saveTensor->getHandle<DataType>(); |
2654 | |
2655 | for (size_t i = 0; i < 6; i++) { |
2656 | if (std::isnan((float)XH.raw(i))) { |
2657 | EXPECT_EQ(saveH.raw(i), (DataType)value); |
2658 | } else { |
2659 | EXPECT_EQ(XH.raw(i), saveH.raw(i)); |
2660 | } |
2661 | } |
2662 | } |
2663 | |
2664 | /// Test that ReplaceNaN is correctly supported in FloatTy. |
2665 | TEST_P(OperatorTest, replaceNaN_Float) { |
2666 | CHECK_IF_ENABLED(); |
2667 | testReplaceNaN<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
2668 | } |
2669 | |
2670 | /// Test that ReplaceNaN is correctly supported in Float16Ty. |
2671 | TEST_P(OperatorTest, replaceNaN_Float16) { |
2672 | CHECK_IF_ENABLED(); |
2673 | testReplaceNaN<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
2674 | } |
2675 | |
2676 | /// Test that ReplaceNaN is correctly supported in BFloat16Ty. |
2677 | TEST_P(OperatorTest, replaceNaN_BFloat16) { |
2678 | CHECK_IF_ENABLED(); |
2679 | testReplaceNaN<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
2680 | } |
2681 | |
2682 | /// Reference ideal sigmoid implementation. Computes an fp32 sigmoid |
2683 | /// and casts the result to FP16. |
2684 | static float16_t refSigmoidFp16(float x) { |
2685 | float res = 1 / (1 + exp(-x)); |
2686 | |
2687 | return (float16_t)res; |
2688 | } |
2689 | |
2690 | /// Reference ideal sigmoid implementation. Computes an fp32 sigmoid |
2691 | /// and casts the result to BFloat16. |
2692 | static bfloat16_t refSigmoidBFloat16(float x) { |
2693 | float res = 1 / (1 + exp(-x)); |
2694 | |
2695 | return (bfloat16_t)res; |
2696 | } |
2697 | |
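/// Test a single LSTM unit step in FP16; expected gate values are computed
/// with refSigmoidFp16 and std::tanh.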
2698 | TEST_P(OperatorTest, LSTMUnitFP16) { |
2699 | CHECK_IF_ENABLED(); |
2700 | |
2701 | unsigned minibatchSize = 2; |
2702 | unsigned hiddenSize = 4; |
2703 | |
2704 | // Input |
2705 | auto *Input = mod_.createPlaceholder( |
2706 | ElemKind::Float16Ty, {minibatchSize, 4 * hiddenSize}, "Input" , false); |
2707 | auto InputH = bindings_.allocate(Input)->getHandle<float16_t>(); |
2708 | for (unsigned i = 0; i < minibatchSize; i++) { |
2709 | for (unsigned j = 0; j < hiddenSize * 4; j++) { |
2710 | InputH.at({i, j}) = i * hiddenSize + (j % hiddenSize) + j / hiddenSize; |
2711 | } |
2712 | } |
2713 | |
2714 | // Cell State |
2715 | auto *C = mod_.createPlaceholder(ElemKind::Float16Ty, |
2716 | {minibatchSize, hiddenSize}, "C" , false); |
2717 | auto CH = bindings_.allocate(C)->getHandle<float16_t>(); |
2718 | for (unsigned i = 0; i < minibatchSize * hiddenSize; i++) { |
2719 | CH.raw(i) = i; |
2720 | } |
2721 | |
2722 | auto lstmUnitNode = F_->createLSTMUnit("lstm_unit" , Input, C); |
2723 | |
2724 | auto hRes = lstmUnitNode->getNthResult(0); |
2725 | auto cRes = lstmUnitNode->getNthResult(1); |
2726 | |
2727 | auto *hSave = F_->createSave("saveH" , hRes); |
2728 | auto *hTensor = bindings_.allocate(hSave->getPlaceholder()); |
2729 | auto *cSave = F_->createSave("saveC" , cRes); |
2730 | auto *cTensor = bindings_.allocate(cSave->getPlaceholder()); |
2731 | |
2732 | EE_.compile(CompilationMode::Infer); |
2733 | EE_.run(bindings_); |
2734 | |
2735 | auto hHandle = hTensor->getHandle<float16_t>(); |
2736 | auto cHandle = cTensor->getHandle<float16_t>(); |
2737 | |
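// A sketch of the expected math (editorial note), assuming the PyTorch-style
// [i, f, g, o] gate packing along the last input dimension: with the inputs
// built above, flattened cell n sees pre-activations i = n, f = n + 1,
// g = n + 2, o = n + 3 and initial cell state c = n, so
//   c' = sigmoid(f) * c + sigmoid(i) * tanh(g)
//   h' = sigmoid(o) * tanh(c')
// which is what cExpect and hExpect compute below.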
2738 | for (dim_t i = 0; i < 8; i++) { |
2739 | float cExpect = (float16_t)i * refSigmoidFp16(i + 1) + |
2740 | refSigmoidFp16(i) * (float16_t)std::tanh(i + 2); |
2741 | float hExpect = (float16_t)std::tanh(cExpect) * refSigmoidFp16(i + 3); |
2742 | EXPECT_NEAR(hHandle.raw(i), hExpect, 1E-3); |
2743 | EXPECT_NEAR(cHandle.raw(i), cExpect, 1E-2); |
2744 | } |
2745 | } |
2746 | |
2747 | TEST_P(OperatorTest, PyTorchLSTMFP16) { |
2748 | CHECK_IF_ENABLED(); |
2749 | |
2750 | unsigned minibatchSize = 2; |
2751 | unsigned inputSize = 3; |
2752 | unsigned hiddenSize = 4; |
2753 | unsigned numSteps = 3; |
2754 | |
2755 | // Input |
2756 | auto *X = mod_.createPlaceholder(ElemKind::Float16Ty, |
2757 | {numSteps, minibatchSize, inputSize}, |
2758 | "Input" , false); |
2759 | auto IH = bindings_.allocate(X)->getHandle<float16_t>(); |
2760 | for (unsigned i = 0; i < numSteps * minibatchSize * inputSize; i++) { |
2761 | IH.raw(i) = 0.1 * i; |
2762 | } |
2763 | |
2764 | // Weights & Bias |
2765 | Tensor tWx(ElemKind::Float16Ty, {inputSize, 4 * hiddenSize}); |
2766 | for (unsigned i = 0; i < inputSize * 4 * hiddenSize; i++) { |
2767 | tWx.getHandle<float16_t>().raw(i) = 0.1 * i; |
2768 | } |
2769 | auto Wx = (mod_.createConstant("Wx" , std::move(tWx)))->getOutput(); |
2770 | |
2771 | Tensor tWh(ElemKind::Float16Ty, {hiddenSize, 4 * hiddenSize}); |
2772 | for (unsigned i = 0; i < hiddenSize * 4 * hiddenSize; i++) { |
2773 | tWh.getHandle<float16_t>().raw(i) = 0.1 * (i + 1); |
2774 | } |
2775 | auto Wh = (mod_.createConstant("Wh" , std::move(tWh)))->getOutput(); |
2776 | |
2777 | Tensor tBx(ElemKind::Float16Ty, {4 * hiddenSize}); |
2778 | for (unsigned i = 0; i < 4 * hiddenSize; i++) { |
2779 | tBx.getHandle<float16_t>().raw(i) = 0.1 * (i + 2); |
2780 | } |
2781 | auto Bx = (mod_.createConstant("Bx" , std::move(tBx)))->getOutput(); |
2782 | |
2783 | Tensor tBh(ElemKind::Float16Ty, {4 * hiddenSize}); |
2784 | for (unsigned i = 0; i < 4 * hiddenSize; i++) { |
2785 | tBh.getHandle<float16_t>().raw(i) = 0.1 * (i + 3); |
2786 | } |
2787 | auto Bh = (mod_.createConstant("Bh" , std::move(tBh)))->getOutput(); |
2788 | |
2789 | // H & C |
2790 | auto *H = mod_.createPlaceholder(ElemKind::Float16Ty, |
2791 | {minibatchSize, hiddenSize}, "H" , false); |
2792 | auto *C = mod_.createPlaceholder(ElemKind::Float16Ty, |
2793 | {minibatchSize, hiddenSize}, "C" , false); |
2794 | |
2795 | auto hH = bindings_.allocate(H)->getHandle<float16_t>(); |
2796 | auto hC = bindings_.allocate(C)->getHandle<float16_t>(); |
2797 | for (unsigned i = 0; i < minibatchSize * hiddenSize; i++) { |
2798 | hH.raw(i) = 0.1 * (i + 4); |
2799 | hC.raw(i) = 0.1 * (i + 5); |
2800 | } |
2801 | |
2802 | NodeValue nH = H, nC = C; |
2803 | NodeValue output; |
2804 | std::vector<NodeValue> WxVector = {Wx}; |
2805 | std::vector<NodeValue> WhVector = {Wh}; |
2806 | std::vector<NodeValue> BxVector = {Bx}; |
2807 | std::vector<NodeValue> BhVector = {Bh}; |
2808 | F_->createPyTorchLSTM("lstm" , X, WxVector, WhVector, BxVector, BhVector, nH, |
2809 | nC, output, false); |
2810 | |
2811 | auto *save = F_->createSave("save_output" , output); |
2812 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
2813 | |
2814 | EE_.compile(CompilationMode::Infer); |
2815 | EE_.run(bindings_); |
2816 | auto saveH = saveTensor->getHandle<float16_t>(); |
2817 | |
// expectOutput was computed by PyTorch in Float32 using torch.nn.LSTM() with
// the same input, weights, biases, h and c. Allow a tolerance of 2E-3 since
// this OperatorTest runs in Float16.
2821 | float expectOutput[] = {0.9050, 0.9216, 0.9354, 0.9468, 0.9562, 0.9640, |
2822 | 0.9704, 0.9758, 0.9866, 0.9890, 0.9910, 0.9926, |
2823 | 0.9940, 0.9951, 0.9959, 0.9967, 0.9982, 0.9985, |
2824 | 0.9988, 0.9990, 0.9992, 0.9993, 0.9995, 0.9996}; |
2825 | for (unsigned_t i = 0; i < numSteps * minibatchSize * hiddenSize; i++) { |
2826 | EXPECT_NEAR(saveH.raw(i), expectOutput[i], 2E-3); |
2827 | } |
2828 | } |
2829 | |
2830 | TEST_P(OperatorTest, PyTorchMultipleLayerLSTMFP16) { |
2831 | CHECK_IF_ENABLED(); |
2832 | |
2833 | unsigned minibatchSize = 2; |
2834 | unsigned inputSize = 3; |
2835 | unsigned hiddenSize = 4; |
2836 | unsigned numSteps = 3; |
2837 | unsigned numLayers = 2; |
2838 | |
2839 | // Input |
2840 | auto *X = mod_.createPlaceholder(ElemKind::Float16Ty, |
2841 | {numSteps, minibatchSize, inputSize}, |
2842 | "Input" , false); |
2843 | auto IH = bindings_.allocate(X)->getHandle<float16_t>(); |
2844 | for (unsigned i = 0; i < numSteps * minibatchSize * inputSize; i++) { |
2845 | IH.raw(i) = 0.1 * i; |
2846 | } |
2847 | |
2848 | // Weights & Bias |
2849 | Tensor tWx0(ElemKind::Float16Ty, {inputSize, 4 * hiddenSize}); |
2850 | for (unsigned i = 0; i < inputSize * 4 * hiddenSize; i++) { |
2851 | tWx0.getHandle<float16_t>().raw(i) = 0.1 * i; |
2852 | } |
2853 | auto Wx0 = (mod_.createConstant("Wx_0" , std::move(tWx0)))->getOutput(); |
2854 | |
2855 | Tensor tWx1(ElemKind::Float16Ty, {hiddenSize, 4 * hiddenSize}); |
2856 | for (unsigned i = 0; i < hiddenSize * 4 * hiddenSize; i++) { |
2857 | tWx1.getHandle<float16_t>().raw(i) = 0.1 * (i + 1); |
2858 | } |
2859 | auto Wx1 = (mod_.createConstant("Wx_1" , std::move(tWx1)))->getOutput(); |
2860 | std::vector<NodeValue> WxVector = {Wx0, Wx1}; |
2861 | |
2862 | std::vector<NodeValue> WhVector; |
2863 | for (unsigned j = 0; j < numLayers; j++) { |
2864 | Tensor tWh(ElemKind::Float16Ty, {hiddenSize, 4 * hiddenSize}); |
2865 | for (unsigned i = 0; i < hiddenSize * 4 * hiddenSize; i++) { |
2866 | tWh.getHandle<float16_t>().raw(i) = 0.1 * (i + 2 + j); |
2867 | } |
2868 | auto Wh = (mod_.createConstant("Wh_" + std::to_string(j), std::move(tWh))) |
2869 | ->getOutput(); |
2870 | WhVector.push_back(Wh); |
2871 | } |
2872 | |
2873 | std::vector<NodeValue> BxVector; |
2874 | for (unsigned j = 0; j < numLayers; j++) { |
2875 | Tensor tBx(ElemKind::Float16Ty, {4 * hiddenSize}); |
2876 | for (unsigned i = 0; i < 4 * hiddenSize; i++) { |
2877 | tBx.getHandle<float16_t>().raw(i) = 0.1 * (i + 4 + j); |
2878 | } |
2879 | auto Bx = (mod_.createConstant("Bx_" + std::to_string(j), std::move(tBx))) |
2880 | ->getOutput(); |
2881 | BxVector.push_back(Bx); |
2882 | } |
2883 | std::vector<NodeValue> BhVector; |
2884 | for (unsigned j = 0; j < numLayers; j++) { |
2885 | Tensor tBh(ElemKind::Float16Ty, {4 * hiddenSize}); |
2886 | for (unsigned i = 0; i < 4 * hiddenSize; i++) { |
2887 | tBh.getHandle<float16_t>().raw(i) = 0.1 * (i + 7 + j); |
2888 | } |
2889 | auto Bh = (mod_.createConstant("Bh_" + std::to_string(j), std::move(tBh))) |
2890 | ->getOutput(); |
2891 | BhVector.push_back(Bh); |
2892 | } |
2893 | |
2894 | // H & C |
2895 | auto *H = mod_.createPlaceholder( |
2896 | ElemKind::Float16Ty, {numLayers, minibatchSize, hiddenSize}, "H" , false); |
2897 | auto *C = mod_.createPlaceholder( |
2898 | ElemKind::Float16Ty, {numLayers, minibatchSize, hiddenSize}, "C" , false); |
2899 | |
2900 | auto hH = bindings_.allocate(H)->getHandle<float16_t>(); |
2901 | auto hC = bindings_.allocate(C)->getHandle<float16_t>(); |
2902 | for (unsigned i = 0; i < numLayers * minibatchSize * hiddenSize; i++) { |
2903 | hH.raw(i) = 0.1 * (i + 1); |
2904 | hC.raw(i) = 0.1 * (i + 2); |
2905 | } |
2906 | |
2907 | NodeValue nH = H, nC = C; |
2908 | NodeValue output; |
2909 | |
2910 | F_->createPyTorchLSTM("lstm" , X, WxVector, WhVector, BxVector, BhVector, nH, |
2911 | nC, output, false); |
2912 | |
2913 | auto *save = F_->createSave("save_output" , output); |
2914 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
2915 | |
2916 | EE_.compile(CompilationMode::Infer); |
2917 | EE_.run(bindings_); |
2918 | auto saveH = saveTensor->getHandle<float16_t>(); |
2919 | |
// expectOutput was computed by PyTorch in Float32 using torch.nn.LSTM() with
// the same input, weights, biases, h and c. Allow a tolerance of 2E-3 since
// this OperatorTest runs in Float16.
2923 | float expectOutput[] = {0.9640, 0.9705, 0.9757, 0.9801, 0.9837, 0.9866, |
2924 | 0.9890, 0.9910, 0.9951, 0.9959, 0.9967, 0.9973, |
2925 | 0.9978, 0.9982, 0.9985, 0.9988, 0.9993, 0.9995, |
2926 | 0.9996, 0.9996, 0.9997, 0.9998, 0.9998, 0.9998}; |
2927 | for (unsigned_t i = 0; i < numSteps * minibatchSize * hiddenSize; i++) { |
2928 | EXPECT_NEAR(saveH.raw(i), expectOutput[i], 2E-3); |
2929 | } |
2930 | } |
2931 | |
2932 | TEST_P(OperatorTest, log) { |
2933 | CHECK_IF_ENABLED(); |
2934 | |
2935 | auto *X = mod_.createPlaceholder(ElemKind::FloatTy, {6}, "X" , false); |
2936 | auto XH = bindings_.allocate(X)->getHandle(); |
2937 | XH = {210030, 600, 4, 0.7f, .005f, 0.000829f}; |
2938 | |
2939 | auto *LN = F_->createLog("log" , X); |
2940 | |
2941 | auto *save = F_->createSave("save" , LN); |
2942 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
2943 | |
2944 | EE_.compile(CompilationMode::Infer); |
2945 | |
2946 | EE_.run(bindings_); |
2947 | |
2948 | auto saveH = saveTensor->getHandle(); |
2949 | |
2950 | for (dim_t i = 0; i < 6; i++) { |
2951 | EXPECT_NEAR(saveH.at({i}), log(XH.at({i})), 1E-5); |
2952 | } |
2953 | } |
2954 | |
/// The domain of asin is [-1, 1] and its range is [-pi/2, pi/2].
2957 | TEST_P(OperatorTest, Asin_FloatTy) { |
2958 | CHECK_IF_ENABLED(); |
2959 | |
2960 | auto *X = mod_.createPlaceholder(ElemKind::FloatTy, {6}, "X" , false); |
2961 | auto XH = bindings_.allocate(X)->getHandle(); |
2962 | XH = {-0.34, 0.32, 0.0001, 1.0, -0.4, 0.78}; |
2963 | |
2964 | auto *AS = F_->createAsin("Asin" , X); |
2965 | |
2966 | auto *save = F_->createSave("save" , AS); |
2967 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
2968 | |
2969 | EE_.compile(CompilationMode::Infer); |
2970 | |
2971 | EE_.run(bindings_); |
2972 | |
2973 | auto saveH = saveTensor->getHandle(); |
2974 | |
2975 | for (dim_t i = 0; i < 6; i++) { |
2976 | EXPECT_NEAR(saveH.at({i}), asin(XH.at({i})), 1E-5); |
2977 | } |
2978 | } |
2979 | |
/// The domain of acos is [-1, 1] and its range is [0, pi].
2982 | TEST_P(OperatorTest, Acos_FloatTy) { |
2983 | CHECK_IF_ENABLED(); |
2984 | |
2985 | auto *X = mod_.createPlaceholder(ElemKind::FloatTy, {6}, "X" , false); |
2986 | auto XH = bindings_.allocate(X)->getHandle(); |
2987 | XH = {-0.34, 0.32, 0.0001, 1.0, -0.4, 0.78}; |
2988 | |
2989 | auto *AC = F_->createAcos("Acos" , X); |
2990 | |
2991 | auto *save = F_->createSave("save" , AC); |
2992 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
2993 | |
2994 | EE_.compile(CompilationMode::Infer); |
2995 | |
2996 | EE_.run(bindings_); |
2997 | |
2998 | auto saveH = saveTensor->getHandle(); |
2999 | |
3000 | for (dim_t i = 0; i < 6; i++) { |
3001 | EXPECT_NEAR(saveH.at({i}), acos(XH.at({i})), 1E-5); |
3002 | } |
3003 | } |
3004 | |
/// The domain of atan is all reals and its range is (-pi/2, pi/2); the test
/// inputs here lie in [-1, 1].
3007 | TEST_P(OperatorTest, Atan_FloatTy) { |
3008 | CHECK_IF_ENABLED(); |
3009 | |
3010 | auto *X = mod_.createPlaceholder(ElemKind::FloatTy, {6}, "X" , false); |
3011 | auto XH = bindings_.allocate(X)->getHandle(); |
3012 | XH = {-0.34, 0.32, 0.0001, 1.0, -0.4, 0.78}; |
3013 | |
3014 | auto *AT = F_->createAtan("Atan" , X); |
3015 | |
3016 | auto *save = F_->createSave("save" , AT); |
3017 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
3018 | |
3019 | EE_.compile(CompilationMode::Infer); |
3020 | |
3021 | EE_.run(bindings_); |
3022 | |
3023 | auto saveH = saveTensor->getHandle(); |
3024 | |
3025 | for (dim_t i = 0; i < 6; i++) { |
3026 | EXPECT_NEAR(saveH.at({i}), atan(XH.at({i})), 1E-5); |
3027 | } |
3028 | } |
3029 | |
/// The domain of asin is [-1, 1] and its range is [-pi/2, pi/2].
3032 | TEST_P(OperatorTest, Asin_Int8QTy) { |
3033 | CHECK_IF_ENABLED(); |
3034 | auto *X = mod_.createPlaceholder(ElemKind::FloatTy, {6}, "X" , false); |
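// chooseQuantizationParams maps a float range onto the int8 grid. Here the
// input range [-1, 1] (the asin domain) and the output range [-1.57, 1.57]
// (roughly [-pi/2, pi/2]) each get a scale/offset pair; assuming the full
// [-128, 127] int8 range is used, the input scale is about 2 / 255 ~= 0.0078.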
3035 | auto qParams = glow::quantization::chooseQuantizationParams({-1, 1}); |
3036 | auto oParams = glow::quantization::chooseQuantizationParams({-1.57, 1.57}); |
3037 | auto *data = |
3038 | mod_.uniqueType(ElemKind::Int8QTy, {6}, qParams.scale, qParams.offset); |
3039 | |
3040 | auto OT = |
3041 | mod_.uniqueType(ElemKind::Int8QTy, {6}, oParams.scale, oParams.offset); |
3042 | auto XH = bindings_.allocate(X)->getHandle(); |
3043 | XH = {-0.34, 0.32, 0.0001, 1.0, -0.4, 0.78}; |
3044 | auto *XQ = F_->createQuantize("quantizeQ" , X, data); |
3045 | auto *ASQ = F_->createAsin("Asin" , OT, XQ); |
3046 | |
3047 | auto *AS = F_->createDequantize("dequantize" , ASQ, ElemKind::FloatTy); |
3048 | |
3049 | auto *save = F_->createSave("save" , AS); |
3050 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
3051 | |
3052 | EE_.compile(CompilationMode::Infer); |
3053 | |
3054 | EE_.run(bindings_); |
3055 | |
3056 | auto saveH = saveTensor->getHandle(); |
3057 | |
3058 | for (dim_t i = 0; i < 6; i++) { |
3059 | EXPECT_NEAR(saveH.at({i}), asin(XH.at({i})), 0.25); |
3060 | } |
3061 | } |
3062 | |
/// The domain of acos is [-1, 1] and its range is [0, pi].
3065 | TEST_P(OperatorTest, Acos_Int8QTy) { |
3066 | CHECK_IF_ENABLED(); |
3067 | auto *X = mod_.createPlaceholder(ElemKind::FloatTy, {6}, "X" , false); |
3068 | auto qParams = glow::quantization::chooseQuantizationParams({-1, 1}); |
3069 | auto oParams = glow::quantization::chooseQuantizationParams({0, 3.14}); |
3070 | auto *data = |
3071 | mod_.uniqueType(ElemKind::Int8QTy, {6}, qParams.scale, qParams.offset); |
3072 | |
3073 | auto OT = |
3074 | mod_.uniqueType(ElemKind::Int8QTy, {6}, oParams.scale, oParams.offset); |
3075 | auto XH = bindings_.allocate(X)->getHandle(); |
3076 | XH = {-0.34, 0.32, 0.0001, 1.0, -0.4, 0.78}; |
3077 | auto *XQ = F_->createQuantize("quantizeQ" , X, data); |
3078 | auto *ACQ = F_->createAcos("Acos" , OT, XQ); |
3079 | |
3080 | auto *AC = F_->createDequantize("dequantize" , ACQ, ElemKind::FloatTy); |
3081 | |
3082 | auto *save = F_->createSave("save" , AC); |
3083 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
3084 | |
3085 | EE_.compile(CompilationMode::Infer); |
3086 | |
3087 | EE_.run(bindings_); |
3088 | |
3089 | auto saveH = saveTensor->getHandle(); |
3090 | |
3091 | for (dim_t i = 0; i < 6; i++) { |
3092 | EXPECT_NEAR(saveH.at({i}), acos(XH.at({i})), 0.25); |
3093 | } |
3094 | } |
3095 | |
/// The domain of atan is all reals and its range is (-pi/2, pi/2); the
/// quantized inputs here lie in [-1, 1].
3098 | TEST_P(OperatorTest, Atan_Int8QTy) { |
3099 | CHECK_IF_ENABLED(); |
3100 | auto *X = mod_.createPlaceholder(ElemKind::FloatTy, {6}, "X" , false); |
3101 | auto qParams = glow::quantization::chooseQuantizationParams({-1, 1}); |
3102 | auto oParams = glow::quantization::chooseQuantizationParams({-1.57, 1.57}); |
3103 | auto *data = |
3104 | mod_.uniqueType(ElemKind::Int8QTy, {6}, qParams.scale, qParams.offset); |
3105 | |
3106 | auto OT = |
3107 | mod_.uniqueType(ElemKind::Int8QTy, {6}, oParams.scale, oParams.offset); |
3108 | auto XH = bindings_.allocate(X)->getHandle(); |
3109 | XH = {-0.34, 0.32, 0.0001, 1.0, -0.4, 0.78}; |
3110 | auto *XQ = F_->createQuantize("quantizeQ" , X, data); |
3111 | auto *ATQ = F_->createAtan("Atan" , OT, XQ); |
3112 | |
3113 | auto *AT = F_->createDequantize("dequantize" , ATQ, ElemKind::FloatTy); |
3114 | |
3115 | auto *save = F_->createSave("save" , AT); |
3116 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
3117 | |
3118 | EE_.compile(CompilationMode::Infer); |
3119 | |
3120 | EE_.run(bindings_); |
3121 | |
3122 | auto saveH = saveTensor->getHandle(); |
3123 | |
3124 | for (dim_t i = 0; i < 6; i++) { |
3125 | EXPECT_NEAR(saveH.at({i}), atan(XH.at({i})), 0.25); |
3126 | } |
3127 | } |
3128 | |
3129 | /// Helper to test Logit using \p DTy. |
3130 | template <typename DataType> |
3131 | static void testLogit(glow::PlaceholderBindings &bindings, glow::Module &mod, |
3132 | glow::Function *F, glow::ExecutionEngine &EE, |
3133 | ElemKind DTy, float allowedError) { |
3134 | constexpr auto eps = 1E-6f; // the default in Caffe2 |
3135 | constexpr dim_t size = 10; // sample size for randomized tests |
3136 | |
3137 | auto *input = mod.createPlaceholder(DTy, {size}, "input" , false); |
// Generate the input data in (0.0f, 1.0f) (probabilities including
// degenerate cases) and test that the input data is afterwards clamped to
// (eps, 1 - eps) as in Caffe2.
3141 | bindings.allocate(input)->getHandle<DataType>().randomize(0.0f, 1.0f, |
3142 | mod.getPRNG()); |
3143 | |
3144 | auto *logitDiff = F->createLogit("logitDiff" , input, eps); |
3145 | auto *saveDiff = F->createSave("saveDiff" , logitDiff); |
3146 | bindings.allocate(saveDiff->getPlaceholder()); |
3147 | |
// Property: the log-odds of complementary event probabilities sum to zero,
// i.e., logit(p) + logit(1 - p) == 0.
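// Derivation: logit(p) + logit(1 - p) = log(p / (1 - p)) + log((1 - p) / p)
// = log(1) = 0 for p in (0, 1).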
3150 | Node *const1 = F->createSplat("const1" , input->getType(), 1.0); |
3151 | Node *complInput = F->createSub("sub" , const1, input); |
3152 | Node *logitCompl = F->createLogit("logitCompl" , complInput, eps); |
3153 | auto *saveCompl = F->createSave("saveCompl" , logitCompl); |
3154 | bindings.allocate(saveCompl->getPlaceholder()); |
3155 | |
3156 | EE.compile(CompilationMode::Infer); |
3157 | EE.run(bindings); |
3158 | |
3159 | // results: differential test against the oracle |
3160 | auto resultDiffH = |
3161 | bindings.get(saveDiff->getPlaceholder())->getHandle<DataType>(); |
3162 | auto inputH = bindings.get(input)->getHandle<DataType>(); |
3163 | |
3164 | // results: zero-sum property |
3165 | auto resultComplH = |
3166 | bindings.get(saveCompl->getPlaceholder())->getHandle<DataType>(); |
3167 | |
// Differential test: ensure we match the oracle `logit_test` (a C++
// reimplementation of the reference).
3170 | auto clamp_test = [](float v, float lo, float hi) { |
3171 | return std::max(std::min(v, hi), lo); |
3172 | }; |
3173 | auto logit_test = [clamp_test](float x, float eps = 1E-6f) { |
3174 | float p = clamp_test(x, eps, 1.0f - eps); |
3175 | return std::log(p / (1.0f - p)); |
3176 | }; |
3177 | |
3178 | // property: the logit function is the right-inverse of the logistic function |
3179 | // i.e., logistic(logit(p)) == p |
3180 | auto logistic_test = [](float x) { return 1.0f / (1.0f + std::exp(-x)); }; |
3181 | |
3182 | for (dim_t i = 0; i != size; ++i) { |
3183 | // differential test against the oracle |
3184 | EXPECT_NEAR(resultDiffH.at({i}), logit_test(inputH.at({i})), allowedError); |
3185 | // zero-sum property |
3186 | EXPECT_NEAR(resultComplH.at({i}) + resultDiffH.at({i}), 0.0f, allowedError); |
3187 | // right-inverse property |
3188 | EXPECT_NEAR(logistic_test(resultDiffH.at({i})), |
3189 | clamp_test(inputH.at({i}), eps, 1.0f - eps), allowedError); |
3190 | } |
3191 | } |
3192 | |
3193 | /// Test the Logit operator using FloatTy. |
3194 | TEST_P(OperatorTest, Logit_Float) { |
3195 | CHECK_IF_ENABLED(); |
3196 | testLogit<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 1E-5); |
3197 | } |
3198 | |
3199 | /// Test the Logit operator using Float16Ty. |
3200 | TEST_P(OperatorTest, Logit_Float16) { |
3201 | CHECK_IF_ENABLED(); |
3202 | testLogit<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, 0.002); |
3203 | } |
3204 | |
/// Test the Logit operator using BFloat16Ty.
3206 | TEST_P(OperatorTest, Logit_BFloat16) { |
3207 | CHECK_IF_ENABLED(); |
3208 | testLogit<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, 0.05); |
3209 | } |
3210 | |
3211 | /// Helper to test CmpEQ using \p DTy. |
3212 | template <typename DataType> |
3213 | static void testCmpEQ(glow::PlaceholderBindings &bindings, glow::Module &mod, |
3214 | glow::Function *F, glow::ExecutionEngine &EE, |
3215 | ElemKind DTy) { |
3216 | auto *X = mod.createPlaceholder(DTy, {2, 7}, "X" , false); |
// The values listed here lie within the dynamic range of both int32_t and
// int64_t.
3218 | bindings.allocate(X)->getHandle<DataType>() = { |
3219 | 0, 1, 17, 876, 1000, 44444, 65535, 0, 1, 17, 876, 1000, 44444, 65535}; |
3220 | auto *Y = mod.createPlaceholder(DTy, {2, 7}, "Y" , false); |
3221 | bindings.allocate(Y)->getHandle<DataType>() = { |
3222 | 1, 2, 16, 900, 1111, 44544, 65534, 0, 1, 17, 876, 1000, 44444, 65535}; |
3223 | |
3224 | auto *cmpEQ = F->createCmpEQ("cmpEQ" , X, Y); |
3225 | auto *save = F->createSave("save" , cmpEQ); |
3226 | auto *saveTensor = bindings.allocate(save->getPlaceholder()); |
3227 | |
3228 | EE.compile(CompilationMode::Infer); |
3229 | |
3230 | EE.run(bindings); |
3231 | |
3232 | auto saveH = saveTensor->getHandle<bool>(); |
3233 | for (dim_t i = 0; i < 7; ++i) { |
3234 | EXPECT_FALSE(saveH.at({0, i})); |
3235 | } |
3236 | for (dim_t i = 0; i < 7; ++i) { |
3237 | EXPECT_TRUE(saveH.at({1, i})); |
3238 | } |
3239 | } |
3240 | |
3241 | /// Test the CmpEQ operator using Int64ITy. |
3242 | TEST_P(OperatorTest, CmpEQ_Int64) { |
3243 | CHECK_IF_ENABLED(); |
3244 | testCmpEQ<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
3245 | } |
3246 | |
3247 | /// Test the CmpEQ operator using Int32ITy. |
3248 | TEST_P(OperatorTest, CmpEQ_Int32) { |
3249 | CHECK_IF_ENABLED(); |
3250 | testCmpEQ<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
3251 | } |
3252 | |
3253 | /// Check that the add operator works properly with FP16. |
3254 | TEST_P(OperatorTest, FP16Add) { |
3255 | CHECK_IF_ENABLED(); |
3256 | |
3257 | PseudoRNG PRNG; |
3258 | |
3259 | auto *inputA = |
3260 | mod_.createPlaceholder(ElemKind::Float16Ty, {1, 3, 3, 1}, "A" , false); |
3261 | bindings_.allocate(inputA)->getHandle<float16_t>().randomize(-3.0, 3.0, PRNG); |
3262 | auto *inputB = |
3263 | mod_.createPlaceholder(ElemKind::Float16Ty, {1, 3, 3, 1}, "B" , false); |
3264 | bindings_.allocate(inputB)->getHandle<float16_t>().randomize(-3.0, 3.0, PRNG); |
3265 | auto *Pool = F_->createAdd("pool" , inputA, inputB); |
3266 | auto *S = F_->createSave("save" , Pool); |
3267 | bindings_.allocate(S->getPlaceholder()); |
3268 | |
3269 | EE_.compile(CompilationMode::Infer); |
3270 | EE_.run(bindings_); |
3271 | |
3272 | auto result = bindings_.get(S->getPlaceholder())->getHandle<float16_t>(); |
3273 | auto handleA = bindings_.get(inputA)->getHandle<float16_t>(); |
3274 | auto handleB = bindings_.get(inputB)->getHandle<float16_t>(); |
3275 | ASSERT_EQ(result.size(), handleA.size()); |
3276 | for (size_t idx = 0, end = result.size(); idx != end; ++idx) { |
3277 | EXPECT_EQ(result.raw(idx), handleA.raw(idx) + handleB.raw(idx)); |
3278 | } |
3279 | } |
3280 | |
/// Check that the add operator works properly with BFloat16.
3282 | TEST_P(OperatorTest, BFloat16Add) { |
3283 | CHECK_IF_ENABLED(); |
3284 | |
3285 | PseudoRNG PRNG; |
3286 | |
3287 | auto *inputA = |
3288 | mod_.createPlaceholder(ElemKind::BFloat16Ty, {1, 3, 3, 1}, "A" , false); |
3289 | bindings_.allocate(inputA)->getHandle<bfloat16_t>().randomize(-3.0, 3.0, |
3290 | PRNG); |
3291 | auto *inputB = |
3292 | mod_.createPlaceholder(ElemKind::BFloat16Ty, {1, 3, 3, 1}, "B" , false); |
3293 | bindings_.allocate(inputB)->getHandle<bfloat16_t>().randomize(-3.0, 3.0, |
3294 | PRNG); |
3295 | auto *Pool = F_->createAdd("pool" , inputA, inputB); |
3296 | auto *S = F_->createSave("save" , Pool); |
3297 | bindings_.allocate(S->getPlaceholder()); |
3298 | |
3299 | EE_.compile(CompilationMode::Infer); |
3300 | EE_.run(bindings_); |
3301 | |
3302 | auto result = bindings_.get(S->getPlaceholder())->getHandle<bfloat16_t>(); |
3303 | auto handleA = bindings_.get(inputA)->getHandle<bfloat16_t>(); |
3304 | auto handleB = bindings_.get(inputB)->getHandle<bfloat16_t>(); |
3305 | ASSERT_EQ(result.size(), handleA.size()); |
3306 | for (size_t idx = 0, end = result.size(); idx != end; ++idx) { |
3307 | EXPECT_EQ(result.raw(idx), handleA.raw(idx) + handleB.raw(idx)); |
3308 | } |
3309 | } |
3310 | |
3311 | TEST_P(OperatorTest, matmul) { |
3312 | CHECK_IF_ENABLED(); |
3313 | |
3314 | auto *lhs = mod_.createPlaceholder(ElemKind::FloatTy, {3, 2}, "lhs" , false); |
3315 | auto *rhs = mod_.createPlaceholder(ElemKind::FloatTy, {2, 1}, "rhs" , false); |
3316 | bindings_.allocate(lhs)->getHandle() = {1, 2, 3, 4, 5, 6}; |
3317 | bindings_.allocate(rhs)->getHandle() = {7, 10}; |
3318 | |
3319 | auto *R = F_->createMatMul("MM" , lhs, rhs); |
3320 | |
3321 | auto *save = F_->createSave("save" , R); |
3322 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
3323 | |
3324 | EE_.compile(CompilationMode::Infer); |
3325 | EE_.run(bindings_); |
3326 | |
3327 | auto H = saveTensor->getHandle(); |
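// Worked by hand: [1 2; 3 4; 5 6] * [7; 10] =
// [1*7 + 2*10; 3*7 + 4*10; 5*7 + 6*10] = [27; 61; 95].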
3328 | EXPECT_NEAR(H.at({0, 0}), 27, 0.001); |
3329 | EXPECT_NEAR(H.at({1, 0}), 61, 0.001); |
3330 | EXPECT_NEAR(H.at({2, 0}), 95, 0.001); |
3331 | } |
3332 | |
3333 | /// Test that cloneFunInsideFun works correctly with matmuls. |
3334 | TEST_P(OperatorTest, matmul_ParCloneTest10) { |
3335 | CHECK_IF_ENABLED(); |
3336 | |
3337 | auto *lhs = mod_.createPlaceholder(ElemKind::FloatTy, {3, 2}, "lhs" , false); |
3338 | auto *rhs = mod_.createPlaceholder(ElemKind::FloatTy, {2, 1}, "rhs" , false); |
3339 | bindings_.allocate(lhs)->getHandle() = {1, 2, 3, 4, 5, 6}; |
3340 | bindings_.allocate(rhs)->getHandle() = {7, 10}; |
3341 | |
3342 | auto *R = F_->createMatMul("MM" , lhs, rhs); |
3343 | |
3344 | auto *save = F_->createSave("save" , R); |
3345 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
3346 | |
3347 | CompilationContext cctx; |
3348 | const unsigned parallelCount = 10; |
3349 | auto resultTensors = cloneFunInsideFun(std::make_pair(F_, saveTensor), |
3350 | &bindings_, cctx, parallelCount); |
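// cloneFunInsideFun clones F_ parallelCount times into a single wrapper
// function and returns one result tensor per clone; since every clone
// computes the same matmul, each tensor is checked against the same values.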
3351 | |
3352 | EXPECT_EQ(resultTensors.size(), parallelCount); |
3353 | |
3354 | EE_.compile(cctx); |
3355 | EE_.run(bindings_); |
3356 | |
3357 | for (Tensor *T : resultTensors) { |
3358 | auto H = T->getHandle(); |
3359 | EXPECT_NEAR(H.at({0, 0}), 27, 0.001); |
3360 | EXPECT_NEAR(H.at({1, 0}), 61, 0.001); |
3361 | EXPECT_NEAR(H.at({2, 0}), 95, 0.001); |
3362 | } |
3363 | } |
3364 | |
3365 | /// Test that compareAgainstInterpreter works correctly along with quantization |
3366 | /// and parallel cloning. |
3367 | TEST_P(OperatorStatelessTest, matmulQuantized_InterpCompareParClone) { |
3368 | CHECK_IF_ENABLED(); |
3369 | |
3370 | constexpr unsigned parallelCount = 10; |
3371 | compareAgainstInterpreter( |
3372 | getBackendName(), |
3373 | [](PlaceholderBindings &bindings, ExecutionEngine &EE) { |
3374 | Module &mod = EE.getModule(); |
3375 | Function *F = mod.createFunction("main" ); |
3376 | Placeholder *lhs = |
3377 | mod.createPlaceholder(ElemKind::FloatTy, {3, 2}, "lhs" , false); |
3378 | Placeholder *rhs = |
3379 | mod.createPlaceholder(ElemKind::FloatTy, {2, 1}, "rhs" , false); |
3380 | bindings.allocate(lhs)->getHandle().randomize(-1.0, 1.0, mod.getPRNG()); |
3381 | bindings.allocate(rhs)->getHandle().randomize(-1.0, 1.0, mod.getPRNG()); |
3382 | |
3383 | MatMulNode *R = F->createMatMul("MM" , lhs, rhs); |
3384 | |
3385 | SaveNode *save = F->createSave("save" , R); |
3386 | Tensor *saveTensor = bindings.allocate(save->getPlaceholder()); |
3387 | return std::make_pair(F, saveTensor); |
3388 | }, |
3389 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.006, parallelCount); |
3390 | } |
3391 | |
3392 | /// Check that the matmul operator behaves correctly with FP16. |
3393 | TEST_P(OperatorTest, FP16Matmul) { |
3394 | CHECK_IF_ENABLED(); |
3395 | |
3396 | auto *lhs = mod_.createPlaceholder(ElemKind::Float16Ty, {3, 2}, "lhs" , false); |
3397 | auto *rhs = mod_.createPlaceholder(ElemKind::Float16Ty, {2, 1}, "rhs" , false); |
3398 | bindings_.allocate(lhs)->getHandle<float16_t>() = {1, 2, 3, 4, 5, 6}; |
3399 | bindings_.allocate(rhs)->getHandle<float16_t>() = {7, 10}; |
3400 | |
3401 | auto *R = F_->createMatMul("MM" , lhs, rhs); |
3402 | |
3403 | auto *save = F_->createSave("save" , R); |
3404 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
3405 | |
3406 | EE_.compile(CompilationMode::Infer); |
3407 | EE_.run(bindings_); |
3408 | |
3409 | auto H = saveTensor->getHandle<float16_t>(); |
3410 | EXPECT_NEAR(H.at({0, 0}), 27, 0.001); |
3411 | EXPECT_NEAR(H.at({1, 0}), 61, 0.001); |
3412 | EXPECT_NEAR(H.at({2, 0}), 95, 0.001); |
3413 | } |
3414 | |
/// Check that the matmul operator behaves correctly with BFloat16.
3416 | TEST_P(OperatorTest, BFloat16Matmul) { |
3417 | CHECK_IF_ENABLED(); |
3418 | |
3419 | auto *lhs = |
3420 | mod_.createPlaceholder(ElemKind::BFloat16Ty, {3, 2}, "lhs" , false); |
3421 | auto *rhs = |
3422 | mod_.createPlaceholder(ElemKind::BFloat16Ty, {2, 1}, "rhs" , false); |
3423 | bindings_.allocate(lhs)->getHandle<bfloat16_t>() = {1, 2, 3, 4, 5, 6}; |
3424 | bindings_.allocate(rhs)->getHandle<bfloat16_t>() = {7, 10}; |
3425 | |
3426 | auto *R = F_->createMatMul("MM" , lhs, rhs); |
3427 | |
3428 | auto *save = F_->createSave("save" , R); |
3429 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
3430 | |
3431 | EE_.compile(CompilationMode::Infer); |
3432 | EE_.run(bindings_); |
3433 | |
3434 | auto H = saveTensor->getHandle<bfloat16_t>(); |
3435 | EXPECT_NEAR(H.at({0, 0}), 27, 0.001); |
3436 | EXPECT_NEAR(H.at({1, 0}), 61, 0.001); |
3437 | EXPECT_NEAR(H.at({2, 0}), 95, 0.001); |
3438 | } |
3439 | |
3440 | /// Test that the broadcasted batch mat mul operator works as expected. |
3441 | TEST_P(OperatorTest, BroadcastedBatchMatMul) { |
3442 | CHECK_IF_ENABLED(); |
3443 | |
3444 | auto *lhs = |
3445 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 3, 2}, "lhs" , false); |
3446 | auto *rhs = mod_.createPlaceholder(ElemKind::FloatTy, {2, 1}, "rhs" , false); |
3447 | bindings_.allocate(lhs)->getHandle() = {1, 2, 3, 4, 5, 6, |
3448 | -1, -2, -3, -4, -5, -6}; |
3449 | bindings_.allocate(rhs)->getHandle() = {7, 10}; |
3450 | |
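// The {2, 1} RHS is broadcast (tiled) across the batch dimension, acting as
// a {2, 2, 1} operand: both {3, 2} slices of the LHS are multiplied by the
// same [7; 10] column. The second slice negates the first, hence the negated
// expectations below.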
3451 | auto *R = F_->createBatchMatMul("BMM" , lhs, rhs); |
3452 | |
3453 | auto *save = F_->createSave("save" , R); |
3454 | auto *result = bindings_.allocate(save->getPlaceholder()); |
3455 | |
3456 | EE_.compile(CompilationMode::Infer); |
3457 | EE_.run(bindings_); |
3458 | |
3459 | auto H = result->getHandle(); |
3460 | EXPECT_NEAR(H.at({0, 0, 0}), 27, 0.001); |
3461 | EXPECT_NEAR(H.at({0, 1, 0}), 61, 0.001); |
3462 | EXPECT_NEAR(H.at({0, 2, 0}), 95, 0.001); |
3463 | EXPECT_NEAR(H.at({1, 0, 0}), -27, 0.001); |
3464 | EXPECT_NEAR(H.at({1, 1, 0}), -61, 0.001); |
3465 | EXPECT_NEAR(H.at({1, 2, 0}), -95, 0.001); |
3466 | } |
3467 | |
3468 | /// Test that the broadcasted batch mat mul operator works as expected when the |
3469 | /// RHS does not have to be tiled. |
3470 | TEST_P(OperatorTest, NonBroadcastedBatchMatMul) { |
3471 | CHECK_IF_ENABLED(); |
3472 | auto *lhs = |
3473 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 2}, "lhs" , false); |
3474 | auto *rhs = mod_.createPlaceholder(ElemKind::FloatTy, {2, 1}, "rhs" , false); |
3475 | bindings_.allocate(lhs)->getHandle() = {1, 2, 3, 4, 5, 6}; |
3476 | bindings_.allocate(rhs)->getHandle() = {7, 10}; |
3477 | |
3478 | auto *R = F_->createBatchMatMul("BMM" , lhs, rhs); |
3479 | |
3480 | auto *save = F_->createSave("save" , R); |
3481 | auto *result = bindings_.allocate(save->getPlaceholder()); |
3482 | |
3483 | EE_.compile(CompilationMode::Infer); |
3484 | EE_.run(bindings_); |
3485 | |
3486 | auto H = result->getHandle(); |
3487 | EXPECT_NEAR(H.at({0, 0, 0}), 27, 0.001); |
3488 | EXPECT_NEAR(H.at({0, 1, 0}), 61, 0.001); |
3489 | EXPECT_NEAR(H.at({0, 2, 0}), 95, 0.001); |
3490 | } |
3491 | |
3492 | TEST_P(OperatorTest, ParallelBatchMatMul) { |
3493 | CHECK_IF_ENABLED(); |
3494 | |
3495 | auto *lhs = |
3496 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 3, 2}, "lhs" , false); |
3497 | auto *rhs = |
3498 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 1}, "rhs" , false); |
3499 | bindings_.allocate(lhs)->getHandle() = {1, 2, 3, 4, 5, 6, |
3500 | -1, -2, -3, -4, -5, -6}; |
3501 | bindings_.allocate(rhs)->getHandle() = {7, 10, 12, -1}; |
3502 | |
3503 | auto *R = F_->createBatchMatMul("BMM" , lhs, rhs); |
3504 | |
3505 | auto *save = F_->createSave("save" , R); |
3506 | auto *result = bindings_.allocate(save->getPlaceholder()); |
3507 | |
3508 | EE_.compile(CompilationMode::Infer); |
3509 | EE_.run(bindings_); |
3510 | |
3511 | auto H = result->getHandle(); |
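// Worked by hand, one RHS column per batch:
// batch 0: [1 2; 3 4; 5 6] * [7; 10] = [27; 61; 95]
// batch 1: [-1 -2; -3 -4; -5 -6] * [12; -1] = [-10; -32; -54]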
3512 | EXPECT_NEAR(H.at({0, 0, 0}), 27, 0.001); |
3513 | EXPECT_NEAR(H.at({0, 1, 0}), 61, 0.001); |
3514 | EXPECT_NEAR(H.at({0, 2, 0}), 95, 0.001); |
3515 | EXPECT_NEAR(H.at({1, 0, 0}), -10, 0.001); |
3516 | EXPECT_NEAR(H.at({1, 1, 0}), -32, 0.001); |
3517 | EXPECT_NEAR(H.at({1, 2, 0}), -54, 0.001); |
3518 | } |
3519 | |
3520 | static FunctionTensorPair |
3521 | createAndInitParallelBatchMatMulTest(glow::PlaceholderBindings &bindings, |
3522 | glow::ExecutionEngine &EE) { |
3523 | auto &mod = EE.getModule(); |
3524 | Function *F = mod.createFunction("main" ); |
3525 | |
3526 | auto *lhs = |
3527 | mod.createPlaceholder(ElemKind::FloatTy, {10, 50, 100}, "lhs" , false); |
3528 | auto *rhs = |
3529 | mod.createPlaceholder(ElemKind::FloatTy, {10, 100, 80}, "rhs" , false); |
3530 | bindings.allocate(lhs)->getHandle().randomize(-0.1, 0.1, mod.getPRNG()); |
3531 | bindings.allocate(rhs)->getHandle().randomize(-0.1, 0.1, mod.getPRNG()); |
3532 | |
3533 | auto *R = F->createBatchMatMul("BMM" , lhs, rhs); |
3534 | |
3535 | auto *save = F->createSave("save" , R); |
3536 | auto *resultTensor = bindings.allocate(save->getPlaceholder()); |
3537 | |
3538 | return std::make_pair(F, resultTensor); |
3539 | } |
3540 | |
3541 | TEST_P(OperatorStatelessTest, ParallelBatchMatMul_Float16) { |
3542 | CHECK_IF_ENABLED(); |
3543 | compareAgainstInterpreter( |
3544 | getBackendName(), createAndInitParallelBatchMatMulTest, ElemKind::FloatTy, |
3545 | ElemKind::Float16Ty, 0.0005f, parCloneCountOpt); |
3546 | } |
3547 | |
3548 | TEST_P(OperatorStatelessTest, ParallelBatchMatMul_BFloat16) { |
3549 | CHECK_IF_ENABLED(); |
3550 | compareAgainstInterpreter( |
3551 | getBackendName(), createAndInitParallelBatchMatMulTest, ElemKind::FloatTy, |
3552 | ElemKind::BFloat16Ty, 0.0005f, parCloneCountOpt); |
3553 | } |
3554 | |
3555 | TEST_P(OperatorStatelessTest, ParallelBatchMatMul_Int8) { |
3556 | CHECK_IF_ENABLED(); |
3557 | compareAgainstInterpreter( |
3558 | getBackendName(), createAndInitParallelBatchMatMulTest, ElemKind::FloatTy, |
3559 | ElemKind::Int8QTy, 0.002f, parCloneCountOpt); |
3560 | } |
3561 | |
3562 | /// Helper to test BatchedReduceSumSquare using \p DTy. |
3563 | template <typename DataType> |
3564 | static void testBatchedReduceSumSquare(glow::PlaceholderBindings &bindings, |
3565 | glow::Module &mod, glow::Function *F, |
3566 | glow::ExecutionEngine &EE, |
3567 | ElemKind DTy) { |
3568 | auto *batch = mod.createPlaceholder(DTy, {2, 4}, "batch" , false); |
3569 | bindings.allocate(batch)->getHandle<DataType>() = {10, 20, 30, 40, |
3570 | 1, 2, 3, 4}; |
3571 | |
3572 | auto *R = |
3573 | F->createBatchedReduceSumSquare("reduce.sumsquare" , batch, /* axis */ 0); |
3574 | |
3575 | auto *save = F->createSave("save" , R); |
3576 | auto *result = bindings.allocate(save->getPlaceholder()); |
3577 | |
3578 | EE.compile(CompilationMode::Infer); |
3579 | EE.run(bindings); |
3580 | |
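// Reducing axis 0 sums the element-wise squares of the two rows:
// 10^2 + 1^2 = 101, 20^2 + 2^2 = 404, 30^2 + 3^2 = 909, 40^2 + 4^2 = 1616.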
3581 | Tensor expected(DTy, {4}); |
3582 | expected.getHandle<DataType>() = {101, 404, 909, 1616}; |
3583 | EXPECT_TRUE(result->isEqual(expected)); |
3584 | } |
3585 | |
3586 | /// Test that BatchedReduceSumSquare is correctly supported in FloatTy. |
3587 | TEST_P(OperatorTest, batchedReduceSumSquare_Float) { |
3588 | CHECK_IF_ENABLED(); |
3589 | |
3590 | testBatchedReduceSumSquare<float>(bindings_, mod_, F_, EE_, |
3591 | ElemKind::FloatTy); |
3592 | } |
3593 | |
3594 | /// Helper to test BatchedReduceAdd using \p DTy. |
3595 | template <typename DataType> |
3596 | static void testBatchedReduceAdd(glow::PlaceholderBindings &bindings, |
3597 | glow::Module &mod, glow::Function *F, |
3598 | glow::ExecutionEngine &EE, ElemKind DTy) { |
3599 | auto *batch = mod.createPlaceholder(DTy, {2, 4}, "batch" , false); |
3600 | bindings.allocate(batch)->getHandle<DataType>() = {10, 20, 30, 40, |
3601 | 1, 2, 3, 4}; |
3602 | |
3603 | auto *R = F->createBatchedReduceAdd("reduce.add" , batch, /* axis */ 0); |
3604 | |
3605 | auto *save = F->createSave("save" , R); |
3606 | auto *result = bindings.allocate(save->getPlaceholder()); |
3607 | |
3608 | EE.compile(CompilationMode::Infer); |
3609 | EE.run(bindings); |
3610 | |
3611 | Tensor expected(DTy, {4}); |
3612 | expected.getHandle<DataType>() = {11, 22, 33, 44}; |
3613 | EXPECT_TRUE(result->isEqual(expected)); |
3614 | } |
3615 | |
3616 | /// Test that BatchedReduceAdd is correctly supported in FloatTy. |
3617 | TEST_P(OperatorTest, batchedReduceAdd_Float) { |
3618 | CHECK_IF_ENABLED(); |
3619 | |
3620 | testBatchedReduceAdd<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
3621 | } |
3622 | |
3623 | /// Test that BatchedReduceAdd is correctly supported in Float16Ty. |
3624 | TEST_P(OperatorTest, batchedReduceAdd_Float16) { |
3625 | CHECK_IF_ENABLED(); |
3626 | testBatchedReduceAdd<float16_t>(bindings_, mod_, F_, EE_, |
3627 | ElemKind::Float16Ty); |
3628 | } |
3629 | |
/// Test that BatchedReduceAdd is correctly supported in BFloat16Ty.
3631 | TEST_P(OperatorTest, batchedReduceAdd_BFloat16) { |
3632 | CHECK_IF_ENABLED(); |
3633 | testBatchedReduceAdd<bfloat16_t>(bindings_, mod_, F_, EE_, |
3634 | ElemKind::BFloat16Ty); |
3635 | } |
3636 | |
3637 | /// Test that BatchedReduceAdd is correctly supported in Int32ITy. |
3638 | TEST_P(OperatorTest, batchedReduceAdd_Int32ITy) { |
3639 | CHECK_IF_ENABLED(); |
testBatchedReduceAdd<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy);
3641 | } |
3642 | |
3643 | /// Test that BatchedReduceAdd works correctly reducing the outermost axis. |
3644 | TEST_P(OperatorTest, batchedReduceAdd_outerAxis) { |
3645 | CHECK_IF_ENABLED(); |
3646 | |
3647 | auto *batch = |
3648 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 4}, "batch" , false); |
3649 | bindings_.allocate(batch)->getHandle<float>() = {10, 20, 30, 40, 1, 2, 3, 4, |
3650 | 10, 20, 30, 40, 1, 2, 3, 4}; |
3651 | |
3652 | auto *R = F_->createBatchedReduceAdd("reduce.add" , batch, /* axis */ 0); |
3653 | |
3654 | auto *save = F_->createSave("save" , R); |
3655 | auto *result = bindings_.allocate(save->getPlaceholder()); |
3656 | |
3657 | EE_.compile(CompilationMode::Infer); |
3658 | EE_.run(bindings_); |
3659 | |
3660 | Tensor expected(ElemKind::FloatTy, {2, 4}); |
3661 | expected.getHandle<float>() = {20, 40, 60, 80, 2, 4, 6, 8}; |
3662 | |
3663 | EXPECT_TRUE(result->isEqual(expected)); |
3664 | } |
3665 | |
3666 | /// Test that BatchedReduceAdd works correctly reducing an internal axis. |
3667 | TEST_P(OperatorTest, batchedReduceAdd_innerAxis) { |
3668 | CHECK_IF_ENABLED(); |
3669 | |
3670 | auto *batch = |
3671 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 4}, "batch" , false); |
3672 | bindings_.allocate(batch)->getHandle<float>() = {10, 20, 30, 40, 1, 2, 3, 4, |
3673 | 10, 20, 30, 40, 1, 2, 3, 4}; |
3674 | |
3675 | auto *R = F_->createBatchedReduceAdd("reduce.add" , batch, /* axis */ 1); |
3676 | |
3677 | auto *save = F_->createSave("save" , R); |
3678 | auto *result = bindings_.allocate(save->getPlaceholder()); |
3679 | |
3680 | EE_.compile(CompilationMode::Infer); |
3681 | EE_.run(bindings_); |
3682 | |
3683 | Tensor expected(ElemKind::FloatTy, {2, 4}); |
3684 | expected.getHandle<float>() = {11, 22, 33, 44, 11, 22, 33, 44}; |
3685 | |
3686 | EXPECT_TRUE(result->isEqual(expected)); |
3687 | } |
3688 | |
3689 | /// Test that BatchedReduceAdd works correctly reducing the innermost axis. |
3690 | TEST_P(OperatorTest, batchedReduceAdd_lastAxis) { |
3691 | CHECK_IF_ENABLED(); |
3692 | |
3693 | auto *batch = |
3694 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 4}, "batch" , false); |
3695 | bindings_.allocate(batch)->getHandle<float>() = {10, 20, 30, 40, 1, 2, 3, 4, |
3696 | 10, 20, 30, 40, 1, 2, 3, 4}; |
3697 | auto *R = F_->createBatchedReduceAdd("reduce.add" , batch, /* axis */ 2); |
3698 | |
3699 | auto *save = F_->createSave("save" , R); |
3700 | auto *result = bindings_.allocate(save->getPlaceholder()); |
3701 | |
3702 | EE_.compile(CompilationMode::Infer); |
3703 | EE_.run(bindings_); |
3704 | |
3705 | Tensor expected(ElemKind::FloatTy, {2, 2}); |
3706 | expected.getHandle<float>() = {100, 10, 100, 10}; |
3707 | |
3708 | EXPECT_TRUE(result->isEqual(expected)); |
3709 | } |
3710 | |
/// Test that BatchedReduceAdd works on a 4D input.
3712 | TEST_P(OperatorTest, batchedReduceAdd_4Dinput) { |
3713 | CHECK_IF_ENABLED(); |
3714 | |
3715 | auto *batch = |
3716 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 2, 4}, "batch" , false); |
3717 | bindings_.allocate(batch)->getHandle<float>() = { |
3718 | 10, 20, 30, 40, 1, 2, 3, 4, 10, 20, 30, 40, 1, 2, 3, 4, |
3719 | 10, 20, 30, 40, 1, 2, 3, 4, 10, 20, 30, 40, 1, 2, 3, 4}; |
3720 | |
3721 | auto *R = F_->createBatchedReduceAdd("reduce.add" , batch, /* axis */ 0); |
3722 | |
3723 | auto *save = F_->createSave("save" , R); |
3724 | auto *result = bindings_.allocate(save->getPlaceholder()); |
3725 | |
3726 | EE_.compile(CompilationMode::Infer); |
3727 | EE_.run(bindings_); |
3728 | |
3729 | Tensor expected(ElemKind::FloatTy, {2, 2, 4}); |
3730 | expected.getHandle<float>() = {20, 40, 60, 80, 2, 4, 6, 8, |
3731 | 20, 40, 60, 80, 2, 4, 6, 8}; |
3732 | |
3733 | EXPECT_TRUE(result->isEqual(expected)); |
3734 | } |
3735 | |
/// Test that BatchedReduceAdd works on a 5D input.
3737 | TEST_P(OperatorTest, batchedReduceAdd_5Dinput) { |
3738 | CHECK_IF_ENABLED(); |
3739 | auto *batch = mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 2, 2, 4}, |
3740 | "batch" , false); |
3741 | bindings_.allocate(batch)->getHandle<float>() = { |
3742 | 10, 20, 30, 40, 1, 2, 3, 4, 10, 20, 30, 40, 1, 2, 3, 4, |
3743 | 10, 20, 30, 40, 1, 2, 3, 4, 10, 20, 30, 40, 1, 2, 3, 4, |
3744 | 10, 20, 30, 40, 1, 2, 3, 4, 10, 20, 30, 40, 1, 2, 3, 4, |
3745 | 10, 20, 30, 40, 1, 2, 3, 4, 10, 20, 30, 40, 1, 2, 3, 4}; |
3746 | |
3747 | auto *R = F_->createBatchedReduceAdd("reduce.add" , batch, /* axis */ 2); |
3748 | |
3749 | auto *save = F_->createSave("save" , R); |
3750 | auto *result = bindings_.allocate(save->getPlaceholder()); |
3751 | |
3752 | EE_.compile(CompilationMode::Infer); |
3753 | EE_.run(bindings_); |
3754 | |
3755 | Tensor expected(ElemKind::FloatTy, {2, 2, 2, 4}); |
3756 | expected.getHandle<float>() = {20, 40, 60, 80, 2, 4, 6, 8, 20, 40, 60, |
3757 | 80, 2, 4, 6, 8, 20, 40, 60, 80, 2, 4, |
3758 | 6, 8, 20, 40, 60, 80, 2, 4, 6, 8}; |
3759 | |
3760 | EXPECT_TRUE(result->isEqual(expected)); |
3761 | } |
3762 | |
3763 | /// Helper to test VectorNorm using \p DTy. |
3764 | template <typename DataType> |
3765 | static void testVectorNorm(glow::PlaceholderBindings &bindings, |
3766 | glow::Module &mod, glow::Function *F, |
3767 | glow::ExecutionEngine &EE, ElemKind elemKind, |
3768 | float maxRefDiff = 0.0000f) { |
3769 | auto *input = mod.createPlaceholder(elemKind, {2, 3}, "norm" , false); |
3770 | bindings.allocate(input)->getHandle<DataType>() = {1, 2, 3, -1, 1, 4}; |
3771 | |
3772 | auto *R = F->createVectorNorm("vector.norm" , input, /* axis */ 0, /* p */ 2); |
3773 | |
3774 | auto *save = F->createSave("save" , R); |
3775 | auto *result = bindings.allocate(save->getPlaceholder()); |
3776 | |
3777 | EE.compile(CompilationMode::Infer); |
3778 | EE.run(bindings); |
3779 | |
3780 | auto resData = result->getHandle<DataType>(); |
3781 | |
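// Column-wise L2 norms of {{1, 2, 3}, {-1, 1, 4}}: sqrt(1 + 1) ~= 1.4142,
// sqrt(4 + 1) ~= 2.2361, sqrt(9 + 16) = 5.0000.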
3782 | EXPECT_NEAR(resData.at({0}), 1.4142, maxRefDiff); |
3783 | EXPECT_NEAR(resData.at({1}), 2.2361, maxRefDiff); |
3784 | EXPECT_NEAR(resData.at({2}), 5.0000, maxRefDiff); |
3785 | } |
3786 | |
3787 | /// Test that VectorNorm is correctly supported in FloatTy. |
3788 | TEST_P(OperatorTest, VectorNorm_Float) { |
3789 | CHECK_IF_ENABLED(); |
3790 | |
3791 | testVectorNorm<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 4E-5); |
3792 | } |
3793 | |
3794 | /// Test that VectorNorm is correctly supported in Float16Ty. |
3795 | TEST_P(OperatorTest, VectorNorm_Float16Ty) { |
3796 | CHECK_IF_ENABLED(); |
3797 | |
3798 | testVectorNorm<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
3799 | 5E-3); |
3800 | } |
3801 | |
3802 | /// Test that VectorNorm is correctly supported in BFloat16Ty. |
3803 | TEST_P(OperatorTest, VectorNorm_BFloat16) { |
3804 | CHECK_IF_ENABLED(); |
3805 | |
3806 | testVectorNorm<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
3807 | 2E-3); |
3808 | } |
3809 | |
/// Test that VectorNorm works correctly when computed along an inner axis of
/// a 3D tensor.
3811 | TEST_P(OperatorTest, VectorNorm_3D_innerAxis) { |
3812 | CHECK_IF_ENABLED(); |
3813 | auto *input = |
3814 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 2}, "norm" , false); |
3815 | bindings_.allocate(input)->getHandle<float>() = {0, 1, 2, 3, 4, 5, 6, 7}; |
3816 | |
3817 | auto *R = F_->createVectorNorm("vector.norm" , input, /* axis */ 1, /* p */ 2); |
3818 | |
3819 | auto *save = F_->createSave("save" , R); |
3820 | auto *result = bindings_.allocate(save->getPlaceholder()); |
3821 | |
3822 | EE_.compile(CompilationMode::Infer); |
3823 | EE_.run(bindings_); |
3824 | |
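// Axis 1 pairs elements two apart in each {2, 2} slice: sqrt(0^2 + 2^2) =
// 2.0000, sqrt(1^2 + 3^2) ~= 3.1623, sqrt(4^2 + 6^2) ~= 7.2111,
// sqrt(5^2 + 7^2) ~= 8.6023.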
3825 | Tensor expected(ElemKind::FloatTy, {2, 2}); |
3826 | expected.getHandle<float>() = {2.0000, 3.1623, 7.2111, 8.6023}; |
3827 | EXPECT_TRUE(result->isEqual(expected)); |
3828 | } |
3829 | |
3830 | /// Helper to test BatchedReduceProd using \p DTy. |
3831 | template <typename DataType> |
3832 | static void testBatchedReduceProd(glow::PlaceholderBindings &bindings, |
3833 | glow::Module &mod, glow::Function *F, |
3834 | glow::ExecutionEngine &EE, ElemKind DTy) { |
3835 | auto *batch = mod.createPlaceholder(DTy, {2, 4}, "batch" , false); |
3836 | bindings.allocate(batch)->getHandle<DataType>() = {10, 20, 30, 40, |
3837 | 1, 2, 3, 4}; |
3838 | |
3839 | auto *R = F->createBatchedReduceProd("reduce.prod" , batch, /* axis */ 0); |
3840 | |
3841 | auto *save = F->createSave("save" , R); |
3842 | auto *result = bindings.allocate(save->getPlaceholder()); |
3843 | |
3844 | EE.compile(CompilationMode::Infer); |
3845 | EE.run(bindings); |
3846 | |
3847 | Tensor expected(DTy, {4}); |
3848 | expected.getHandle<DataType>() = {10, 40, 90, 160}; |
3849 | |
3850 | EXPECT_TRUE(result->isEqual(expected)); |
3851 | } |
3852 | |
3853 | /// Test that BatchedReduceProd is correctly supported in FloatTy. |
3854 | TEST_P(OperatorTest, batchedReduceProd_Float) { |
3855 | CHECK_IF_ENABLED(); |
3856 | |
3857 | testBatchedReduceProd<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
3858 | } |
3859 | |
3860 | /// Test that BatchedReduceProd is correctly supported in Float16Ty. |
3861 | TEST_P(OperatorTest, batchedReduceProd_Float16) { |
3862 | CHECK_IF_ENABLED(); |
3863 | testBatchedReduceProd<float16_t>(bindings_, mod_, F_, EE_, |
3864 | ElemKind::Float16Ty); |
3865 | } |
3866 | |
/// Test that BatchedReduceProd is correctly supported in BFloat16Ty.
3868 | TEST_P(OperatorTest, batchedReduceProd_BFloat16) { |
3869 | CHECK_IF_ENABLED(); |
3870 | testBatchedReduceProd<bfloat16_t>(bindings_, mod_, F_, EE_, |
3871 | ElemKind::BFloat16Ty); |
3872 | } |
3873 | |
3874 | /// Test that BatchedReduceProd is correctly supported in Int32Ty. |
3875 | TEST_P(OperatorTest, batchedReduceProd_Int32) { |
3876 | CHECK_IF_ENABLED(); |
3877 | testBatchedReduceProd<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
3878 | } |
3879 | |
3880 | /// Test that BatchedReduceProd is correctly supported in Int64Ty. |
3881 | TEST_P(OperatorTest, batchedReduceProd_Int64) { |
3882 | CHECK_IF_ENABLED(); |
3883 | testBatchedReduceProd<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
3884 | } |
3885 | |
3886 | /// Helper to test BatchedReduceMax using \p DTy. |
3887 | template <typename DataType> |
3888 | static void testBatchedReduceMax(glow::PlaceholderBindings &bindings, |
3889 | glow::Module &mod, glow::Function *F, |
3890 | glow::ExecutionEngine &EE, ElemKind DTy) { |
3891 | const bool isQuantized = isQuantizedElemKind(DTy); |
3892 | const std::vector<dim_t> dims = {2, 4}; |
3893 | const std::vector<dim_t> expectedDims = {4}; |
3894 | auto BT = isQuantized ? mod.uniqueType(DTy, dims, 0.5, 3) |
3895 | : mod.uniqueType(DTy, dims); |
3896 | auto OT = isQuantized ? mod.uniqueType(DTy, expectedDims, 2.0, -1) |
3897 | : mod.uniqueType(DTy, expectedDims); |
3898 | |
3899 | auto *batch = mod.createPlaceholder(BT, "batch" , false); |
3900 | bindings.allocate(batch)->getHandle<DataType>() = {-10, 20, 30, 40, |
3901 | -1, 2, 3, 4}; |
3902 | auto *R = F->createBatchedReduceMax("reduce.Max" , OT, batch, /* axis */ 0); |
3903 | |
3904 | auto *save = F->createSave("save" , R); |
3905 | auto *result = bindings.allocate(save->getPlaceholder()); |
3906 | |
3907 | EE.compile(CompilationMode::Infer); |
3908 | EE.run(bindings); |
3909 | |
3910 | Tensor expected(OT); |
3911 | expected.getHandle<DataType>() = {-1, 20, 30, 40}; |
3912 | |
3913 | EXPECT_TRUE(result->isEqual(expected)); |
3914 | } |
3915 | |
/// Helper to test BatchedReduceMax over multiple axes using \p DTy.
3917 | template <typename DataType> |
3918 | static void testBatchedReduceMaxMultiAxis(glow::PlaceholderBindings &bindings, |
3919 | glow::Module &mod, glow::Function *F, |
3920 | glow::ExecutionEngine &EE, |
3921 | ElemKind DTy) { |
3922 | const bool isQuantized = isQuantizedElemKind(DTy); |
3923 | const std::vector<dim_t> dims = {2, 2, 2, 2}; |
3924 | const std::vector<dim_t> expectedDims = {2, 2}; |
3925 | auto BT = isQuantized ? mod.uniqueType(DTy, dims, 0.5, 3) |
3926 | : mod.uniqueType(DTy, dims); |
3927 | auto OT = isQuantized ? mod.uniqueType(DTy, expectedDims, 2.0, -1) |
3928 | : mod.uniqueType(DTy, expectedDims); |
3929 | |
3930 | auto *batch = mod.createPlaceholder(BT, "batch" , false); |
3931 | bindings.allocate(batch)->getHandle<DataType>() = { |
3932 | 1, -2, 3, -4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; |
3933 | auto *R = |
3934 | F->createBatchedReduceMax("reduce.Max" , OT, batch, /* axis */ {1, 3}); |
3935 | auto *save = F->createSave("save" , R); |
3936 | auto *result = bindings.allocate(save->getPlaceholder()); |
3937 | |
3938 | EE.compile(CompilationMode::Infer); |
3939 | EE.run(bindings); |
3940 | |
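// Reducing axes 1 and 3 of the 2 x 2 x 2 x 2 input takes the max over four
// elements per output cell: max(1, -2, 5, 6) = 6, max(3, -4, 7, 8) = 8,
// max(9, 10, 13, 14) = 14, max(11, 12, 15, 16) = 16.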
3941 | Tensor expected(OT); |
3942 | expected.getHandle<DataType>() = {6, 8, 14, 16}; |
3943 | EXPECT_TRUE(result->isEqual(expected)); |
3944 | } |
3945 | |
3946 | /// Test that BatchedReduceMax is correctly supported in FloatTy. |
3947 | TEST_P(OperatorTest, batchedReduceMax_Float) { |
3948 | CHECK_IF_ENABLED(); |
3949 | testBatchedReduceMax<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
3950 | } |
3951 | |
3952 | /// Test that BatchedReduceMax is correctly supported in Int32Ty. |
3953 | TEST_P(OperatorTest, batchedReduceMax_Int32) { |
3954 | CHECK_IF_ENABLED(); |
3955 | testBatchedReduceMax<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
3956 | } |
3957 | |
3958 | /// Test that BatchedReduceMax is correctly supported in Int64Ty. |
3959 | TEST_P(OperatorTest, batchedReduceMax_Int64) { |
3960 | CHECK_IF_ENABLED(); |
3961 | testBatchedReduceMax<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
3962 | } |
3963 | |
3964 | /// Test that BatchedReduceMax is correctly supported in Int8QTy. |
3965 | TEST_P(OperatorTest, batchedReduceMax_Int8) { |
3966 | CHECK_IF_ENABLED(); |
3967 | testBatchedReduceMax<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
3968 | } |
3969 | |
/// Test that multi-axis BatchedReduceMax is correctly supported in FloatTy.
3971 | TEST_P(OperatorTest, batchedReduceMaxMultiAxis_Float) { |
3972 | CHECK_IF_ENABLED(); |
3973 | testBatchedReduceMaxMultiAxis<float>(bindings_, mod_, F_, EE_, |
3974 | ElemKind::FloatTy); |
3975 | } |
3976 | |
/// Test that multi-axis BatchedReduceMax is correctly supported in Int32Ty.
3978 | TEST_P(OperatorTest, batchedReduceMaxMultiAxis_Int32) { |
3979 | CHECK_IF_ENABLED(); |
3980 | testBatchedReduceMaxMultiAxis<int32_t>(bindings_, mod_, F_, EE_, |
3981 | ElemKind::Int32ITy); |
3982 | } |
3983 | |
/// Test that multi-axis BatchedReduceMax is correctly supported in Int64Ty.
3985 | TEST_P(OperatorTest, batchedReduceMaxMultiAxis_Int64) { |
3986 | CHECK_IF_ENABLED(); |
3987 | testBatchedReduceMaxMultiAxis<int64_t>(bindings_, mod_, F_, EE_, |
3988 | ElemKind::Int64ITy); |
3989 | } |
3990 | |
/// Test that multi-axis BatchedReduceMax is correctly supported in Int8QTy.
3992 | TEST_P(OperatorTest, batchedReduceMaxMultiAxis_Int8) { |
3993 | CHECK_IF_ENABLED(); |
3994 | testBatchedReduceMaxMultiAxis<int8_t>(bindings_, mod_, F_, EE_, |
3995 | ElemKind::Int8QTy); |
3996 | } |
3997 | |
3998 | /// Helper to test BatchedReduceMin using \p DTy. |
3999 | template <typename DataType> |
4000 | static void testBatchedReduceMin(glow::PlaceholderBindings &bindings, |
4001 | glow::Module &mod, glow::Function *F, |
4002 | glow::ExecutionEngine &EE, ElemKind DTy) { |
4003 | const bool isQuantized = isQuantizedElemKind(DTy); |
4004 | const std::vector<dim_t> dims = {2, 4}; |
4005 | const std::vector<dim_t> expectedDims = {4}; |
4006 | auto BT = isQuantized ? mod.uniqueType(DTy, dims, 0.5, 3) |
4007 | : mod.uniqueType(DTy, dims); |
4008 | auto OT = isQuantized ? mod.uniqueType(DTy, expectedDims, 2.0, -1) |
4009 | : mod.uniqueType(DTy, expectedDims); |
4010 | |
4011 | auto *batch = mod.createPlaceholder(BT, "batch" , false); |
4012 | |
4013 | bindings.allocate(batch)->getHandle<DataType>() = {10, 20, 30, 40, |
4014 | 1, 2, 3, 4}; |
4015 | auto *R = F->createBatchedReduceMin("reduce.min" , OT, batch, /* axis */ 0); |
4016 | |
4017 | auto *save = F->createSave("save" , R); |
4018 | auto *result = bindings.allocate(save->getPlaceholder()); |
4019 | |
4020 | EE.compile(CompilationMode::Infer); |
4021 | EE.run(bindings); |
4022 | |
4023 | auto expected = Tensor(OT); |
4024 | expected.getHandle<DataType>() = {1, 2, 3, 4}; |
4025 | |
4026 | EXPECT_TRUE(result->isEqual(expected)); |
4027 | } |
4028 | |
/// Helper to test BatchedReduceMin over multiple axes using \p DTy.
4030 | template <typename DataType> |
4031 | static void testBatchedReduceMinMultiAxis(glow::PlaceholderBindings &bindings, |
4032 | glow::Module &mod, glow::Function *F, |
4033 | glow::ExecutionEngine &EE, |
4034 | ElemKind DTy) { |
4035 | const bool isQuantized = isQuantizedElemKind(DTy); |
const std::vector<dim_t> dims = {2, 2, 2, 2};
const std::vector<dim_t> expectedDims = {2, 2};
4038 | |
4039 | auto BT = isQuantized ? mod.uniqueType(DTy, dims, 0.5, 3) |
4040 | : mod.uniqueType(DTy, dims); |
4041 | auto OT = isQuantized ? mod.uniqueType(DTy, expectedDims, 2.0, -1) |
4042 | : mod.uniqueType(DTy, expectedDims); |
4043 | |
4044 | auto *batch = mod.createPlaceholder(BT, "batch" , false); |
4045 | |
4046 | bindings.allocate(batch)->getHandle<DataType>() = { |
4047 | 1, -2, 3, -4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; |
4048 | auto *R = |
4049 | F->createBatchedReduceMin("reduce.min" , OT, batch, /* axis */ {1, 3}); |
4050 | auto *save = F->createSave("save" , R); |
4051 | auto *result = bindings.allocate(save->getPlaceholder()); |
4052 | |
4053 | EE.compile(CompilationMode::Infer); |
4054 | EE.run(bindings); |
4055 | |
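// Mirroring the max case, each output cell is the min over four elements:
// min(1, -2, 5, 6) = -2, min(3, -4, 7, 8) = -4, min(9, 10, 13, 14) = 9,
// min(11, 12, 15, 16) = 11.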
4056 | Tensor expected(OT); |
4057 | expected.getHandle<DataType>() = {-2, -4, 9, 11}; |
4058 | EXPECT_TRUE(result->isEqual(expected)); |
4059 | } |
4060 | |
4061 | /// Test that BatchedReduceMin is correctly supported in FloatTy. |
4062 | TEST_P(OperatorTest, batchedReduceMin_Float) { |
4063 | CHECK_IF_ENABLED(); |
4064 | testBatchedReduceMin<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
4065 | } |
4066 | |
4067 | /// Test that BatchedReduceMin is correctly supported in Int32Ty. |
4068 | TEST_P(OperatorTest, batchedReduceMin_Int32) { |
4069 | CHECK_IF_ENABLED(); |
4070 | testBatchedReduceMin<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
4071 | } |
4072 | |
4073 | /// Test that BatchedReduceMin is correctly supported in Int64Ty. |
4074 | TEST_P(OperatorTest, batchedReduceMin_Int64) { |
4075 | CHECK_IF_ENABLED(); |
4076 | testBatchedReduceMin<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
4077 | } |
4078 | |
4079 | /// Test that BatchedReduceMin is correctly supported in Int8QTy. |
4080 | TEST_P(OperatorTest, batchedReduceMin_Int8) { |
4081 | CHECK_IF_ENABLED(); |
4082 | testBatchedReduceMin<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
4083 | } |
4084 | |
/// Test that multi-axis BatchedReduceMin is correctly supported in FloatTy.
4086 | TEST_P(OperatorTest, batchedReduceMinMultiAxis_Float) { |
4087 | CHECK_IF_ENABLED(); |
4088 | testBatchedReduceMinMultiAxis<float>(bindings_, mod_, F_, EE_, |
4089 | ElemKind::FloatTy); |
4090 | } |
4091 | |
/// Test that multi-axis BatchedReduceMin is correctly supported in Int32Ty.
4093 | TEST_P(OperatorTest, batchedReduceMinMultiAxis_Int32) { |
4094 | CHECK_IF_ENABLED(); |
4095 | testBatchedReduceMinMultiAxis<int32_t>(bindings_, mod_, F_, EE_, |
4096 | ElemKind::Int32ITy); |
4097 | } |
4098 | |
/// Test that multi-axis BatchedReduceMin is correctly supported in Int64Ty.
4100 | TEST_P(OperatorTest, batchedReduceMinMultiAxis_Int64) { |
4101 | CHECK_IF_ENABLED(); |
4102 | testBatchedReduceMinMultiAxis<int64_t>(bindings_, mod_, F_, EE_, |
4103 | ElemKind::Int64ITy); |
4104 | } |
4105 | |
/// Test that multi-axis BatchedReduceMin is correctly supported in Int8QTy.
4107 | TEST_P(OperatorTest, batchedReduceMinMultiAxis_Int8) { |
4108 | CHECK_IF_ENABLED(); |
4109 | testBatchedReduceMinMultiAxis<int8_t>(bindings_, mod_, F_, EE_, |
4110 | ElemKind::Int8QTy); |
4111 | } |
4112 | |
4113 | /// Helper to test BatchedReduceZeroDimResult using \p DTy. |
4114 | template <typename DataType> |
4115 | static void testBatchedReduceZeroDimResult(glow::PlaceholderBindings &bindings, |
4116 | glow::Module &mod, glow::Function *F, |
4117 | glow::ExecutionEngine &EE, |
4118 | ElemKind DTy) { |
4119 | auto *batch = createPlaceholderConditionallyQuantized( |
4120 | mod, DTy, {4}, "batch" , /* isTrainable */ false, "N" ); |
4121 | bindings.allocate(batch)->getHandle<DataType>() = {2, 4, 6, 8}; |
4122 | |
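  // An empty dims list yields a zero-dimensional (scalar) output type, so the
  // whole batch reduces down to a single element.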
4123 | auto OT = uniqueTypeConditionallyQuantized(mod, DTy, {}); |
4124 | auto *RA = F->createBatchedReduceAdd("reduce.add" , OT, batch, /* axis */ 0); |
4125 | auto *RM = F->createBatchedReduceMean("reduce.mean" , OT, batch, /* axis */ 0); |
4126 | auto *saveRA = F->createSave("saveRA" , RA); |
4127 | auto *saveRM = F->createSave("saveRM" , RM); |
4128 | auto *resultRA = bindings.allocate(saveRA->getPlaceholder()); |
4129 | auto *resultRM = bindings.allocate(saveRM->getPlaceholder()); |
4130 | |
4131 | EE.compile(CompilationMode::Infer); |
4132 | EE.run(bindings); |
4133 | |
4134 | auto RAH = resultRA->getHandle<DataType>(); |
4135 | auto RMH = resultRM->getHandle<DataType>(); |
4136 | if (isQuantizedElemKind(DTy)) { |
4137 | EXPECT_EQ(RAH.at({}), static_cast<DataType>(20)); |
4138 | EXPECT_EQ(RMH.at({}), static_cast<DataType>(5)); |
4139 | } else { |
4140 | EXPECT_NEAR(RAH.at({}), 20, 0.001); |
4141 | EXPECT_NEAR(RMH.at({}), 5, 0.001); |
4142 | } |
4143 | } |
4144 | |
4145 | /// Test reduction down to a zero-dim tensor on FloatTy. |
4146 | TEST_P(OperatorTest, batchedReduceZeroDimResult_Float) { |
4147 | CHECK_IF_ENABLED(); |
4148 | testBatchedReduceZeroDimResult<float>(bindings_, mod_, F_, EE_, |
4149 | ElemKind::FloatTy); |
4150 | } |
4151 | |
4152 | /// Test reduction down to a zero-dim tensor on Float16Ty. |
4153 | TEST_P(OperatorTest, batchedReduceZeroDimResult_Float16) { |
4154 | CHECK_IF_ENABLED(); |
4155 | testBatchedReduceZeroDimResult<float16_t>(bindings_, mod_, F_, EE_, |
4156 | ElemKind::Float16Ty); |
4157 | } |
4158 | |
4159 | /// Test reduction down to a zero-dim tensor on BFloat16Ty. |
4160 | TEST_P(OperatorTest, batchedReduceZeroDimResult_BFloat16) { |
4161 | CHECK_IF_ENABLED(); |
4162 | testBatchedReduceZeroDimResult<bfloat16_t>(bindings_, mod_, F_, EE_, |
4163 | ElemKind::BFloat16Ty); |
4164 | } |
4165 | |
4166 | /// Test reduction down to a zero-dim tensor on Int8QTy. |
4167 | TEST_P(OperatorTest, batchedReduceZeroDimResult_Int8) { |
4168 | CHECK_IF_ENABLED(); |
4169 | testBatchedReduceZeroDimResult<int8_t>(bindings_, mod_, F_, EE_, |
4170 | ElemKind::Int8QTy); |
4171 | } |
4172 | |
4173 | /// Helper to test BatchedReduceAddWithAxis using \p DTy. |
4174 | template <typename DataType> |
4175 | static void testBatchedReduceAddWithAxis(glow::PlaceholderBindings &bindings, |
4176 | glow::Module &mod, glow::Function *F, |
4177 | glow::ExecutionEngine &EE, |
4178 | ElemKind DTy) { |
4179 | auto *batch = createPlaceholderConditionallyQuantized(mod, DTy, {2, 3, 2}, |
4180 | "batch" , false); |
4181 | bindings.allocate(batch)->getHandle<DataType>() = {0, 1, 2, 3, 4, 5, |
4182 | 6, 7, 8, 9, 10, 11}; |
4183 | |
4184 | auto OT1 = uniqueTypeConditionallyQuantized(mod, DTy, {2, 2}); |
4185 | auto *R1 = |
4186 | F->createBatchedReduceAdd("reduce.add.axis.1" , OT1, batch, /* axis */ 1); |
4187 | auto OT2 = uniqueTypeConditionallyQuantized(mod, DTy, {2, 3}); |
4188 | auto *R2 = |
4189 | F->createBatchedReduceAdd("reduce.add.axis.2" , OT2, batch, /* axis */ 2); |
4190 | auto *save1 = F->createSave("save1" , R1); |
4191 | auto *save2 = F->createSave("save2" , R2); |
4192 | |
4193 | auto *result1 = bindings.allocate(save1->getPlaceholder()); |
4194 | auto *result2 = bindings.allocate(save2->getPlaceholder()); |
4195 | |
4196 | EE.compile(CompilationMode::Infer); |
4197 | EE.run(bindings); |
4198 | |
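  // expected1 sums over axis 1 (e.g. 0 + 2 + 4 = 6); expected2 sums over
  // axis 2 (e.g. 0 + 1 = 1).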
4199 | auto expected1 = createTensorConditionallyQuantized(DTy, {2, 2}); |
4200 | expected1.getHandle<DataType>() = {6, 9, 24, 27}; |
4201 | EXPECT_TRUE(result1->isEqual(expected1)); |
4202 | |
4203 | auto expected2 = createTensorConditionallyQuantized(DTy, {2, 3}); |
4204 | expected2.getHandle<DataType>() = {1, 5, 9, 13, 17, 21}; |
4205 | EXPECT_TRUE(result2->isEqual(expected2)); |
4206 | } |
4207 | |
4208 | /// Test that batchedReduceAddWithAxis is correctly supported in FloatTy. |
4209 | TEST_P(OperatorTest, batchedReduceAddWithAxis_Float) { |
4210 | CHECK_IF_ENABLED(); |
4211 | testBatchedReduceAddWithAxis<float>(bindings_, mod_, F_, EE_, |
4212 | ElemKind::FloatTy); |
4213 | } |
4214 | |
4215 | /// Test that batchedReduceAddWithAxis is correctly supported in Float16Ty. |
4216 | TEST_P(OperatorTest, batchedReduceAddWithAxis_Float16) { |
4217 | CHECK_IF_ENABLED(); |
4218 | testBatchedReduceAddWithAxis<float16_t>(bindings_, mod_, F_, EE_, |
4219 | ElemKind::Float16Ty); |
4220 | } |
4221 | |
4222 | /// Test that batchedReduceAddWithAxis is correctly supported in BFloat16Ty. |
4223 | TEST_P(OperatorTest, batchedReduceAddWithAxis_BFloat16) { |
4224 | CHECK_IF_ENABLED(); |
4225 | testBatchedReduceAddWithAxis<bfloat16_t>(bindings_, mod_, F_, EE_, |
4226 | ElemKind::BFloat16Ty); |
4227 | } |
4228 | |
4229 | /// Test that batchedReduceAddWithAxis is correctly supported in Int8QTy. |
4230 | TEST_P(OperatorTest, batchedReduceAddWithAxis_Int8Q) { |
4231 | CHECK_IF_ENABLED(); |
4232 | testBatchedReduceAddWithAxis<int8_t>(bindings_, mod_, F_, EE_, |
4233 | ElemKind::Int8QTy); |
4234 | } |
4235 | |
4236 | TEST_P(OperatorTest, batchedReduceAddQuantized) { |
4237 | CHECK_IF_ENABLED(); |
4238 | |
4239 | auto BT = mod_.uniqueType(ElemKind::Int8QTy, {3, 8}, 0.5, 3); |
4240 | auto OT = mod_.uniqueType(ElemKind::Int8QTy, {8}, 2.0, -1); |
4241 | |
4242 | auto *batch = |
4243 | mod_.createPlaceholder(ElemKind::Int8QTy, {3, 8}, BT->getScale(), |
4244 | BT->getOffset(), "batch" , false); |
4245 | |
4246 | bindings_.allocate(batch)->getHandle<int8_t>() = { |
4247 | 27, -31, 16, 7, 20, 34, -2, 8, -10, 83, 29, -17, |
4248 | 19, 13, -11, -9, 50, 58, 0, -20, -72, 43, -25, -1}; |
4249 | |
4250 | auto BH = bindings_.get(batch)->getHandle<int8_t>(); |
4251 | |
4252 | auto *R = |
4253 | F_->createBatchedReduceAdd("batched.reduce.add" , OT, batch, /* axis */ 0); |
4254 | |
4255 | auto *save = F_->createSave("save" , R); |
4256 | auto OH = bindings_.allocate(save->getPlaceholder())->getHandle<int8_t>(); |
4257 | |
4258 | EE_.compile(CompilationMode::Infer); |
4259 | EE_.run(bindings_); |
4260 | |
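  // Reference result: dequantize each element as (b - offsetB) * scaleB, sum
  // over the batch, and requantize. Folding the two scales gives
  // q_out = round(scaleB / scaleO * sum(b_i - offsetB) + offsetO).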
4261 | for (dim_t i = 0; i < 8; i++) { |
4262 | std::array<int32_t, 3> b{{BH.at({0, i}), BH.at({1, i}), BH.at({2, i})}}; |
4263 | float s = BT->getScale() / OT->getScale(); |
4264 | int32_t o = BT->getOffset(); |
4265 | float result = (b[0] - o) + (b[1] - o) + (b[2] - o); |
4266 | result = s * result + OT->getOffset(); |
4267 | |
4268 | EXPECT_NEAR(std::round(result), OH.at({i}), 1.0); |
4269 | } |
4270 | } |
4271 | |
4272 | TEST_P(OperatorTest, batchedReduceAddQuantizedWithAxis) { |
4273 | CHECK_IF_ENABLED(); |
4274 | |
4275 | auto BT = mod_.uniqueType(ElemKind::Int8QTy, {2, 3, 4}, 0.5, 3); |
4276 | auto OT = mod_.uniqueType(ElemKind::Int8QTy, {2, 4}, 2.0, -1); |
4277 | |
4278 | auto *batch = |
4279 | mod_.createPlaceholder(ElemKind::Int8QTy, {2, 3, 4}, BT->getScale(), |
4280 | BT->getOffset(), "batch" , false); |
4281 | |
4282 | bindings_.allocate(batch)->getHandle<int8_t>() = { |
4283 | 27, -31, 16, 7, 20, 34, -2, 8, -10, 83, 29, -17, |
4284 | 19, 13, -11, -9, 50, 58, 0, -20, -72, 43, -25, -1}; |
4285 | |
4286 | auto BH = bindings_.get(batch)->getHandle<int8_t>(); |
4287 | |
4288 | auto *R = |
4289 | F_->createBatchedReduceAdd("batched.reduce.add" , OT, batch, /* axis */ 1); |
4290 | auto *save = F_->createSave("save" , R); |
4291 | auto OH = bindings_.allocate(save->getPlaceholder())->getHandle<int8_t>(); |
4292 | |
4293 | EE_.compile(CompilationMode::Infer); |
4294 | EE_.run(bindings_); |
4295 | |
4296 | for (dim_t i = 0; i < 2; i++) { |
4297 | for (dim_t j = 0; j < 4; j++) { |
4298 | std::array<int32_t, 3> b{ |
4299 | {BH.at({i, 0, j}), BH.at({i, 1, j}), BH.at({i, 2, j})}}; |
4300 | float s = BT->getScale() / OT->getScale(); |
4301 | int32_t o = BT->getOffset(); |
4302 | float result = (b[0] - o) + (b[1] - o) + (b[2] - o); |
4303 | result = s * result + OT->getOffset(); |
4304 | |
4305 | EXPECT_NEAR(std::round(result), OH.at({i, j}), 1.0); |
4306 | } |
4307 | } |
4308 | } |
4309 | |
4310 | TEST_P(OperatorTest, batchedReduceMean) { |
4311 | CHECK_IF_ENABLED(); |
4312 | |
4313 | auto *batch = |
4314 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 4}, "batch" , false); |
4315 | bindings_.allocate(batch)->getHandle() = {10, 20, 30, 40, 1, 2, 3, 4}; |
4316 | |
  auto *R = F_->createBatchedReduceMean("reduce.mean", batch, /* axis */ 0);
4318 | |
4319 | auto *save = F_->createSave("save" , R); |
4320 | auto *result = bindings_.allocate(save->getPlaceholder()); |
4321 | |
4322 | EE_.compile(CompilationMode::Infer); |
4323 | EE_.run(bindings_); |
4324 | |
4325 | auto H = result->getHandle(); |
4326 | EXPECT_NEAR(H.at({0}), 5.5, 0.001); |
4327 | EXPECT_NEAR(H.at({1}), 11.0, 0.001); |
4328 | EXPECT_NEAR(H.at({2}), 16.5, 0.001); |
4329 | EXPECT_NEAR(H.at({3}), 22.0, 0.001); |
4330 | } |
4331 | |
4332 | TEST_P(OperatorTest, batchedReduceMeanWithAxis) { |
4333 | CHECK_IF_ENABLED(); |
4334 | |
4335 | auto *batch = |
4336 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 3, 2}, "batch" , false); |
4337 | bindings_.allocate(batch)->getHandle() = {0, 1, 2, 3, 4, 5, |
4338 | 6, 7, 8, 9, 10, 11}; |
4339 | |
  auto *R = F_->createBatchedReduceMean("reduce.mean", batch, /* axis */ 1);
4341 | |
4342 | auto *save = F_->createSave("save" , R); |
4343 | auto *result = bindings_.allocate(save->getPlaceholder()); |
4344 | |
4345 | EE_.compile(CompilationMode::Infer); |
4346 | EE_.run(bindings_); |
4347 | |
4348 | auto H = result->getHandle(); |
4349 | EXPECT_NEAR(H.at({0, 0}), 2.0, 0.001); |
4350 | EXPECT_NEAR(H.at({0, 1}), 3.0, 0.001); |
4351 | EXPECT_NEAR(H.at({1, 0}), 8.0, 0.001); |
4352 | EXPECT_NEAR(H.at({1, 1}), 9.0, 0.001); |
4353 | } |
4354 | |
4355 | TEST_P(OperatorTest, batchedReduceMeanQuantized) { |
4356 | CHECK_IF_ENABLED(); |
4357 | |
4358 | auto BT = mod_.uniqueType(ElemKind::Int8QTy, {3, 8}, 0.5, 3); |
4359 | auto OT = mod_.uniqueType(ElemKind::Int8QTy, {8}, 2.0, -1); |
4360 | |
4361 | auto *batch = |
4362 | mod_.createPlaceholder(ElemKind::Int8QTy, {3, 8}, BT->getScale(), |
4363 | BT->getOffset(), "batch" , false); |
4364 | |
4365 | bindings_.allocate(batch)->getHandle<int8_t>() = { |
4366 | 27, -31, 16, 7, 20, 34, -2, 8, -10, 83, 29, -17, |
4367 | 19, 13, -11, -9, 50, 58, 0, -20, -72, 43, -25, -1}; |
4368 | |
4369 | auto BH = bindings_.get(batch)->getHandle<int8_t>(); |
4370 | |
  auto *R = F_->createBatchedReduceMean("batched.reduce.mean", OT, batch,
                                        /* axis */ 0);
4373 | |
4374 | auto *save = F_->createSave("save" , R); |
4375 | auto OH = bindings_.allocate(save->getPlaceholder())->getHandle<int8_t>(); |
4376 | |
4377 | EE_.compile(CompilationMode::Infer); |
4378 | EE_.run(bindings_); |
4379 | |
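  // Same dequantize/requantize scheme as the quantized reduce-add test above,
  // except the dequantized sum is divided by the batch size (3) before
  // requantization.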
4380 | for (dim_t i = 0; i < 8; i++) { |
4381 | std::array<int32_t, 3> b{{BH.at({0, i}), BH.at({1, i}), BH.at({2, i})}}; |
4382 | float s = BT->getScale() / OT->getScale(); |
4383 | int32_t o = BT->getOffset(); |
4384 | float result = ((b[0] - o) + (b[1] - o) + (b[2] - o)) / 3; |
4385 | result = s * result + OT->getOffset(); |
4386 | |
4387 | EXPECT_NEAR(std::round(result), OH.at({i}), 1.0); |
4388 | } |
4389 | } |
4390 | |
4391 | TEST_P(OperatorTest, batchedReduceMeanQuantizedWithAxis) { |
4392 | CHECK_IF_ENABLED(); |
4393 | |
4394 | auto BT = mod_.uniqueType(ElemKind::Int8QTy, {2, 3, 4}, 0.5, 3); |
4395 | auto OT = mod_.uniqueType(ElemKind::Int8QTy, {2, 4}, 2.0, -1); |
4396 | |
4397 | auto *batch = |
4398 | mod_.createPlaceholder(ElemKind::Int8QTy, {2, 3, 4}, BT->getScale(), |
4399 | BT->getOffset(), "batch" , false); |
4400 | |
4401 | bindings_.allocate(batch)->getHandle<int8_t>() = { |
4402 | 27, -31, 16, 7, 20, 34, -2, 8, -10, 83, 29, -17, |
4403 | 19, 13, -11, -9, 50, 58, 0, -20, -72, 43, -25, -1}; |
4404 | |
4405 | auto BH = bindings_.get(batch)->getHandle<int8_t>(); |
4406 | |
  auto *R = F_->createBatchedReduceMean("batched.reduce.mean", OT, batch,
                                        /* axis */ 1);
4409 | auto *save = F_->createSave("save" , R); |
4410 | auto OH = bindings_.allocate(save->getPlaceholder())->getHandle<int8_t>(); |
4411 | |
4412 | EE_.compile(CompilationMode::Infer); |
4413 | EE_.run(bindings_); |
4414 | |
4415 | for (dim_t i = 0; i < 2; i++) { |
4416 | for (dim_t j = 0; j < 4; j++) { |
4417 | std::array<int32_t, 3> b{ |
4418 | {BH.at({i, 0, j}), BH.at({i, 1, j}), BH.at({i, 2, j})}}; |
4419 | float s = BT->getScale() / OT->getScale(); |
4420 | int32_t o = BT->getOffset(); |
4421 | float result = ((b[0] - o) + (b[1] - o) + (b[2] - o)) / 3; |
4422 | result = s * result + OT->getOffset(); |
4423 | |
4424 | EXPECT_NEAR(std::round(result), OH.at({i, j}), 1.0); |
4425 | } |
4426 | } |
4427 | } |
4428 | |
4429 | /// Verify that batchedReduceMean optimization using AvgPool works correctly. |
4430 | TEST_P(OperatorTest, batchedReduceMeanUsingAvgPool) { |
4431 | CHECK_IF_ENABLED(); |
4432 | |
4433 | std::vector<dim_t> dims = {3, 20, 4, 8}; |
4434 | |
4435 | auto *batch = |
4436 | mod_.createPlaceholder(ElemKind::FloatTy, dims, "batch" , false, "NHWC" ); |
4437 | |
4438 | auto IH = bindings_.allocate(batch)->getHandle(); |
4439 | IH.randomize(1.0, 100.0, mod_.getPRNG()); |
4440 | |
4441 | auto *R = F_->createBatchedReduceMean("reduce.mean" , batch, {2, 3}); |
4442 | |
4443 | auto *save = F_->createSave("save" , R); |
4444 | auto *result = bindings_.allocate(save->getPlaceholder()); |
4445 | EE_.compile(CompilationMode::Infer); |
4446 | |
4447 | EE_.run(bindings_); |
4448 | auto H = result->getHandle(); |
4449 | |
4450 | std::array<std::array<float, 20>, 3> results{}; |
4451 | for (dim_t i = 0; i < dims[0]; i++) { |
4452 | for (dim_t j = 0; j < dims[1]; j++) { |
4453 | for (dim_t k = 0; k < dims[2]; k++) { |
4454 | for (dim_t l = 0; l < dims[3]; l++) { |
4455 | results[i][j] += IH.at({i, j, k, l}); |
4456 | } |
4457 | } |
4458 | results[i][j] /= (dims[2] * dims[3]); |
4459 | EXPECT_NEAR(H.at({i, j}), results[i][j], 0.001); |
4460 | } |
4461 | } |
4462 | } |
4463 | |
4464 | /// Verify that quantized batchedReduceMean optimization using AvgPool works |
4465 | /// correctly. |
4466 | TEST_P(OperatorTest, batchedReduceMeanUsingAvgPoolQuantized) { |
4467 | CHECK_IF_ENABLED(); |
4468 | |
4469 | std::vector<dim_t> dims = {2, 3, 3, 4}; |
4470 | |
4471 | auto BT = mod_.uniqueType(ElemKind::Int8QTy, dims, 1, 0); |
4472 | auto OT = mod_.uniqueType(ElemKind::Int8QTy, {dims[0], dims[1]}, 3, 0); |
4473 | auto *batch = mod_.createPlaceholder(ElemKind::Int8QTy, dims, BT->getScale(), |
4474 | BT->getOffset(), "batch" , false); |
4475 | |
4476 | auto IH = bindings_.allocate(batch)->getHandle<int8_t>(); |
4477 | IH.randomize(1, 100, mod_.getPRNG()); |
4478 | |
4479 | auto *R = F_->createBatchedReduceMean("reduce.mean" , OT, batch, {2, 3}); |
4480 | |
4481 | auto *save = F_->createSave("save" , R); |
4482 | auto OH = bindings_.allocate(save->getPlaceholder())->getHandle<int8_t>(); |
4483 | |
4484 | EE_.compile(CompilationMode::Infer); |
4485 | EE_.run(bindings_); |
4486 | |
4487 | std::array<std::array<float, 3>, 2> results{}; |
4488 | float s = BT->getScale() / OT->getScale(); |
4489 | for (dim_t i = 0; i < dims[0]; i++) { |
4490 | for (dim_t j = 0; j < dims[1]; j++) { |
4491 | for (dim_t k = 0; k < dims[2]; k++) { |
4492 | int32_t o = BT->getOffset(); |
4493 | for (dim_t l = 0; l < dims[3]; l++) { |
4494 | results[i][j] += IH.at({i, j, k, l}) - o; |
4495 | } |
4496 | } |
4497 | results[i][j] = s * results[i][j] + OT->getOffset(); |
4498 | results[i][j] /= (dims[2] * dims[3]); |
4499 | EXPECT_NEAR(std::round(results[i][j]), OH.at({i, j}), 1.0); |
4500 | } |
4501 | } |
4502 | } |
4503 | |
4504 | /// Test that the BatchedAdd operator works. |
4505 | TEST_P(OperatorTest, BatchedAdd) { |
4506 | CHECK_IF_ENABLED(); |
4507 | |
4508 | auto *batch = |
4509 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 3, 3}, "batch" , false); |
4510 | auto *added = |
4511 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 3}, "added" , false); |
4512 | |
4513 | bindings_.allocate(batch)->getHandle() = {9, 8, 7, 6, 5, 4, 3, 4, 5, |
4514 | 6, 7, 8, 9, 10, 11, 12, 13, 14}; |
4515 | bindings_.allocate(added)->getHandle().clear(1.0); |
4516 | |
4517 | auto *R = F_->createBatchedAdd("batch.add" , batch, added); |
4518 | auto *save = F_->createSave("save" , R); |
4519 | auto *result = bindings_.allocate(save->getPlaceholder()); |
4520 | |
4521 | EE_.compile(CompilationMode::Infer); |
4522 | EE_.run(bindings_); |
4523 | |
4524 | auto BH = bindings_.get(batch)->getHandle(); |
4525 | auto RH = result->getHandle(); |
4526 | for (dim_t i = 0; i < 2; i++) { |
4527 | for (dim_t j = 0; j < 3; j++) { |
4528 | for (dim_t k = 0; k < 3; k++) { |
4529 | EXPECT_NEAR(RH.at({i, j, k}), BH.at({i, j, k}) + 1.0, 0.001); |
4530 | } |
4531 | } |
4532 | } |
4533 | } |
4534 | |
/// Broadcast a Tensor of shape (2,1,1) to (2,4,2) with axis 0.
4536 | TEST_P(OperatorTest, broadcastSimple) { |
4537 | CHECK_IF_ENABLED(); |
4538 | |
4539 | const dim_t numDims_A = 3; |
4540 | const dim_t dimY_A = 2; |
4541 | const dim_t dimZ_A = 4; |
4542 | const dim_t dimW_A = 2; |
4543 | const dim_t dims_A[numDims_A] = {dimY_A, dimZ_A, dimW_A}; |
4544 | |
4545 | const dim_t numDims_B = 3; |
4546 | const dim_t dimY_B = 2; |
4547 | const dim_t dimZ_B = 1; |
4548 | const dim_t dimW_B = 1; |
4549 | const dim_t dims_B[numDims_B] = {dimY_B, dimZ_B, dimW_B}; |
4550 | |
4551 | auto *B = mod_.createPlaceholder(ElemKind::FloatTy, dims_B, "B" , false); |
4552 | auto *QB = |
4553 | mod_.createPlaceholder(ElemKind::Int8QTy, dims_B, 1.1, -2, "QB" , false); |
4554 | auto H_B = bindings_.allocate(B)->getHandle(); |
4555 | auto H_QB = bindings_.allocate(QB)->getHandle<int8_t>(); |
4556 | H_B = {20, 10}; |
4557 | H_QB = {35, -18}; |
4558 | |
4559 | const unsigned axis = 0; |
4560 | |
4561 | auto *R = F_->createBroadcast("broadcasted" , B, dims_A, axis); |
4562 | auto *QR = F_->createBroadcast("broadcastedQ" , QB, dims_A, axis); |
4563 | |
4564 | auto *save = F_->createSave("save" , R); |
4565 | auto *broadcasted = bindings_.allocate(save->getPlaceholder()); |
4566 | |
4567 | auto *saveQ = F_->createSave("saveQ" , QR); |
4568 | auto *broadcastedQ = bindings_.allocate(saveQ->getPlaceholder()); |
4569 | |
4570 | EE_.compile(CompilationMode::Infer); |
4571 | EE_.run(bindings_); |
4572 | |
4573 | auto broadcastedBHandle = broadcasted->getHandle(); |
4574 | auto broadcastedQBHandle = broadcastedQ->getHandle<int8_t>(); |
  // Verify that the broadcasted B has the same shape as A.
4576 | EXPECT_EQ(broadcastedBHandle.dims().size(), numDims_A); |
4577 | EXPECT_EQ(broadcastedQBHandle.dims().size(), numDims_A); |
4578 | for (size_t i = 0; i < broadcastedBHandle.dims().size(); i++) { |
4579 | EXPECT_EQ(broadcastedBHandle.dims()[i], dims_A[i]); |
4580 | EXPECT_EQ(broadcastedQBHandle.dims()[i], dims_A[i]); |
4581 | } |
4582 | |
  // Look at the two values in B and verify that they were correctly broadcast
  // across the two expanded dimensions.
4585 | const dim_t k_B = 0; |
4586 | const dim_t l_B = 0; |
4587 | for (dim_t j_B = 0; j_B < dimY_B; ++j_B) { |
4588 | const float origVal = H_B.at({j_B, k_B, l_B}); |
4589 | const int8_t origValQ = H_QB.at({j_B, k_B, l_B}); |
4590 | const dim_t j_A = j_B; // This dim was not broadcasted (dims were equal). |
4591 | for (dim_t k_A = 0; k_A < dimZ_A; k_A++) { |
4592 | for (dim_t l_A = 0; l_A < dimW_A; l_A++) { |
4593 | EXPECT_EQ(broadcastedBHandle.at({j_A, k_A, l_A}), origVal); |
4594 | EXPECT_EQ(broadcastedQBHandle.at({j_A, k_A, l_A}), origValQ); |
4595 | } |
4596 | } |
4597 | } |
4598 | } |
4599 | |
4600 | /// Broadcast a Tensor of shape (2,1) to (3,2,4,2) with axis 1. |
4601 | TEST_P(OperatorTest, broadcast) { |
4602 | CHECK_IF_ENABLED(); |
4603 | |
4604 | const dim_t numDims_A = 4; |
4605 | const dim_t dimX_A = 3; |
4606 | const dim_t dimY_A = 2; |
4607 | const dim_t dimZ_A = 4; |
4608 | const dim_t dimW_A = 2; |
4609 | const dim_t dims_A[numDims_A] = {dimX_A, dimY_A, dimZ_A, dimW_A}; |
4610 | |
4611 | const dim_t numDims_B = 2; |
4612 | const dim_t dimY_B = 2; |
4613 | const dim_t dimZ_B = 1; |
4614 | const dim_t dims_B[numDims_B] = {dimY_B, dimZ_B}; |
4615 | |
4616 | auto *B = mod_.createPlaceholder(ElemKind::FloatTy, dims_B, "B" , false); |
4617 | auto *QB = |
4618 | mod_.createPlaceholder(ElemKind::Int8QTy, dims_B, 0.8, 3, "QB" , false); |
4619 | |
4620 | auto H_B = bindings_.allocate(B)->getHandle(); |
4621 | auto H_QB = bindings_.allocate(QB)->getHandle<int8_t>(); |
4622 | H_B = {20, 10}; |
4623 | H_QB = {-8, 41}; |
4624 | |
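  // With axis 1, B's dims (2, 1) align with dims 1..2 of the target shape
  // (3, 2, 4, 2): dim 1 matches, dim 2 is expanded from 1 to 4, and dims 0
  // and 3 are newly created.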
4625 | const unsigned axis = 1; |
4626 | |
4627 | auto *R = F_->createBroadcast("broadcasted" , B, dims_A, axis); |
4628 | auto *QR = F_->createBroadcast("broadcastedQ" , QB, dims_A, axis); |
4629 | |
4630 | auto *save = F_->createSave("save" , R); |
4631 | auto *broadcasted = bindings_.allocate(save->getPlaceholder()); |
4632 | |
4633 | auto *saveQ = F_->createSave("saveQ" , QR); |
4634 | auto *broadcastedQ = bindings_.allocate(saveQ->getPlaceholder()); |
4635 | |
4636 | EE_.compile(CompilationMode::Infer); |
4637 | EE_.run(bindings_); |
4638 | |
4639 | auto broadcastedBHandle = broadcasted->getHandle(); |
4640 | auto broadcastedQBHandle = broadcastedQ->getHandle<int8_t>(); |
  // Verify that the broadcasted B has the same shape as A.
4642 | EXPECT_EQ(broadcastedBHandle.dims().size(), numDims_A); |
4643 | EXPECT_EQ(broadcastedQBHandle.dims().size(), numDims_A); |
4644 | for (size_t i = 0; i < broadcastedBHandle.dims().size(); i++) { |
4645 | EXPECT_EQ(broadcastedBHandle.dims()[i], dims_A[i]); |
4646 | EXPECT_EQ(broadcastedQBHandle.dims()[i], dims_A[i]); |
4647 | } |
  // Look at the two values in B and verify that they were correctly broadcast
  // across the three expanded dimensions.
4650 | const dim_t k_B = 0; |
4651 | for (dim_t j_B = 0; j_B < dimY_B; ++j_B) { |
4652 | const float origVal = H_B.at({j_B, k_B}); |
4653 | const int8_t origValQ = H_QB.at({j_B, k_B}); |
4654 | const dim_t j_A = j_B; // This dim was not broadcasted (dims were equal). |
4655 | for (dim_t i_A = 0; i_A < dimX_A; i_A++) { |
4656 | for (dim_t k_A = 0; k_A < dimZ_A; k_A++) { |
4657 | for (dim_t l_A = 0; l_A < dimW_A; l_A++) { |
4658 | EXPECT_EQ(broadcastedBHandle.at({i_A, j_A, k_A, l_A}), origVal); |
4659 | EXPECT_EQ(broadcastedQBHandle.at({i_A, j_A, k_A, l_A}), origValQ); |
4660 | } |
4661 | } |
4662 | } |
4663 | } |
4664 | } |
4665 | |
4666 | /// Perform a simple weighted sum. |
4667 | TEST_P(OperatorTest, weightedSum) { |
4668 | CHECK_IF_ENABLED(); |
4669 | |
4670 | // Create the data. |
4671 | auto *A = mod_.createPlaceholder(ElemKind::FloatTy, {2, 2}, "A" , false); |
4672 | bindings_.allocate(A)->getHandle() = {1.0, 2.0, 3.0, 4.0}; |
4673 | |
4674 | auto *B = mod_.createPlaceholder(ElemKind::FloatTy, {2, 2}, "B" , false); |
4675 | bindings_.allocate(B)->getHandle() = {5.0, 6.0, 7.0, 8.0}; |
4676 | |
4677 | // Create the weights. |
4678 | auto *AW = mod_.createPlaceholder(ElemKind::FloatTy, {1}, "AW" , false); |
4679 | bindings_.allocate(AW)->getHandle() = {0.1f}; |
4680 | |
4681 | auto *BW = mod_.createPlaceholder(ElemKind::FloatTy, {1}, "BW" , false); |
4682 | bindings_.allocate(BW)->getHandle() = {10.0f}; |
4683 | |
4684 | // Create the weighted sum with the data and weights, and save it. |
4685 | auto *WS = F_->createWeightedSum("ws" , {A, B}, {AW, BW}); |
4686 | auto *save = F_->createSave("save" , WS); |
4687 | auto *saveTensor = bindings_.allocate(save->getPlaceholder()); |
4688 | |
4689 | EE_.compile(CompilationMode::Infer); |
4690 | EE_.run(bindings_); |
4691 | |
4692 | // Verify the weighted sum was correctly calculated. |
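  // Each output element is 0.1 * A + 10.0 * B,
  // e.g. 0.1 * 1.0 + 10.0 * 5.0 = 50.1.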
4693 | auto resultH = saveTensor->getHandle(); |
4694 | EXPECT_NEAR(resultH.at({0, 0}), 50.1, 1E-5); |
4695 | EXPECT_NEAR(resultH.at({0, 1}), 60.2, 1E-5); |
4696 | EXPECT_NEAR(resultH.at({1, 0}), 70.3, 1E-5); |
4697 | EXPECT_NEAR(resultH.at({1, 1}), 80.4, 1E-5); |
4698 | } |
4699 | |
4700 | /// Helper to test ReluSimple using \p DTy. |
4701 | template <typename DataType> |
4702 | static void testReluSimple(glow::PlaceholderBindings &bindings, |
4703 | glow::Module &mod, glow::Function *F, |
4704 | glow::ExecutionEngine &EE, ElemKind DTy) { |
4705 | auto *in = mod.createPlaceholder(DTy, {7}, "in" , false); |
4706 | auto *relu = F->createRELU("relu" , in); |
4707 | auto *save = F->createSave("relu" , relu); |
4708 | auto *result = bindings.allocate(save->getPlaceholder()); |
4709 | |
4710 | bindings.allocate(in)->getHandle<DataType>() = {0, -1, -2, -3, 4, 5, 6}; |
4711 | |
4712 | EE.compile(CompilationMode::Infer); |
4713 | EE.run(bindings); |
4714 | |
4715 | auto resultH = result->getHandle<DataType>(); |
4716 | |
4717 | for (size_t i = 0; i < 7; i++) { |
4718 | if (i < 4) { |
4719 | EXPECT_EQ(resultH.raw(i), static_cast<DataType>(0)); |
4720 | } else { |
4721 | EXPECT_EQ(resultH.raw(i), static_cast<DataType>(i)); |
4722 | } |
4723 | } |
4724 | } |
4725 | |
4726 | /// Verify that the RELU operator works correctly for Float. |
4727 | TEST_P(OperatorTest, ReluSimple_Float) { |
4728 | CHECK_IF_ENABLED(); |
4729 | |
4730 | testReluSimple<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
4731 | } |
4732 | |
4733 | /// Verify that the RELU operator works correctly for Float16. |
4734 | TEST_P(OperatorTest, ReluSimple_Float16) { |
4735 | CHECK_IF_ENABLED(); |
4736 | testReluSimple<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
4737 | } |
4738 | |
/// Verify that the RELU operator works correctly for BFloat16.
4740 | TEST_P(OperatorTest, ReluSimple_BFloat16) { |
4741 | CHECK_IF_ENABLED(); |
4742 | testReluSimple<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
4743 | } |
4744 | |
4745 | /// Helper to test PReluSimple using \p DTy. |
4746 | template <typename DataType> |
4747 | static void testPReluSimple(glow::PlaceholderBindings &bindings, |
4748 | glow::Module &mod, glow::Function *F, |
4749 | glow::ExecutionEngine &EE, ElemKind DTy, |
4750 | double allowedError) { |
4751 | auto *in = mod.createPlaceholder(DTy, {7}, "in" , false); |
4752 | auto *slope = mod.createPlaceholder(DTy, {7}, "slope" , false); |
4753 | auto *prelu = F->createPRELU("prelu" , in, slope); |
4754 | auto *save = F->createSave("prelu" , prelu); |
4755 | auto *result = bindings.allocate(save->getPlaceholder()); |
4756 | |
4757 | bindings.allocate(in)->getHandle<DataType>() = {0, -1, -2, -3, 4, 5, 6}; |
4758 | bindings.allocate(slope)->getHandle<DataType>().randomize(0.1, 3.0, |
4759 | mod.getPRNG()); |
4760 | |
4761 | EE.compile(CompilationMode::Infer); |
4762 | EE.run(bindings); |
4763 | |
4764 | auto resultH = result->getHandle<DataType>(); |
4765 | auto inH = bindings.get(in)->getHandle<DataType>(); |
4766 | auto slopeH = bindings.get(slope)->getHandle<DataType>(); |
4767 | |
4768 | for (size_t i = 0; i < 7; i++) { |
4769 | DataType expectedResult = |
4770 | slopeH.raw(i) * std::min<DataType>(0, inH.raw(i)) + |
4771 | std::max<DataType>(0, inH.raw(i)); |
4772 | EXPECT_NEAR(resultH.at(i), expectedResult, allowedError); |
4773 | } |
4774 | } |
4775 | |
4776 | /// Verify that the PRELU operator works correctly for Float. |
4777 | TEST_P(OperatorTest, PReluSimple_Float) { |
4778 | CHECK_IF_ENABLED(); |
4779 | testPReluSimple<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 1E-32); |
4780 | } |
4781 | |
4782 | /// Verify that the PRELU operator works correctly for Float16. |
4783 | TEST_P(OperatorTest, PReluSimple_Float16) { |
4784 | CHECK_IF_ENABLED(); |
4785 | testPReluSimple<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
4786 | 1E-16); |
4787 | } |
4788 | |
4789 | /// Verify that the PRELU operator works correctly for BFloat16. |
4790 | TEST_P(OperatorTest, PReluSimple_BFloat16) { |
4791 | CHECK_IF_ENABLED(); |
4792 | testPReluSimple<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
4793 | 1E-16); |
4794 | } |
4795 | |
4796 | /// Verify that the PRELU operator works correctly for int8 with different |
4797 | /// input and output quantization parameters. |
4798 | TEST_P(OperatorTest, PRelu_Int8) { |
4799 | CHECK_IF_ENABLED(); |
4800 | auto *in = mod_.createPlaceholder(ElemKind::Int8QTy, {7}, 1, 0, "in" , false); |
4801 | auto *slope = |
4802 | mod_.createPlaceholder(ElemKind::Int8QTy, {7}, 1, 0, "slope" , false); |
4803 | |
4804 | auto outTy = mod_.uniqueType(ElemKind::Int8QTy, {7}, 2, 0); |
4805 | auto *prelu = F_->createPRELU("prelu" , in, slope, outTy); |
4806 | auto *save = F_->createSave("prelu" , prelu); |
4807 | bindings_.allocate(save->getPlaceholder()); |
4808 | |
4809 | bindings_.allocate(in)->getHandle<int8_t>() = {0, -1, -2, -3, 4, 5, 6}; |
4810 | bindings_.allocate(slope)->getHandle<int8_t>().randomize(1, 10, |
4811 | mod_.getPRNG()); |
4812 | |
4813 | EE_.compile(CompilationMode::Infer); |
4814 | EE_.run(bindings_); |
4815 | |
4816 | auto resultH = bindings_.get(save->getPlaceholder())->getHandle<int8_t>(); |
4817 | auto inH = bindings_.get(in)->getHandle<int8_t>(); |
4818 | auto slopeH = bindings_.get(slope)->getHandle<int8_t>(); |
4819 | |
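  // With input and slope quantized as {scale=1, offset=0} and the output as
  // {scale=2, offset=0}, the quantized output equals the integer PReLU result
  // divided by the output scale, hence expectedResult / 2 below.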
4820 | for (size_t i = 0; i < 7; i++) { |
4821 | int8_t expectedResult = slopeH.raw(i) * std::min<int8_t>(0, inH.raw(i)) + |
4822 | std::max<int8_t>(0, inH.raw(i)); |
4823 | EXPECT_NEAR(resultH.at(i), expectedResult / 2, 1); |
4824 | } |
4825 | } |
4826 | |
4827 | /// Helper to test Gelu using \p DTy. |
4828 | template <typename DataType> |
4829 | static void testGelu(glow::PlaceholderBindings &bindings, glow::Module &mod, |
4830 | glow::Function *F, glow::ExecutionEngine &EE, ElemKind DTy, |
4831 | double allowedError) { |
4832 | auto *in = mod.createPlaceholder(DTy, {7}, "in" , false); |
4833 | auto *gelu = F->createGELU("gelu" , in); |
4834 | auto *save = F->createSave("gelu" , gelu); |
4835 | auto *result = bindings.allocate(save->getPlaceholder()); |
4836 | |
4837 | bindings.allocate(in)->getHandle<DataType>().randomize(0.1, 3.0, |
4838 | mod.getPRNG()); |
4839 | |
4840 | EE.compile(CompilationMode::Infer); |
4841 | EE.run(bindings); |
4842 | |
4843 | auto resultH = result->getHandle<DataType>(); |
4844 | auto inH = bindings.get(in)->getHandle<DataType>(); |
4845 | // see https://arxiv.org/pdf/1606.08415.pdf |
4846 | float geluConst = 0.044715f; |
4847 | |
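  // M_2_SQRTPI * M_SQRT1_2 == (2 / sqrt(pi)) * (1 / sqrt(2)) == sqrt(2 / pi),
  // matching the tanh approximation
  // gelu(x) ~= 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3))).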
4848 | for (size_t i = 0; i < 7; i++) { |
4849 | float inHf = static_cast<float>(inH.raw(i)); |
4850 | float expectedResult = |
4851 | 0.5f * inHf * |
4852 | (1.0f + std::tanh(M_2_SQRTPI * M_SQRT1_2 * |
4853 | (inHf + geluConst * std::pow(inHf, 3)))); |
4854 | EXPECT_NEAR(resultH.at(i), expectedResult, allowedError); |
4855 | } |
4856 | } |
4857 | |
4858 | /// Verify that the GELU operator works correctly for Float. |
4859 | TEST_P(OperatorTest, Gelu_Float) { |
4860 | CHECK_IF_ENABLED(); |
4861 | testGelu<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 1E-6); |
4862 | } |
4863 | |
4864 | /// Verify that the GELU operator works correctly for Float16. |
4865 | TEST_P(OperatorTest, Gelu_Float16) { |
4866 | CHECK_IF_ENABLED(); |
4867 | testGelu<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, 1.5E-2); |
4868 | } |
4869 | |
4870 | TEST_P(OperatorTest, CollectRpnProposals) { |
4871 | CHECK_IF_ENABLED(); |
4872 | |
4873 | int64_t rpnMaxLevels = 6; |
4874 | int64_t rpnMinLevels = 2; |
4875 | uint32_t rpnPostNmsTopN = 14; |
4876 | |
4877 | auto *inp0 = |
4878 | mod_.createPlaceholder(ElemKind::FloatTy, {9, 5}, "roisIn0" , false); |
4879 | auto *inp1 = |
4880 | mod_.createPlaceholder(ElemKind::FloatTy, {9, 5}, "roisIn1" , false); |
4881 | auto *inp2 = |
4882 | mod_.createPlaceholder(ElemKind::FloatTy, {9, 5}, "roisIn2" , false); |
4883 | auto *inp3 = |
4884 | mod_.createPlaceholder(ElemKind::FloatTy, {9, 5}, "roisIn3" , false); |
4885 | auto *inp4 = |
4886 | mod_.createPlaceholder(ElemKind::FloatTy, {9, 5}, "roisIn4" , false); |
4887 | auto *inp5 = |
4888 | mod_.createPlaceholder(ElemKind::FloatTy, {9}, "scoresIn0" , false); |
4889 | auto *inp6 = |
4890 | mod_.createPlaceholder(ElemKind::FloatTy, {9}, "scoresIn1" , false); |
4891 | auto *inp7 = |
4892 | mod_.createPlaceholder(ElemKind::FloatTy, {9}, "scoresIn2" , false); |
4893 | auto *inp8 = |
4894 | mod_.createPlaceholder(ElemKind::FloatTy, {9}, "scoresIn3" , false); |
4895 | auto *inp9 = |
4896 | mod_.createPlaceholder(ElemKind::FloatTy, {9}, "scoresIn4" , false); |
4897 | |
4898 | bindings_.allocate(inp0)->getHandle() = { |
4899 | 0.0000, 115.2341, 148.2082, 163.9516, 175.6832, 0.0000, 104.2961, |
4900 | 215.3492, 242.5623, 262.1163, 0.0000, 19.3218, 23.6992, 154.5081, |
4901 | 251.7623, 0.0000, 226.4726, 211.6586, 236.6433, 266.1842, 1.0000, |
4902 | 17.3863, 94.0684, 67.8375, 114.7378, 1.0000, 60.9553, 38.8897, |
4903 | 138.8663, 76.7117, 1.0000, 2.7621, 83.5598, 67.0541, 90.7924, |
4904 | 1.0000, 90.6893, 52.8477, 100.2276, 138.7310, 1.0000, 98.0147, |
4905 | 8.8531, 102.5518, 98.5838, |
4906 | }; |
4907 | bindings_.allocate(inp1)->getHandle() = { |
4908 | 0.0000, 35.0680, 85.7965, 98.9293, 155.0935, 0.0000, 56.8145, |
4909 | 27.7562, 268.8377, 43.8762, 0.0000, 66.4838, 43.2397, 68.8953, |
4910 | 126.8365, 0.0000, 53.4704, 37.5858, 178.5217, 100.2932, 1.0000, |
4911 | 14.1265, 123.1779, 14.2739, 143.5395, 1.0000, 108.6717, 5.7606, |
4912 | 143.5839, 88.9123, 1.0000, 17.4530, 43.5326, 17.6689, 46.7435, |
4913 | 1.0000, 9.4297, 39.0123, 83.2904, 101.7934, 1.0000, 13.8093, |
4914 | 46.8296, 136.6065, 84.6641, |
4915 | }; |
4916 | bindings_.allocate(inp2)->getHandle() = { |
4917 | 0.0000, 35.8490, 49.8280, 78.7185, 194.8410, 0.0000, 1.2772, |
4918 | 184.3661, 5.4693, 225.4717, 0.0000, 67.6609, 156.7148, 199.9728, |
4919 | 261.7153, 0.0000, 155.6525, 60.6782, 259.1477, 121.6181, 1.0000, |
4920 | 102.8235, 43.7232, 119.6283, 58.2863, 1.0000, 85.4471, 106.6124, |
4921 | 101.8021, 129.6151, 1.0000, 62.3067, 89.1398, 140.5443, 94.6666, |
4922 | 1.0000, 42.6414, 90.8483, 44.3315, 97.0455, 1.0000, 19.9147, |
4923 | 43.8475, 83.7848, 86.8583}; |
4924 | bindings_.allocate(inp3)->getHandle() = { |
4925 | 0.0000, 117.3458, 177.3721, 177.6706, 243.8607, 0.0000, 118.5553, |
4926 | 191.4577, 219.2848, 242.6031, 0.0000, 250.0993, 40.6499, 271.6864, |
4927 | 194.2736, 0.0000, 33.6790, 167.4322, 44.1981, 230.6582, 1.0000, |
4928 | 10.1675, 59.8559, 83.6578, 102.5220, 1.0000, 106.1422, 125.8031, |
4929 | 143.4017, 127.3582, 1.0000, 25.2296, 76.5924, 52.9168, 107.3086, |
4930 | 1.0000, 29.3995, 2.7227, 116.6736, 32.9169, 1.0000, 4.6803, |
4931 | 24.2100, 136.4279, 103.5489, |
4932 | }; |
4933 | bindings_.allocate(inp4)->getHandle() = { |
4934 | 0.0000, 157.0062, 64.7068, 254.1062, 166.9987, 0.0000, 84.8490, |
4935 | 108.3161, 160.4555, 198.5932, 0.0000, 50.6445, 133.4048, 201.1578, |
4936 | 256.8693, 0.0000, 69.1850, 15.7839, 118.1613, 84.8085, 1.0000, |
4937 | 3.6278, 9.8857, 55.5295, 26.4017, 1.0000, 66.6934, 78.8771, |
4938 | 131.8107, 145.5798, 1.0000, 3.0357, 38.7084, 97.4725, 111.4817, |
4939 | 1.0000, 56.3692, 86.4826, 122.1641, 92.4603, 1.0000, 27.2885, |
4940 | 117.3129, 40.2107, 140.0604, |
4941 | }; |
4942 | bindings_.allocate(inp5)->getHandle() = { |
4943 | 0.6030, 0.0229, 0.6746, 0.7330, 0.3460, 0.2078, 0.1711, 0.4475, 0.0838, |
4944 | }; |
4945 | bindings_.allocate(inp6)->getHandle() = { |
4946 | 0.0042, 0.9355, 0.3390, 0.5551, 0.7210, 0.5162, 0.5879, 0.9776, 0.0361, |
4947 | }; |
4948 | bindings_.allocate(inp7)->getHandle() = { |
4949 | 0.7225, 0.6125, 0.3317, 0.2408, 0.1214, 0.2090, 0.4792, 0.7840, 0.6932, |
4950 | }; |
4951 | bindings_.allocate(inp8)->getHandle() = { |
4952 | 0.1799, 0.8250, 0.3095, 0.0022, 0.5814, 0.2361, 0.8224, 0.0236, 0.6101, |
4953 | }; |
4954 | bindings_.allocate(inp9)->getHandle() = { |
4955 | 0.8860, 0.2196, 0.4328, 0.2911, 0.4263, 0.6079, 0.2881, 0.7497, 0.4761, |
4956 | }; |
4957 | |
4958 | auto *roisOut = mod_.createPlaceholder(ElemKind::FloatTy, {rpnPostNmsTopN, 5}, |
4959 | "roisOut" , false); |
4960 | |
4961 | bindings_.allocate(roisOut); |
4962 | |
4963 | std::vector<NodeValue> rois = { |
4964 | inp0, inp1, inp2, inp3, inp4, |
4965 | }; |
4966 | |
4967 | std::vector<NodeValue> roisProbs = { |
4968 | inp5, inp6, inp7, inp8, inp9, |
4969 | }; |
4970 | |
4971 | auto *CRPN = |
4972 | F_->createCollectRpnProposals("CollectRpnProposal" , rois, roisProbs, |
4973 | rpnMaxLevels, rpnMinLevels, rpnPostNmsTopN); |
4974 | |
4975 | F_->createSave("save.rois" , {CRPN, 0}, roisOut); |
4976 | |
4977 | EE_.compile(CompilationMode::Infer); |
4978 | |
4979 | EE_.run(bindings_); |
4980 | |
4981 | auto V = bindings_.get(roisOut)->getHandle<float>(); |
4982 | |
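  // Reference output: the proposals from all five levels are merged, sorted
  // by score in descending order, and the top rpnPostNmsTopN rows are kept.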
4983 | std::vector<std::vector<float>> refRois = { |
4984 | {1.0000, 9.4297, 39.0123, 83.2904, 101.7934}, |
4985 | {0.0000, 56.8145, 27.7562, 268.8377, 43.8762}, |
4986 | {0.0000, 157.0062, 64.7068, 254.1062, 166.9987}, |
4987 | {0.0000, 118.5553, 191.4577, 219.2848, 242.6031}, |
4988 | {1.0000, 25.2296, 76.5924, 52.9168, 107.3086}, |
4989 | {1.0000, 42.6414, 90.8483, 44.3315, 97.0455}, |
4990 | {1.0000, 56.3692, 86.4826, 122.1641, 92.4603}, |
4991 | {0.0000, 226.4726, 211.6586, 236.6433, 266.1842}, |
4992 | {0.0000, 35.8490, 49.8280, 78.7185, 194.8410}, |
4993 | {1.0000, 14.1265, 123.1779, 14.2739, 143.5395}, |
4994 | {1.0000, 19.9147, 43.8475, 83.7848, 86.8583}, |
4995 | {0.0000, 19.3218, 23.6992, 154.5081, 251.7623}, |
4996 | {0.0000, 1.2772, 184.3661, 5.4693, 225.4717}, |
4997 | {1.0000, 4.6803, 24.2100, 136.4279, 103.5489}, |
4998 | }; |
4999 | |
5000 | for (uint32_t i = 0; i < rpnPostNmsTopN; i++) { |
5001 | for (uint32_t j = 0; j < 5; j++) { |
5002 | EXPECT_NEAR(V.at({i, j}), refRois[i][j], 1E-4); |
5003 | } |
5004 | } |
5005 | } |
5006 | |
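/// Test that TopK returns the K largest values and their indices along the
/// last dimension.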
5007 | TEST_P(OperatorTest, TopK) { |
5008 | CHECK_IF_ENABLED(); |
5009 | |
5010 | auto *inp = |
5011 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 1, 5}, "input" , false); |
5012 | auto *values = |
5013 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 1, 3}, "values" , false); |
5014 | auto *indices = |
5015 | mod_.createPlaceholder(ElemKind::Int64ITy, {3, 1, 3}, "indices" , false); |
5016 | |
5017 | bindings_.allocate(inp)->getHandle() = { |
5018 | 28, 4, 411, 19, 42, 0.4f, 0.4f, 0.4f, -0.4f, 0.45f, 7, 5, 9, 8, 100, |
5019 | }; |
5020 | bindings_.allocate(values); |
5021 | bindings_.allocate(indices); |
5022 | |
5023 | auto *R = F_->createTopK("TopK" , inp, 3); |
5024 | |
5025 | F_->createSave("save.values" , {R, 0}, values); |
5026 | F_->createSave("save.indices" , {R, 1}, indices); |
5027 | |
5028 | EE_.compile(CompilationMode::Infer); |
5029 | |
5030 | EE_.run(bindings_); |
5031 | |
5032 | auto V = bindings_.get(values)->getHandle(); |
5033 | auto I = bindings_.get(indices)->getHandle<int64_t>(); |
5034 | |
5035 | EXPECT_FLOAT_EQ(V.at({0, 0, 0}), 411); |
5036 | EXPECT_EQ(I.at({0, 0, 0}), 2); |
5037 | EXPECT_FLOAT_EQ(V.at({0, 0, 1}), 42); |
5038 | EXPECT_EQ(I.at({0, 0, 1}), 4); |
5039 | EXPECT_FLOAT_EQ(V.at({0, 0, 2}), 28); |
5040 | EXPECT_EQ(I.at({0, 0, 2}), 0); |
5041 | |
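  // The second row contains a repeated value (0.4); ties are expected to come
  // back in ascending index order.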
5042 | EXPECT_FLOAT_EQ(V.at({1, 0, 0}), 0.45); |
5043 | EXPECT_EQ(I.at({1, 0, 0}), 4); |
5044 | EXPECT_FLOAT_EQ(V.at({1, 0, 1}), 0.4); |
5045 | EXPECT_EQ(I.at({1, 0, 1}), 0); |
5046 | EXPECT_FLOAT_EQ(V.at({1, 0, 2}), 0.4); |
5047 | EXPECT_EQ(I.at({1, 0, 2}), 1); |
5048 | |
5049 | EXPECT_FLOAT_EQ(V.at({2, 0, 0}), 100); |
5050 | EXPECT_EQ(I.at({2, 0, 0}), 4); |
5051 | EXPECT_FLOAT_EQ(V.at({2, 0, 1}), 9); |
5052 | EXPECT_EQ(I.at({2, 0, 1}), 2); |
5053 | EXPECT_FLOAT_EQ(V.at({2, 0, 2}), 8); |
5054 | EXPECT_EQ(I.at({2, 0, 2}), 3); |
5055 | } |
5056 | |
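/// Helper to test ArgMax with keepDims=true using \p DTy.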
5057 | template <typename DataType> |
5058 | static void testArgMaxKeepDim(glow::PlaceholderBindings &bindings, |
5059 | glow::Module &mod, glow::Function *F, |
5060 | glow::ExecutionEngine &EE, ElemKind DTy) { |
5061 | auto *input = createPlaceholderConditionallyQuantized(mod, DTy, {2, 3, 2, 2}, |
5062 | "input" , false, "NHWC" ); |
5063 | auto *argmax = mod.createPlaceholder(ElemKind::Int64ITy, {1, 3, 2, 2}, |
5064 | "argmax" , false, "NHWC" ); |
5065 | |
5066 | bindings.allocate(input)->getHandle<DataType>() = { |
5067 | 11, 24, 33, 41, 15, 26, 37, 48, 12, 28, 31, 42, |
5068 | 13, 24, 35, 46, 12, 28, 39, 40, 11, 22, 33, 47}; |
5069 | bindings.allocate(argmax); |
5070 | |
5071 | auto *AM = F->createArgMax("argmax" , input, 0, true); |
5072 | F->createSave("save.argmax" , AM, argmax); |
5073 | |
5074 | EE.compile(CompilationMode::Infer); |
5075 | EE.run(bindings); |
5076 | |
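  // Argmax over axis 0 compares the two batch slices at each (c, h, w)
  // position; e.g. I.raw(0) compares 11 vs. 13 and picks index 1.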
5077 | auto I = bindings.get(argmax)->getHandle<int64_t>(); |
5078 | EXPECT_EQ(I.raw(0), 1); |
5079 | EXPECT_EQ(I.raw(1), 0); |
5080 | EXPECT_EQ(I.raw(2), 1); |
5081 | EXPECT_EQ(I.raw(3), 1); |
5082 | EXPECT_EQ(I.raw(4), 0); |
5083 | EXPECT_EQ(I.raw(5), 1); |
5084 | EXPECT_EQ(I.raw(6), 1); |
5085 | EXPECT_EQ(I.raw(7), 0); |
5086 | EXPECT_EQ(I.raw(8), 0); |
5087 | EXPECT_EQ(I.raw(9), 0); |
5088 | EXPECT_EQ(I.raw(10), 1); |
5089 | EXPECT_EQ(I.raw(11), 1); |
5090 | } |
5091 | |
5092 | TEST_P(OperatorTest, FloatArgMaxKeepDim) { |
5093 | CHECK_IF_ENABLED(); |
5094 | testArgMaxKeepDim<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
5095 | } |
5096 | |
5097 | TEST_P(OperatorTest, Float16ArgMaxKeepDim) { |
5098 | CHECK_IF_ENABLED(); |
5099 | testArgMaxKeepDim<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
5100 | } |
5101 | |
5102 | TEST_P(OperatorTest, QuantizedArgMaxKeepDim) { |
5103 | CHECK_IF_ENABLED(); |
5104 | testArgMaxKeepDim<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
5105 | } |
5106 | |
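/// Helper to test ArgMax with keepDims=false using \p DTy.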
5107 | template <typename DataType> |
5108 | static void testArgMaxNoKeepDim(glow::PlaceholderBindings &bindings, |
5109 | glow::Module &mod, glow::Function *F, |
5110 | glow::ExecutionEngine &EE, ElemKind DTy) { |
5111 | auto *input = createPlaceholderConditionallyQuantized(mod, DTy, {2, 3, 2, 2}, |
5112 | "input" , false, "NHWC" ); |
5113 | auto *argmax = |
5114 | mod.createPlaceholder(ElemKind::Int64ITy, {2, 2, 2}, "argmax" , false); |
5115 | |
5116 | bindings.allocate(input)->getHandle<DataType>() = { |
5117 | 11, 24, 33, 41, 15, 26, 37, 48, 12, 28, 31, 42, |
5118 | 13, 24, 35, 46, 12, 28, 39, 40, 11, 22, 33, 47}; |
5119 | bindings.allocate(argmax); |
5120 | |
5121 | auto *AM = F->createArgMax("argmax" , input, 1, false); |
5122 | F->createSave("save.argmax" , AM, argmax); |
5123 | |
5124 | EE.compile(CompilationMode::Infer); |
5125 | EE.run(bindings); |
5126 | |
5127 | auto I = bindings.get(argmax)->getHandle<int64_t>(); |
5128 | EXPECT_EQ(I.raw(0), 1); |
5129 | EXPECT_EQ(I.raw(1), 2); |
5130 | EXPECT_EQ(I.raw(2), 1); |
5131 | EXPECT_EQ(I.raw(3), 1); |
5132 | EXPECT_EQ(I.raw(4), 0); |
5133 | EXPECT_EQ(I.raw(5), 1); |
5134 | EXPECT_EQ(I.raw(6), 1); |
5135 | EXPECT_EQ(I.raw(7), 2); |
5136 | } |
5137 | |
5138 | TEST_P(OperatorTest, FloatArgMaxNoKeepDim) { |
5139 | CHECK_IF_ENABLED(); |
5140 | testArgMaxNoKeepDim<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
5141 | } |
5142 | |
5143 | TEST_P(OperatorTest, Float16ArgMaxNoKeepDim) { |
5144 | CHECK_IF_ENABLED(); |
5145 | testArgMaxNoKeepDim<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
5146 | } |
5147 | |
5148 | TEST_P(OperatorTest, QuantizedArgMaxNoKeepDim) { |
5149 | CHECK_IF_ENABLED(); |
5150 | testArgMaxNoKeepDim<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
5151 | } |
5152 | |
5153 | TEST_P(OperatorTest, FloatArgMaxNoKeepDimWithAxis1) { |
5154 | CHECK_IF_ENABLED(); |
5155 | |
5156 | auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 3, 4}, "input" , |
5157 | false, "NHWC" ); |
5158 | auto *argmax = |
5159 | mod_.createPlaceholder(ElemKind::Int64ITy, {1, 3, 4}, "argmax" , false); |
5160 | |
5161 | bindings_.allocate(input)->getHandle<float>() = { |
5162 | -2.0031254, 1.6150867, -0.7161922, -0.25389647, -2.3863597, |
5163 | 1.3052065, -1.2064048, -0.12670185, 1.4289513, 0.38050872, |
5164 | -0.15112245, 1.360533, -1.9638863, -0.7602536, 0.68145376, |
5165 | 1.1685915, 0.35476854, 1.0272173, -1.554366, -1.6835353, |
5166 | -1.4499142, 0.9042695, 1.0751117, -1.0798755}; |
5167 | |
5168 | bindings_.allocate(argmax); |
5169 | |
5170 | auto *AM = |
5171 | F_->createArgMax("argmax" , input, /* axis */ 1, /* keepDims */ false); |
5172 | F_->createSave("save.argmax" , AM, argmax); |
5173 | |
5174 | EE_.compile(CompilationMode::Infer); |
5175 | EE_.run(bindings_); |
5176 | |
5177 | auto I = bindings_.get(argmax)->getHandle<int64_t>(); |
5178 | EXPECT_EQ(I.raw(0), 1); |
5179 | EXPECT_EQ(I.raw(1), 0); |
5180 | EXPECT_EQ(I.raw(2), 1); |
5181 | EXPECT_EQ(I.raw(3), 1); |
5182 | EXPECT_EQ(I.raw(4), 1); |
5183 | EXPECT_EQ(I.raw(5), 0); |
5184 | EXPECT_EQ(I.raw(6), 0); |
5185 | EXPECT_EQ(I.raw(7), 0); |
5186 | EXPECT_EQ(I.raw(8), 0); |
5187 | EXPECT_EQ(I.raw(9), 1); |
5188 | EXPECT_EQ(I.raw(10), 1); |
5189 | EXPECT_EQ(I.raw(11), 0); |
5190 | } |
5191 | |
5192 | TEST_P(OperatorTest, FloatArgMaxNoKeepDimWithAxis2) { |
5193 | CHECK_IF_ENABLED(); |
5194 | |
5195 | auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 3, 4}, "input" , |
5196 | false, "NHWC" ); |
5197 | auto *argmax = |
5198 | mod_.createPlaceholder(ElemKind::Int64ITy, {1, 2, 4}, "argmax" , false); |
5199 | |
5200 | bindings_.allocate(input)->getHandle<float>() = { |
5201 | -0.11289205, -0.13215652, -1.184799, 0.2295995, 0.03064479, |
5202 | -0.28138036, -0.51807016, 0.89983666, -0.46122625, -0.70558083, |
5203 | 0.43882176, -0.6988644, 2.0838234, -0.22806482, -0.6829437, |
5204 | 0.70269305, -0.8199907, 0.25597557, 0.3598691, -0.9919779, |
5205 | 2.069314, -1.8825238, 1.2604765, -0.78306365}; |
5206 | |
5207 | bindings_.allocate(argmax); |
5208 | |
5209 | auto *AM = |
5210 | F_->createArgMax("argmax" , input, /* axis */ 2, /* keepDims */ false); |
5211 | F_->createSave("save.argmax" , AM, argmax); |
5212 | |
5213 | EE_.compile(CompilationMode::Infer); |
5214 | EE_.run(bindings_); |
5215 | |
5216 | auto I = bindings_.get(argmax)->getHandle<int64_t>(); |
5217 | EXPECT_EQ(I.raw(0), 1); |
5218 | EXPECT_EQ(I.raw(1), 0); |
5219 | EXPECT_EQ(I.raw(2), 2); |
5220 | EXPECT_EQ(I.raw(3), 1); |
5221 | EXPECT_EQ(I.raw(4), 0); |
5222 | EXPECT_EQ(I.raw(5), 1); |
5223 | EXPECT_EQ(I.raw(6), 2); |
5224 | EXPECT_EQ(I.raw(7), 0); |
5225 | } |
5226 | |
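/// Helper to test ArgMin with keepDims=true using \p DTy.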
5227 | template <typename DataType> |
5228 | static void testArgMinKeepDim(glow::PlaceholderBindings &bindings, |
5229 | glow::Module &mod, glow::Function *F, |
5230 | glow::ExecutionEngine &EE, ElemKind DTy) { |
5231 | auto *input = createPlaceholderConditionallyQuantized(mod, DTy, {2, 3, 2, 2}, |
5232 | "input" , false, "NHWC" ); |
5233 | auto *argmin = mod.createPlaceholder(ElemKind::Int64ITy, {1, 3, 2, 2}, |
5234 | "argmin" , false, "NHWC" ); |
5235 | |
5236 | bindings.allocate(input)->getHandle<DataType>() = { |
5237 | 11, 24, 33, 41, 15, 26, 37, 48, 12, 28, 31, 42, |
5238 | 13, 24, 35, 46, 12, 28, 39, 40, 11, 22, 33, 47}; |
5239 | bindings.allocate(argmin); |
5240 | |
5241 | auto *AM = F->createArgMin("argmin" , input, 0, true); |
5242 | F->createSave("save.argmin" , AM, argmin); |
5243 | |
5244 | EE.compile(CompilationMode::Infer); |
5245 | EE.run(bindings); |
5246 | |
5247 | auto I = bindings.get(argmin)->getHandle<int64_t>(); |
5248 | EXPECT_EQ(I.raw(0), 0); |
5249 | EXPECT_EQ(I.raw(1), 0); |
5250 | EXPECT_EQ(I.raw(2), 0); |
5251 | EXPECT_EQ(I.raw(3), 0); |
5252 | EXPECT_EQ(I.raw(4), 1); |
5253 | EXPECT_EQ(I.raw(5), 0); |
5254 | EXPECT_EQ(I.raw(6), 0); |
5255 | EXPECT_EQ(I.raw(7), 1); |
5256 | EXPECT_EQ(I.raw(8), 1); |
5257 | EXPECT_EQ(I.raw(9), 1); |
5258 | EXPECT_EQ(I.raw(10), 0); |
5259 | EXPECT_EQ(I.raw(11), 0); |
5260 | } |
5261 | |
5262 | TEST_P(OperatorTest, FloatArgMinKeepDim) { |
5263 | CHECK_IF_ENABLED(); |
5264 | testArgMinKeepDim<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
5265 | } |
5266 | |
5267 | TEST_P(OperatorTest, QuantizedArgMinKeepDim) { |
5268 | CHECK_IF_ENABLED(); |
5269 | testArgMinKeepDim<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
5270 | } |
5271 | |
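/// Helper to test ArgMin with keepDims=false using \p DTy.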
5272 | template <typename DataType> |
5273 | static void testArgMinNoKeepDim(glow::PlaceholderBindings &bindings, |
5274 | glow::Module &mod, glow::Function *F, |
5275 | glow::ExecutionEngine &EE, ElemKind DTy) { |
5276 | auto *input = createPlaceholderConditionallyQuantized(mod, DTy, {2, 3, 2, 2}, |
5277 | "input" , false, "NHWC" ); |
5278 | auto *argmin = |
5279 | mod.createPlaceholder(ElemKind::Int64ITy, {2, 2, 2}, "argmin" , false); |
5280 | |
5281 | bindings.allocate(input)->getHandle<DataType>() = { |
5282 | 11, 24, 33, 41, 15, 26, 37, 48, 12, 28, 31, 42, |
5283 | 13, 24, 35, 46, 12, 28, 39, 40, 11, 22, 33, 47}; |
5284 | bindings.allocate(argmin); |
5285 | |
5286 | auto *AM = F->createArgMin("argmin" , input, 1, false); |
5287 | F->createSave("save.argmin" , AM, argmin); |
5288 | |
5289 | EE.compile(CompilationMode::Infer); |
5290 | EE.run(bindings); |
5291 | |
5292 | auto I = bindings.get(argmin)->getHandle<int64_t>(); |
5293 | EXPECT_EQ(I.raw(0), 0); |
5294 | EXPECT_EQ(I.raw(1), 0); |
5295 | EXPECT_EQ(I.raw(2), 2); |
5296 | EXPECT_EQ(I.raw(3), 0); |
5297 | EXPECT_EQ(I.raw(4), 2); |
5298 | EXPECT_EQ(I.raw(5), 2); |
5299 | EXPECT_EQ(I.raw(6), 2); |
5300 | EXPECT_EQ(I.raw(7), 1); |
5301 | } |
5302 | |
5303 | TEST_P(OperatorTest, FloatArgMinNoKeepDim) { |
5304 | CHECK_IF_ENABLED(); |
5305 | testArgMinNoKeepDim<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
5306 | } |
5307 | |
5308 | TEST_P(OperatorTest, QuantizedArgMinNoKeepDim) { |
5309 | CHECK_IF_ENABLED(); |
5310 | testArgMinNoKeepDim<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
5311 | } |
5312 | |
5313 | TEST_P(OperatorTest, FloatArgMinNoKeepDimWithAxis1) { |
5314 | CHECK_IF_ENABLED(); |
5315 | |
5316 | auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 3, 4}, "input" , |
5317 | false, "NHWC" ); |
5318 | auto *argmin = |
5319 | mod_.createPlaceholder(ElemKind::Int64ITy, {1, 3, 4}, "argmin" , false); |
5320 | |
5321 | bindings_.allocate(input)->getHandle<float>() = { |
5322 | -2.0031254, 1.6150867, -0.7161922, -0.25389647, -2.3863597, |
5323 | 1.3052065, -1.2064048, -0.12670185, 1.4289513, 0.38050872, |
5324 | -0.15112245, 1.360533, -1.9638863, -0.7602536, 0.68145376, |
5325 | 1.1685915, 0.35476854, 1.0272173, -1.554366, -1.6835353, |
5326 | -1.4499142, 0.9042695, 1.0751117, -1.0798755}; |
5327 | |
5328 | bindings_.allocate(argmin); |
5329 | |
5330 | auto *AM = |
5331 | F_->createArgMin("argmin" , input, /* axis */ 1, /* keepDims */ false); |
5332 | F_->createSave("save.argmin" , AM, argmin); |
5333 | |
5334 | EE_.compile(CompilationMode::Infer); |
5335 | EE_.run(bindings_); |
5336 | |
5337 | auto I = bindings_.get(argmin)->getHandle<int64_t>(); |
5338 | EXPECT_EQ(I.raw(0), 0); |
5339 | EXPECT_EQ(I.raw(1), 1); |
5340 | EXPECT_EQ(I.raw(2), 0); |
5341 | EXPECT_EQ(I.raw(3), 0); |
5342 | EXPECT_EQ(I.raw(4), 0); |
5343 | EXPECT_EQ(I.raw(5), 1); |
5344 | EXPECT_EQ(I.raw(6), 1); |
5345 | EXPECT_EQ(I.raw(7), 1); |
5346 | EXPECT_EQ(I.raw(8), 1); |
5347 | EXPECT_EQ(I.raw(9), 0); |
5348 | EXPECT_EQ(I.raw(10), 0); |
5349 | EXPECT_EQ(I.raw(11), 1); |
5350 | } |
5351 | |
5352 | TEST_P(OperatorTest, FloatArgMinNoKeepDimWithAxis2) { |
5353 | CHECK_IF_ENABLED(); |
5354 | |
5355 | auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 3, 4}, "input" , |
5356 | false, "NHWC" ); |
5357 | auto *argmin = |
5358 | mod_.createPlaceholder(ElemKind::Int64ITy, {1, 2, 4}, "argmin" , false); |
5359 | |
5360 | bindings_.allocate(input)->getHandle<float>() = { |
5361 | -0.11289205, -0.13215652, -1.184799, 0.2295995, 0.03064479, |
5362 | -0.28138036, -0.51807016, 0.89983666, -0.46122625, -0.70558083, |
5363 | 0.43882176, -0.6988644, 2.0838234, -0.22806482, -0.6829437, |
5364 | 0.70269305, -0.8199907, 0.25597557, 0.3598691, -0.9919779, |
5365 | 2.069314, -1.8825238, 1.2604765, -0.78306365}; |
5366 | |
5367 | bindings_.allocate(argmin); |
5368 | |
5369 | auto *AM = |
5370 | F_->createArgMin("argmin" , input, /* axis */ 2, /* keepDims */ false); |
5371 | F_->createSave("save.argmin" , AM, argmin); |
5372 | |
5373 | EE_.compile(CompilationMode::Infer); |
5374 | EE_.run(bindings_); |
5375 | |
5376 | auto I = bindings_.get(argmin)->getHandle<int64_t>(); |
5377 | EXPECT_EQ(I.raw(0), 2); |
5378 | EXPECT_EQ(I.raw(1), 2); |
5379 | EXPECT_EQ(I.raw(2), 0); |
5380 | EXPECT_EQ(I.raw(3), 2); |
5381 | EXPECT_EQ(I.raw(4), 1); |
5382 | EXPECT_EQ(I.raw(5), 2); |
5383 | EXPECT_EQ(I.raw(6), 0); |
5384 | EXPECT_EQ(I.raw(7), 1); |
5385 | } |
5386 | |
/// Check that concatenating Nodes with multiple outputs works correctly.
5388 | TEST_P(OperatorTest, ConcatTopK) { |
5389 | CHECK_IF_ENABLED(); |
5390 | |
5391 | auto *inp1 = |
5392 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 1, 3}, "input" , false); |
5393 | auto *inp2 = |
5394 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 1, 3}, "input" , false); |
5395 | auto *indices = |
5396 | mod_.createPlaceholder(ElemKind::Int64ITy, {4, 1, 2}, "indices" , false); |
5397 | |
5398 | bindings_.allocate(inp1)->getHandle() = {1, 2, 3, 17.4f, -0.1f, -10.1f}; |
5399 | bindings_.allocate(inp2)->getHandle() = {1, 2, -3, -17.4f, -0.1f, -10.1f}; |
5400 | |
5401 | auto *R1 = F_->createTopK("TopK1" , inp1, 2); |
5402 | auto *R2 = F_->createTopK("TopK2" , inp2, 2); |
5403 | |
5404 | // Concat the values and indices separately, both on the 0th dimension, |
5405 | // matching the shapes of the values and indices variables above. |
5406 | auto *CV = |
5407 | F_->createConcat("Concat.Values" , {R1->getValues(), R2->getValues()}, 0); |
5408 | auto *CI = F_->createConcat("Concat.Indices" , |
5409 | {R1->getIndices(), R2->getIndices()}, 0); |
5410 | |
5411 | auto *saveValues = F_->createSave("Save.Values" , CV); |
5412 | auto *saveValuesTensor = bindings_.allocate(saveValues->getPlaceholder()); |
5413 | |
5414 | auto *saveIndices = F_->createSave("Save.Indices" , CI, indices); |
5415 | auto *saveIndicesTensor = bindings_.allocate(saveIndices->getPlaceholder()); |
5416 | |
5417 | EE_.compile(CompilationMode::Infer); |
5418 | |
5419 | EE_.run(bindings_); |
5420 | |
5421 | auto V = saveValuesTensor->getHandle(); |
5422 | auto I = saveIndicesTensor->getHandle<int64_t>(); |
5423 | |
  EXPECT_FLOAT_EQ(V.at({0, 0, 0}), 3);
  EXPECT_EQ(I.at({0, 0, 0}), 2);
  EXPECT_FLOAT_EQ(V.at({0, 0, 1}), 2);
  EXPECT_EQ(I.at({0, 0, 1}), 1);

  EXPECT_FLOAT_EQ(V.at({1, 0, 0}), 17.4f);
  EXPECT_EQ(I.at({1, 0, 0}), 0);
  EXPECT_FLOAT_EQ(V.at({1, 0, 1}), -0.1f);
  EXPECT_EQ(I.at({1, 0, 1}), 1);

  EXPECT_FLOAT_EQ(V.at({2, 0, 0}), 2);
  EXPECT_EQ(I.at({2, 0, 0}), 1);
  EXPECT_FLOAT_EQ(V.at({2, 0, 1}), 1);
  EXPECT_EQ(I.at({2, 0, 1}), 0);

  EXPECT_FLOAT_EQ(V.at({3, 0, 0}), -0.1f);
  EXPECT_EQ(I.at({3, 0, 0}), 1);
  EXPECT_FLOAT_EQ(V.at({3, 0, 1}), -10.1f);
  EXPECT_EQ(I.at({3, 0, 1}), 2);
5443 | } |
5444 | |
/// Check that matrix multiplication works correctly on predefined values.
5446 | TEST_P(OperatorTest, matmul2) { |
5447 | CHECK_IF_ENABLED(); |
5448 | |
5449 | auto *inp0 = |
5450 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2}, "input0" , false); |
5451 | auto *inp1 = |
5452 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2}, "input1" , false); |
5453 | auto *inp2 = |
5454 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2}, "input2" , false); |
5455 | auto *rot = mod_.createPlaceholder(ElemKind::FloatTy, {2, 2}, "rot" , false); |
5456 | |
  // 45 degrees expressed in radians.
  const float rad = 45.0f / 180.0f * 3.1415926f;
5458 | // Use the rotation matrix to manipulate some values. |
5459 | // https://en.wikipedia.org/wiki/Rotation_matrix |
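  // The matrix R = {{cos t, -sin t}, {sin t, cos t}} is stored row-major, so
  // a row vector is rotated as [x, y] * R = [x*cos t + y*sin t,
  // -x*sin t + y*cos t]; e.g. [1, 4] maps to about [3.5355, 2.1213] for
  // t = 45 degrees.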
  bindings_.allocate(rot)->getHandle() = {
      cosf(rad),
      -sinf(rad),
      sinf(rad),
      cosf(rad),
  };
5466 | |
5467 | // Some test vectors. |
5468 | bindings_.allocate(inp0)->getHandle() = {1, 4}; |
5469 | bindings_.allocate(inp1)->getHandle() = {14, 2}; |
5470 | bindings_.allocate(inp2)->getHandle() = {5, 2}; |
5471 | |
5472 | auto *A0 = F_->createMatMul("m0" , inp0, rot); |
5473 | auto *A1 = F_->createMatMul("m1" , inp1, rot); |
5474 | auto *A2 = F_->createMatMul("m2" , inp2, rot); |
5475 | |
5476 | auto *res0 = F_->createSave("save.values" , A0); |
5477 | bindings_.allocate(res0->getPlaceholder()); |
5478 | auto *res1 = F_->createSave("save.values" , A1); |
5479 | bindings_.allocate(res1->getPlaceholder()); |
5480 | auto *res2 = F_->createSave("save.values" , A2); |
5481 | bindings_.allocate(res2->getPlaceholder()); |
5482 | |
5483 | EE_.compile(CompilationMode::Infer); |
5484 | |
5485 | EE_.run(bindings_); |
5486 | |
5487 | auto R0 = bindings_.get(res0->getPlaceholder())->getHandle(); |
5488 | auto R1 = bindings_.get(res1->getPlaceholder())->getHandle(); |
5489 | auto R2 = bindings_.get(res2->getPlaceholder())->getHandle(); |
5490 | |
5491 | EXPECT_FLOAT_EQ(R0.at({0, 0}), 3.5355339); |
5492 | EXPECT_FLOAT_EQ(R0.at({0, 1}), 2.1213205); |
5493 | EXPECT_FLOAT_EQ(R1.at({0, 0}), 11.313709); |
5494 | EXPECT_FLOAT_EQ(R1.at({0, 1}), -8.485281); |
5495 | EXPECT_FLOAT_EQ(R2.at({0, 0}), 4.9497476); |
5496 | EXPECT_FLOAT_EQ(R2.at({0, 1}), -2.1213202); |
5497 | } |
5498 | |
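/// Helper to test TopK for the special case of K=1 with index element kind
/// \p topKElemKind: for each of the three rows only the maximum value and its
/// index along the last dimension are returned.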
5499 | template <typename HandleTy> |
5500 | static void topK1Template(Module &mod_, Function *F_, ExecutionEngine &EE_, |
5501 | PlaceholderBindings &bindings_, |
5502 | ElemKind topKElemKind) { |
5503 | auto *inp = |
5504 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 1, 5}, "input" , false); |
5505 | |
5506 | bindings_.allocate(inp)->getHandle() = { |
5507 | 0, 18, 7, 16, 5, 14, 33, 2, 41, 0, 1, -23, 34, 4, -5, |
5508 | }; |
5509 | |
5510 | auto *R = F_->createTopK("TopK" , inp, 1, topKElemKind); |
5511 | |
5512 | auto *values = F_->createSave("save.values" , {R, 0}); |
5513 | bindings_.allocate(values->getPlaceholder()); |
5514 | |
5515 | auto *indices = F_->createSave("save.indices" , {R, 1}); |
5516 | bindings_.allocate(indices->getPlaceholder()); |
5517 | |
5518 | EE_.compile(CompilationMode::Infer); |
5519 | EE_.run(bindings_); |
5520 | |
5521 | auto V = bindings_.get(values->getPlaceholder())->getHandle(); |
5522 | auto I = bindings_.get(indices->getPlaceholder())->getHandle<HandleTy>(); |
5523 | |
5524 | EXPECT_FLOAT_EQ(V.at({0, 0, 0}), 18); |
5525 | EXPECT_EQ(I.at({0, 0, 0}), 1); |
5526 | EXPECT_FLOAT_EQ(V.at({1, 0, 0}), 41); |
5527 | EXPECT_EQ(I.at({1, 0, 0}), 3); |
5528 | EXPECT_FLOAT_EQ(V.at({2, 0, 0}), 34); |
5529 | EXPECT_EQ(I.at({2, 0, 0}), 2); |
5530 | } |
5531 | // Check the TopK operator for the special case of K=1. |
5532 | TEST_P(OperatorTest, TopK1) { |
5533 | CHECK_IF_ENABLED(); |
5534 | |
5535 | topK1Template<int64_t>(mod_, F_, EE_, bindings_, ElemKind::Int64ITy); |
5536 | } |
5537 | |
5538 | // Check the TopK operator for the special case of K=1. |
5539 | TEST_P(OperatorTest, TopK1int32) { |
5540 | CHECK_IF_ENABLED(); |
5541 | |
5542 | topK1Template<int32_t>(mod_, F_, EE_, bindings_, ElemKind::Int32ITy); |
5543 | } |
5544 | |
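/// Check that TopK works on quantized (Int8) inputs; for ties (e.g. the
/// repeated 10s and 3s below) the test expects equal values to be returned in
/// order of increasing index.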
5545 | TEST_P(OperatorTest, QuantizedTopK) { |
5546 | CHECK_IF_ENABLED(); |
5547 | |
5548 | auto *INV = mod_.createPlaceholder(ElemKind::Int8QTy, {3, 1, 5}, 1.2, 5, |
5549 | "input" , false); |
5550 | bindings_.allocate(INV)->getHandle<int8_t>() = { |
5551 | -12, -28, -7, 8, -93, 0, 10, 3, -1, 10, -2, 3, -2, 3, 3, |
5552 | }; |
5553 | |
5554 | auto *TK = F_->createTopK("TopK" , INV, 3); |
5555 | |
5556 | auto *values = F_->createSave("save.values" , TK->getValues()); |
5557 | bindings_.allocate(values->getPlaceholder()); |
5558 | auto *indices = F_->createSave("save.indices" , TK->getIndices()); |
5559 | bindings_.allocate(indices->getPlaceholder()); |
5560 | |
5561 | EE_.compile(CompilationMode::Infer); |
5562 | EE_.run(bindings_); |
5563 | |
5564 | auto VH = bindings_.get(values->getPlaceholder())->getHandle<int8_t>(); |
5565 | auto IH = bindings_.get(indices->getPlaceholder())->getHandle<int64_t>(); |
5566 | |
5567 | EXPECT_EQ(VH.at({0, 0, 0}), 8); |
5568 | EXPECT_EQ(IH.at({0, 0, 0}), 3); |
5569 | EXPECT_EQ(VH.at({0, 0, 1}), -7); |
5570 | EXPECT_EQ(IH.at({0, 0, 1}), 2); |
5571 | EXPECT_EQ(VH.at({0, 0, 2}), -12); |
5572 | EXPECT_EQ(IH.at({0, 0, 2}), 0); |
5573 | |
5574 | EXPECT_EQ(VH.at({1, 0, 0}), 10); |
5575 | EXPECT_EQ(IH.at({1, 0, 0}), 1); |
5576 | EXPECT_EQ(VH.at({1, 0, 1}), 10); |
5577 | EXPECT_EQ(IH.at({1, 0, 1}), 4); |
5578 | EXPECT_EQ(VH.at({1, 0, 2}), 3); |
5579 | EXPECT_EQ(IH.at({1, 0, 2}), 2); |
5580 | |
5581 | EXPECT_EQ(VH.at({2, 0, 0}), 3); |
5582 | EXPECT_EQ(IH.at({2, 0, 0}), 1); |
5583 | EXPECT_EQ(VH.at({2, 0, 1}), 3); |
5584 | EXPECT_EQ(IH.at({2, 0, 1}), 3); |
5585 | EXPECT_EQ(VH.at({2, 0, 2}), 3); |
5586 | EXPECT_EQ(IH.at({2, 0, 2}), 4); |
5587 | } |
5588 | |
/// Helper for testing Gather with different \p DTy / \p DataType and
/// \p ITy / \p IndexType.
5590 | template <typename DataType, typename IndexType> |
5591 | static void gatherFloatInputTest(glow::PlaceholderBindings &bindings, |
5592 | glow::Module &mod, glow::Function *F, |
5593 | glow::ExecutionEngine &EE, ElemKind DTy, |
5594 | ElemKind ITy) { |
5595 | /* |
5596 | DATA = [ |
5597 | [1.0, 1.2], |
5598 | [2.3, 3.4], |
5599 | [4.5, 5.7], |
5600 | ] |
5601 | INDICES = [ |
5602 | [0, 1, 0, 1], |
5603 | [1, 2, 2, 0], |
5604 | ] |
5605 | OUTPUT = [ |
5606 | [ |
5607 | [1.0, 1.2], |
5608 | [2.3, 3.4], |
5609 | [1.0, 1.2], |
5610 | [2.3, 3.4], |
5611 | ], |
5612 | [ |
5613 | [2.3, 3.4], |
5614 | [4.5, 5.7], |
5615 | [4.5, 5.7], |
5616 | [1.0, 1.2], |
5617 | ], |
5618 | ] |
5619 | */ |
5620 | auto *data = mod.createPlaceholder(DTy, {3, 2}, "data" , false); |
5621 | auto *indices = mod.createPlaceholder(ITy, {2, 4}, "indices" , false); |
5622 | |
5623 | bindings.allocate(data)->getHandle<DataType>() = { |
5624 | 1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f, |
5625 | }; |
5626 | bindings.allocate(indices)->getHandle<IndexType>() = { |
5627 | 0, 1, 0, 1, 1, 2, 2, 0, |
5628 | }; |
5629 | |
5630 | auto *R = F->createGather("gather" , data, indices); |
5631 | |
5632 | auto *result = F->createSave("save" , R); |
5633 | bindings.allocate(result->getPlaceholder()); |
5634 | |
5635 | EE.compile(CompilationMode::Infer); |
5636 | EE.run(bindings); |
5637 | |
5638 | Tensor *resultT = bindings.get(result->getPlaceholder()); |
5639 | Tensor expectedT(DTy, {2, 4, 2}); |
5640 | expectedT.getHandle<DataType>() = {1.0, 1.2, 2.3, 3.4, 1.0, 1.2, 2.3, 3.4, |
5641 | 2.3, 3.4, 4.5, 5.7, 4.5, 5.7, 1.0, 1.2}; |
5642 | |
5643 | EXPECT_TRUE(resultT->isEqual(expectedT)); |
5644 | } |
5645 | |
/// Test that Gather works along a non-zero dimension (here dim = 1),
/// gathering columns 0 and 2 from each row of a 3x3 matrix.
TEST_P(OperatorTest, GatherDataNonZeroDim) {
  CHECK_IF_ENABLED();

  auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {3, 3}, "data", false);
5648 | auto dimension = 1; |
5649 | auto *indices = |
5650 | mod_.createPlaceholder(ElemKind::Int64ITy, {2}, "indices" , false); |
5651 | |
5652 | bindings_.allocate(data)->getHandle<float>() = { |
5653 | 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, |
5654 | }; |
5655 | |
5656 | bindings_.allocate(indices)->getHandle<int64_t>() = {0l, 2l}; |
5657 | |
5658 | auto *R = F_->createGather("gather" , data, indices, dimension); |
5659 | |
5660 | auto *result = F_->createSave("save" , R); |
5661 | |
5662 | bindings_.allocate(result->getPlaceholder()); |
5663 | |
5664 | EE_.compile(CompilationMode::Infer); |
5665 | EE_.run(bindings_); |
5666 | |
5667 | Tensor *resultT = bindings_.get(result->getPlaceholder()); |
5668 | Tensor expectedT(ElemKind::FloatTy, {3, 2}); |
5669 | expectedT.getHandle<float>() = {1.0, 3.0, 4.0, 6.0, 7.0, 9.0}; |
5670 | |
5671 | EXPECT_TRUE(resultT->isEqual(expectedT)); |
5672 | } |
5673 | |
5674 | /// Test that Gather works with Float data and Int32 indices. |
5675 | TEST_P(OperatorTest, GatherDataFloatIdxInt32) { |
5676 | CHECK_IF_ENABLED(); |
5677 | gatherFloatInputTest<float, int32_t>(bindings_, mod_, F_, EE_, |
5678 | ElemKind::FloatTy, ElemKind::Int32ITy); |
5679 | } |
5680 | |
5681 | #if DIM_T_BITWIDTH >= 64 |
5682 | /// Test that Gather works with Float data and Int64 indices. |
5683 | TEST_P(OperatorTest, GatherDataFloatIdxInt64) { |
5684 | CHECK_IF_ENABLED(); |
5685 | gatherFloatInputTest<float, int64_t>(bindings_, mod_, F_, EE_, |
5686 | ElemKind::FloatTy, ElemKind::Int64ITy); |
5687 | } |
5688 | #endif |
5689 | |
5690 | /// Test that Gather works with Float16 data and Int32 indices. |
5691 | TEST_P(OperatorTest, GatherDataFloat16IdxInt32) { |
5692 | CHECK_IF_ENABLED(); |
5693 | gatherFloatInputTest<float16_t, int32_t>( |
5694 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int32ITy); |
5695 | } |
5696 | |
5697 | /// Test that Gather works with BFloat16 data and Int32 indices. |
5698 | TEST_P(OperatorTest, GatherDataBFloat16IdxInt32) { |
5699 | CHECK_IF_ENABLED(); |
5700 | gatherFloatInputTest<bfloat16_t, int32_t>( |
5701 | bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, ElemKind::Int32ITy); |
5702 | } |
5703 | |
5704 | /// Test that Gather works with Float16 data and Int64 indices. |
5705 | TEST_P(OperatorTest, GatherDataFloat16IdxInt64) { |
5706 | CHECK_IF_ENABLED(); |
5707 | gatherFloatInputTest<float16_t, int64_t>( |
5708 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int64ITy); |
5709 | } |
5710 | |
5711 | /// Test that Gather works with BFloat16 data and Int64 indices. |
5712 | TEST_P(OperatorTest, GatherDataBFloat16IdxInt64) { |
5713 | CHECK_IF_ENABLED(); |
5714 | gatherFloatInputTest<bfloat16_t, int64_t>( |
5715 | bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, ElemKind::Int64ITy); |
5716 | } |
5717 | |
5718 | /// Helper for testing Gather with different \p ITy / \p IndexType. |
5719 | template <typename IndexType> |
5720 | static void gatherInt8InputTest(glow::PlaceholderBindings &bindings, |
5721 | glow::Module &mod, glow::Function *F, |
5722 | glow::ExecutionEngine &EE, ElemKind ITy) { |
5723 | /* |
5724 | DATA = [ |
5725 | [1, 2], |
5726 | [3, 4], |
5727 | [5, 6], |
5728 | ] |
5729 | INDICES = [ |
5730 | [0, 1, 0, 1], |
5731 | [1, 2, 2, 0], |
5732 | ] |
5733 | OUTPUT = [ |
5734 | [ |
5735 | [1, 2], |
5736 | [3, 4], |
5737 | [1, 2], |
5738 | [3, 4], |
5739 | ], |
5740 | [ |
5741 | [3, 4], |
5742 | [5, 6], |
5743 | [5, 6], |
5744 | [1, 2], |
5745 | ], |
5746 | ] |
5747 | */ |
5748 | auto *data = |
5749 | mod.createPlaceholder(ElemKind::Int8QTy, {3, 2}, 1.0, 0, "data" , false); |
5750 | auto *indices = mod.createPlaceholder(ITy, {2, 4}, "indices" , false); |
5751 | |
5752 | bindings.allocate(data)->getHandle<int8_t>() = { |
5753 | 1, 2, 3, 4, 5, 6, |
5754 | }; |
5755 | bindings.allocate(indices)->getHandle<IndexType>() = { |
5756 | 0, 1, 0, 1, 1, 2, 2, 0, |
5757 | }; |
5758 | |
5759 | auto *R = F->createGather("gather" , data, indices); |
5760 | |
5761 | auto *result = F->createSave("save" , R); |
5762 | bindings.allocate(result->getPlaceholder()); |
5763 | |
5764 | EE.compile(CompilationMode::Infer); |
5765 | EE.run(bindings); |
5766 | |
5767 | Tensor *resultT = bindings.get(result->getPlaceholder()); |
5768 | Tensor expectedT(ElemKind::Int8QTy, {2, 4, 2}, 1.0, 0); |
5769 | expectedT.getHandle<int8_t>() = {1, 2, 3, 4, 1, 2, 3, 4, |
5770 | 3, 4, 5, 6, 5, 6, 1, 2}; |
5771 | |
5772 | EXPECT_TRUE(resultT->isEqual(expectedT)); |
5773 | } |
5774 | |
5775 | /// Test that Gather works with Int8 data and Int32 indices. |
5776 | TEST_P(OperatorTest, GatherDataInt8IdxInt32) { |
5777 | CHECK_IF_ENABLED(); |
5778 | gatherInt8InputTest<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
5779 | } |
5780 | |
5781 | #if DIM_T_BITWIDTH >= 64 |
5782 | /// Test that Gather works with Int8 data and Int64 indices. |
5783 | TEST_P(OperatorTest, GatherDataInt8IdxInt64) { |
5784 | CHECK_IF_ENABLED(); |
5785 | gatherInt8InputTest<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
5786 | } |
5787 | #endif |
5788 | |
/// Helper for testing GatherND with different \p DTy / \p DataType and
/// \p ITy / \p IndexType, with \p batchDims leading batch dimensions.
5790 | template <typename DataType, typename IndexType> |
5791 | static void gatherNDFloatTest( |
5792 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
5793 | glow::ExecutionEngine &EE, ElemKind DTy, ElemKind ITy, |
5794 | std::vector<dim_t> dataDims, std::vector<DataType> dataVals, |
5795 | std::vector<dim_t> indicesDims, std::vector<IndexType> indicesVals, |
5796 | std::vector<dim_t> outputDims, std::vector<DataType> outputVals, |
5797 | unsigned_t batchDims) { |
5798 | |
5799 | auto *data = mod.createPlaceholder(DTy, dataDims, "data" , false); |
5800 | auto *indices = mod.createPlaceholder(ITy, indicesDims, "indices" , false); |
5801 | bindings.allocate(data)->getHandle<DataType>() = dataVals; |
5802 | bindings.allocate(indices)->getHandle<IndexType>() = indicesVals; |
5803 | auto *R = F->createGatherND("gatherND" , data, indices, batchDims); |
5804 | auto *result = F->createSave("save" , R); |
5805 | bindings.allocate(result->getPlaceholder()); |
5806 | |
5807 | EE.compile(CompilationMode::Infer); |
5808 | EE.run(bindings); |
5809 | |
5810 | Tensor *resultT = bindings.get(result->getPlaceholder()); |
5811 | Tensor expectedT(DTy, outputDims); |
5812 | expectedT.getHandle<DataType>() = outputVals; |
5813 | EXPECT_TRUE(resultT->isEqual(expectedT)); |
5814 | } |
5815 | |
5816 | template <typename DataType, typename IndexType> |
5817 | static void gatherNDFloatTest1(glow::PlaceholderBindings &bindings, |
5818 | glow::Module &mod, glow::Function *F, |
5819 | glow::ExecutionEngine &EE, ElemKind DTy, |
5820 | ElemKind ITy) { |
5821 | // Example 1 |
5822 | // batch_dims = 0 |
5823 | // data = [[0,1],[2,3]] # data_shape = [2, 2] |
5824 | // indices = [[0,0],[1,1]] # indices_shape = [2, 2] |
5825 | // output = [0,3] # output_shape = [2] |
5826 | gatherNDFloatTest<DataType, IndexType>(bindings, mod, F, EE, DTy, ITy, {2, 2}, |
5827 | {0.0, 1.0, 2.0, 3.0}, {2, 2}, |
5828 | {0, 0, 1, 1}, {2}, {0.0, 3.0}, 0); |
5829 | } |
5830 | |
5831 | template <typename DataType, typename IndexType> |
5832 | static void gatherNDFloatTest2(glow::PlaceholderBindings &bindings, |
5833 | glow::Module &mod, glow::Function *F, |
5834 | glow::ExecutionEngine &EE, ElemKind DTy, |
5835 | ElemKind ITy) { |
5836 | // Example 2 |
5837 | // batch_dims = 0 |
5838 | // data = [[0,1],[2,3]] # data_shape = [2, 2] |
5839 | // indices = [[1],[0]] # indices_shape = [2, 1] |
5840 | // output = [[2,3],[0,1]] # output_shape = [2, 2] |
5841 | gatherNDFloatTest<DataType, IndexType>(bindings, mod, F, EE, DTy, ITy, {2, 2}, |
5842 | {0.0, 1.0, 2.0, 3.0}, {2, 1}, {1, 0}, |
5843 | {2, 2}, {2.0, 3.0, 0.0, 1.0}, 0); |
5844 | } |
5845 | |
5846 | template <typename DataType, typename IndexType> |
5847 | static void gatherNDFloatTest3(glow::PlaceholderBindings &bindings, |
5848 | glow::Module &mod, glow::Function *F, |
5849 | glow::ExecutionEngine &EE, ElemKind DTy, |
5850 | ElemKind ITy) { |
5851 | // Example 3 |
5852 | // batch_dims = 0 |
5853 | // data = [[[0,1],[2,3]],[[4,5],[6,7]]] # data_shape = [2, 2, 2] |
5854 | // indices = [[0,1],[1,0]] # indices_shape = [2, 2] |
5855 | // output = [[2,3],[4,5]] # output_shape = [2, 2] |
5856 | gatherNDFloatTest<DataType, IndexType>( |
5857 | bindings, mod, F, EE, DTy, ITy, {2, 2, 2}, |
5858 | {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, {2, 2}, {0, 1, 1, 0}, {2, 2}, |
5859 | {2.0, 3.0, 4.0, 5.0}, 0); |
5860 | } |
5861 | |
5862 | template <typename DataType, typename IndexType> |
5863 | static void gatherNDFloatTest4(glow::PlaceholderBindings &bindings, |
5864 | glow::Module &mod, glow::Function *F, |
5865 | glow::ExecutionEngine &EE, ElemKind DTy, |
5866 | ElemKind ITy) { |
5867 | // Example 4 |
5868 | // batch_dims = 0 |
5869 | // data = [[[0,1],[2,3]],[[4,5],[6,7]]] # data_shape = [2, 2, 2] |
5870 | // indices = [[[0,1]],[[1,0]]] # indices_shape = [2, 1, 2] |
5871 | // output = [[[2,3]],[[4,5]]] # output_shape = [2, 1, 2] |
5872 | gatherNDFloatTest<DataType, IndexType>( |
5873 | bindings, mod, F, EE, DTy, ITy, {2, 2, 2}, |
5874 | {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, {2, 1, 2}, {0, 1, 1, 0}, |
5875 | {2, 1, 2}, {2.0, 3.0, 4.0, 5.0}, 0); |
5876 | } |
5877 | |
5878 | template <typename DataType, typename IndexType> |
5879 | static void gatherNDFloatTest5(glow::PlaceholderBindings &bindings, |
5880 | glow::Module &mod, glow::Function *F, |
5881 | glow::ExecutionEngine &EE, ElemKind DTy, |
5882 | ElemKind ITy) { |
5883 | // Example 5 |
5884 | // batch_dims = 1 |
5885 | // data = [[[0,1],[2,3]],[[4,5],[6,7]]] # data_shape = [2, 2, 2] |
5886 | // indices = [[1],[0]] # indices_shape = [2, 1] |
5887 | // output = [[2,3],[4,5]] # output_shape = [2, 2] |
5888 | gatherNDFloatTest<DataType, IndexType>( |
5889 | bindings, mod, F, EE, DTy, ITy, {2, 2, 2}, |
5890 | {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, {2, 1}, {1, 0}, {2, 2}, |
5891 | {2.0, 3.0, 4.0, 5.0}, 1); |
5892 | } |
5893 | |
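/// Instantiate an OperatorTest running GatherND example \p N with data kind
/// \p DATA_KIND / type \p DATA_TYPE and index kind \p INDEX_KIND / type
/// \p INDEX_TYPE.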
5894 | #define TEST_GATHER_ND(N, DATA_KIND, INDEX_KIND, DATA_TYPE, INDEX_TYPE) \ |
5895 | TEST_P(OperatorTest, GatherND_##DATA_KIND##_##INDEX_KIND##_Test##N) { \ |
5896 | CHECK_IF_ENABLED(); \ |
5897 | gatherNDFloatTest##N<DATA_TYPE, INDEX_TYPE>( \ |
5898 | bindings_, mod_, F_, EE_, ElemKind::DATA_KIND, ElemKind::INDEX_KIND); \ |
5899 | } |
5900 | |
5901 | TEST_GATHER_ND(1, FloatTy, Int32ITy, float, int32_t) |
5902 | TEST_GATHER_ND(2, FloatTy, Int32ITy, float, int32_t) |
5903 | TEST_GATHER_ND(3, FloatTy, Int32ITy, float, int32_t) |
5904 | TEST_GATHER_ND(4, FloatTy, Int32ITy, float, int32_t) |
5905 | TEST_GATHER_ND(5, FloatTy, Int32ITy, float, int32_t) |
5906 | |
5907 | #if DIM_T_BITWIDTH >= 64 |
5908 | TEST_GATHER_ND(1, FloatTy, Int64ITy, float, int64_t) |
5909 | TEST_GATHER_ND(2, FloatTy, Int64ITy, float, int64_t) |
5910 | TEST_GATHER_ND(3, FloatTy, Int64ITy, float, int64_t) |
5911 | TEST_GATHER_ND(4, FloatTy, Int64ITy, float, int64_t) |
5912 | TEST_GATHER_ND(5, FloatTy, Int64ITy, float, int64_t) |
5913 | #endif |
5914 | |
5915 | TEST_GATHER_ND(1, Float16Ty, Int32ITy, float16_t, int32_t) |
5916 | TEST_GATHER_ND(2, Float16Ty, Int32ITy, float16_t, int32_t) |
5917 | TEST_GATHER_ND(3, Float16Ty, Int32ITy, float16_t, int32_t) |
5918 | TEST_GATHER_ND(4, Float16Ty, Int32ITy, float16_t, int32_t) |
5919 | TEST_GATHER_ND(5, Float16Ty, Int32ITy, float16_t, int32_t) |
5920 | |
5921 | #if DIM_T_BITWIDTH >= 64 |
5922 | TEST_GATHER_ND(1, Float16Ty, Int64ITy, float16_t, int64_t) |
5923 | TEST_GATHER_ND(2, Float16Ty, Int64ITy, float16_t, int64_t) |
5924 | TEST_GATHER_ND(3, Float16Ty, Int64ITy, float16_t, int64_t) |
5925 | TEST_GATHER_ND(4, Float16Ty, Int64ITy, float16_t, int64_t) |
5926 | TEST_GATHER_ND(5, Float16Ty, Int64ITy, float16_t, int64_t) |
5927 | #endif |
5928 | |
5929 | #undef TEST_GATHER_ND |
5930 | |
5931 | /// Helper for testing GatherND with different \p ITy / \p IndexType. |
5932 | template <typename IndexType> |
5933 | static void gatherNDInt8InputTest(glow::PlaceholderBindings &bindings, |
5934 | glow::Module &mod, glow::Function *F, |
5935 | glow::ExecutionEngine &EE, ElemKind ITy) { |
5936 | /* |
5937 | Data = [ |
5938 | [ |
5939 | [0,1], |
5940 | [2,3] |
5941 | ], |
5942 | [ |
5943 | [4,5], |
5944 | [6,7] |
5945 | ] |
5946 | ] |
5947 | |
5948 | INDICES = [ |
5949 | [[0,1], |
5950 | [1,0]] |
5951 | ] |
5952 | |
5953 | OUTPUT = [ |
5954 | [2,3], |
5955 | [4,5] |
5956 | ] |
5957 | */ |
5958 | |
5959 | auto *data = mod.createPlaceholder(ElemKind::Int8QTy, {2, 2, 2}, 1.0, 0, |
5960 | "data" , false); |
5961 | auto *indices = mod.createPlaceholder(ITy, {2, 1, 2}, "indices" , false); |
5962 | |
5963 | bindings.allocate(data)->getHandle<int8_t>() = { |
5964 | 0, 1, 2, 3, 4, 5, 6, 7, |
5965 | }; |
  bindings.allocate(indices)->getHandle<IndexType>() = {
      0, 1, 1, 0,
  };
5972 | |
5973 | auto *R = F->createGatherND("gather" , data, indices); |
5974 | |
5975 | auto *result = F->createSave("save" , R); |
5976 | bindings.allocate(result->getPlaceholder()); |
5977 | |
5978 | EE.compile(CompilationMode::Infer); |
5979 | EE.run(bindings); |
5980 | |
5981 | Tensor *resultT = bindings.get(result->getPlaceholder()); |
5982 | Tensor expectedT(ElemKind::Int8QTy, {2, 1, 2}, 1.0, 0); |
5983 | expectedT.getHandle<int8_t>() = {2, 3, 4, 5}; |
5984 | |
5985 | EXPECT_TRUE(resultT->isEqual(expectedT)); |
5986 | } |
5987 | |
/// Test that GatherND works with Int8 data and Int32 indices.
5989 | TEST_P(OperatorTest, GatherNDDataInt8IdxInt32) { |
5990 | CHECK_IF_ENABLED(); |
5991 | gatherNDInt8InputTest<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
5992 | } |
5993 | |
5994 | #if DIM_T_BITWIDTH >= 64 |
/// Test that GatherND works with Int8 data and Int64 indices.
5996 | TEST_P(OperatorTest, GatherNDDataInt8IdxInt64) { |
5997 | CHECK_IF_ENABLED(); |
5998 | gatherNDInt8InputTest<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
5999 | } |
6000 | #endif |
6001 | |
/// Helper for testing GatherRanges with different \p DTy / \p DataType and
/// \p ITy / \p IndexType.
template <typename DataType, typename IndexType>
static void gatherRangesTest(glow::PlaceholderBindings &bindings_,
                             glow::Module &mod_, glow::Function *F_,
                             glow::ExecutionEngine &EE_, ElemKind DTy,
                             ElemKind ITy) {
6007 | /* |
6008 | DATA = [1, 2, 3, 4, 5, 6] |
6009 | RANGES = [ |
6010 | [ |
6011 | [0, 1], |
6012 | [2, 2], |
6013 | ], |
6014 | [ |
6015 | [4, 1], |
6016 | [5, 1], |
6017 | ] |
6018 | ] |
6019 | OUTPUT = [1, 3, 4, 5, 6] |
6020 | LENGTHS = [3, 2] |
6021 | */ |
6022 | auto *data = createPlaceholderConditionallyQuantized(mod_, DTy, {6}, "data" , |
6023 | false, "N" ); |
6024 | auto *ranges = mod_.createPlaceholder(ITy, {2, 2, 2}, "ranges" , false); |
6025 | |
6026 | bindings_.allocate(data)->getHandle<DataType>() = {1, 2, 3, 4, 5, 6}; |
6027 | bindings_.allocate(ranges)->getHandle<IndexType>() = {0, 1, 2, 2, 4, 1, 5, 1}; |
6028 | |
6029 | auto *R = |
6030 | F_->createGatherRanges("gatherranges" , data, ranges, /*maxOutputSize=*/5); |
6031 | |
6032 | auto *output = F_->createSave("output" , R->getOutput()); |
6033 | auto *lengths = F_->createSave("lengths" , R->getLengths()); |
6034 | |
6035 | Tensor *outputT = bindings_.allocate(output->getPlaceholder()); |
6036 | Tensor *lengthsT = bindings_.allocate(lengths->getPlaceholder()); |
6037 | |
6038 | EE_.compile(CompilationMode::Infer); |
6039 | EE_.run(bindings_); |
6040 | |
6041 | auto expectedOutputT = createTensorConditionallyQuantized(DTy, {5}); |
6042 | expectedOutputT.getHandle<DataType>() = {1, 3, 4, 5, 6}; |
6043 | EXPECT_TRUE(outputT->isEqual(expectedOutputT)); |
6044 | |
6045 | Tensor expectedLengthsT(ITy, {2}); |
6046 | expectedLengthsT.getHandle<IndexType>() = {3, 2}; |
6047 | EXPECT_TRUE(lengthsT->isEqual(expectedLengthsT)); |
6048 | } |
6049 | |
6050 | /// Test GatherRanges with Int64 data and Int32 indices. |
6051 | TEST_P(OperatorTest, GatherRangesDataInt64IdxInt32) { |
6052 | CHECK_IF_ENABLED(); |
6053 | gatherRangesTest<int64_t, int32_t>(bindings_, mod_, F_, EE_, |
6054 | ElemKind::Int64ITy, ElemKind::Int32ITy); |
6055 | } |
6056 | |
6057 | #if DIM_T_BITWIDTH >= 64 |
6058 | /// Test GatherRanges with Int64 data and Int64 indices. |
6059 | TEST_P(OperatorTest, GatherRangesDataInt64IdxInt64) { |
6060 | CHECK_IF_ENABLED(); |
6061 | gatherRangesTest<int64_t, int64_t>(bindings_, mod_, F_, EE_, |
6062 | ElemKind::Int64ITy, ElemKind::Int64ITy); |
6063 | } |
6064 | #endif |
6065 | |
6066 | /// Test GatherRanges with Float data and Int32 indices. |
6067 | TEST_P(OperatorTest, GatherRangesDataFloatIdxInt32) { |
6068 | CHECK_IF_ENABLED(); |
6069 | gatherRangesTest<float, int32_t>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
6070 | ElemKind::Int32ITy); |
6071 | } |
6072 | |
6073 | #if DIM_T_BITWIDTH >= 64 |
6074 | /// Test GatherRanges with Float data and Int64 indices. |
6075 | TEST_P(OperatorTest, GatherRangesDataFloatIdxInt64) { |
6076 | CHECK_IF_ENABLED(); |
6077 | gatherRangesTest<float, int64_t>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
6078 | ElemKind::Int64ITy); |
6079 | } |
6080 | #endif |
6081 | |
6082 | /// Test GatherRanges with Float16 data and Int32 indices. |
6083 | TEST_P(OperatorTest, GatherRangesDataFloat16IdxInt32) { |
6084 | CHECK_IF_ENABLED(); |
6085 | gatherRangesTest<float16_t, int32_t>(bindings_, mod_, F_, EE_, |
6086 | ElemKind::Float16Ty, ElemKind::Int32ITy); |
6087 | } |
6088 | |
6089 | /// Test GatherRanges with BFloat16 data and Int32 indices. |
6090 | TEST_P(OperatorTest, GatherRangesDataBFloat16IdxInt32) { |
6091 | CHECK_IF_ENABLED(); |
6092 | gatherRangesTest<bfloat16_t, int32_t>( |
6093 | bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, ElemKind::Int32ITy); |
6094 | } |
6095 | |
6096 | #if DIM_T_BITWIDTH >= 64 |
6097 | /// Test GatherRanges with Float16 data and Int64 indices. |
6098 | TEST_P(OperatorTest, GatherRangesDataFloat16IdxInt64) { |
6099 | CHECK_IF_ENABLED(); |
6100 | gatherRangesTest<float16_t, int64_t>(bindings_, mod_, F_, EE_, |
6101 | ElemKind::Float16Ty, ElemKind::Int64ITy); |
6102 | } |
6103 | |
6104 | /// Test GatherRanges with BFloat16 data and Int64 indices. |
6105 | TEST_P(OperatorTest, GatherRangesDataBFloat16IdxInt64) { |
6106 | CHECK_IF_ENABLED(); |
6107 | gatherRangesTest<bfloat16_t, int64_t>( |
6108 | bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, ElemKind::Int64ITy); |
6109 | } |
6110 | #endif |
6111 | |
6112 | /// Test GatherRanges with Int8Q data and Int32 indices. |
6113 | TEST_P(OperatorTest, GatherRangesDataInt8QIdxInt32) { |
6114 | CHECK_IF_ENABLED(); |
6115 | gatherRangesTest<int8_t, int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, |
6116 | ElemKind::Int32ITy); |
6117 | } |
6118 | |
6119 | #if DIM_T_BITWIDTH >= 64 |
6120 | /// Test GatherRanges with Int8Q data and Int64 indices. |
6121 | TEST_P(OperatorTest, GatherRangesDataInt8QIdxInt64) { |
6122 | CHECK_IF_ENABLED(); |
6123 | gatherRangesTest<int8_t, int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, |
6124 | ElemKind::Int64ITy); |
6125 | } |
6126 | #endif |
6127 | |
/// Check that the code generation of transposes
/// is correct for tensors with 2 dimensions.
/// Note: This assumes that Tensor::transpose is correct.
6131 | TEST_P(OperatorTest, Transpose2Dims) { |
6132 | CHECK_IF_ENABLED(); |
6133 | |
6134 | auto *A = mod_.createPlaceholder(ElemKind::FloatTy, {20, 13}, "A" , false); |
6135 | bindings_.allocate(A)->getHandle().randomize(-3.0, 3.0, mod_.getPRNG()); |
6136 | |
6137 | auto *tr = F_->createTranspose("tr" , A, {1, 0}); |
6138 | auto *result = F_->createSave("saveTranspose" , tr); |
6139 | bindings_.allocate(result->getPlaceholder()); |
6140 | |
6141 | EE_.compile(CompilationMode::Infer); |
6142 | EE_.run(bindings_); |
6143 | |
6144 | Tensor dest(ElemKind::FloatTy, {13, 20}); |
6145 | bindings_.get(A)->transpose(&dest, {1, 0}); |
6146 | EXPECT_TRUE(bindings_.get(result->getPlaceholder())->isEqual(dest)); |
6147 | } |
6148 | |
6149 | /// Check that transpose is supported for FP16. |
6150 | TEST_P(OperatorTest, FP16Transpose2Dims) { |
6151 | CHECK_IF_ENABLED(); |
6152 | |
6153 | auto *A = mod_.createPlaceholder(ElemKind::Float16Ty, {20, 13}, "A" , false); |
6154 | bindings_.allocate(A)->getHandle<float16_t>().randomize(-3.0, 3.0, |
6155 | mod_.getPRNG()); |
6156 | |
6157 | auto *tr = F_->createTranspose("tr" , A, {1, 0}); |
6158 | auto *result = F_->createSave("saveTranspose" , tr); |
6159 | bindings_.allocate(result->getPlaceholder()); |
6160 | |
6161 | EE_.compile(CompilationMode::Infer); |
6162 | EE_.run(bindings_); |
6163 | |
6164 | Tensor dest(ElemKind::Float16Ty, {13, 20}); |
6165 | bindings_.get(A)->transpose(&dest, {1, 0}); |
6166 | EXPECT_TRUE(bindings_.get(result->getPlaceholder())->isEqual(dest)); |
6167 | } |
6168 | |
6169 | /// Check that transpose is supported for BFloat16. |
6170 | TEST_P(OperatorTest, BFloat16Transpose2Dims) { |
6171 | CHECK_IF_ENABLED(); |
6172 | |
6173 | auto *A = mod_.createPlaceholder(ElemKind::BFloat16Ty, {20, 13}, "A" , false); |
6174 | bindings_.allocate(A)->getHandle<bfloat16_t>().randomize(-3.0, 3.0, |
6175 | mod_.getPRNG()); |
6176 | |
6177 | auto *tr = F_->createTranspose("tr" , A, {1, 0}); |
6178 | auto *result = F_->createSave("saveTranspose" , tr); |
6179 | bindings_.allocate(result->getPlaceholder()); |
6180 | |
6181 | EE_.compile(CompilationMode::Infer); |
6182 | EE_.run(bindings_); |
6183 | |
6184 | Tensor dest(ElemKind::BFloat16Ty, {13, 20}); |
6185 | bindings_.get(A)->transpose(&dest, {1, 0}); |
6186 | EXPECT_TRUE(bindings_.get(result->getPlaceholder())->isEqual(dest)); |
6187 | } |
6188 | |
6189 | /// Check that transpose is supported for BoolTy. |
6190 | TEST_P(OperatorTest, BoolTranspose2Dims) { |
6191 | CHECK_IF_ENABLED(); |
6192 | |
6193 | auto *A = mod_.createPlaceholder(ElemKind::BoolTy, {20, 13}, "A" , false); |
6194 | bindings_.allocate(A)->getHandle<bool>().randomize(0, 1, mod_.getPRNG()); |
6195 | |
6196 | auto *tr = F_->createTranspose("tr" , A, {1, 0}); |
6197 | auto *result = F_->createSave("saveTranspose" , tr); |
6198 | bindings_.allocate(result->getPlaceholder()); |
6199 | |
6200 | EE_.compile(CompilationMode::Infer); |
6201 | EE_.run(bindings_); |
6202 | |
6203 | Tensor dest(ElemKind::BoolTy, {13, 20}); |
6204 | bindings_.get(A)->transpose(&dest, {1, 0}); |
6205 | EXPECT_TRUE(bindings_.get(result->getPlaceholder())->isEqual(dest)); |
6206 | } |
6207 | |
6208 | /// Check that transpose is supported for 6 dimensions. |
6209 | TEST_P(OperatorTest, Transpose6Dims) { |
6210 | CHECK_IF_ENABLED(); |
6211 | |
6212 | auto *A = |
6213 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 2, 2, 3, 3}, "A" , false); |
6214 | bindings_.allocate(A)->getHandle().randomize(0, 100, mod_.getPRNG()); |
6215 | |
6216 | auto *tr = F_->createTranspose("tr" , A, {0, 3, 4, 1, 5, 2}); |
6217 | auto *result = F_->createSave("saveTranspose" , tr); |
6218 | bindings_.allocate(result->getPlaceholder()); |
6219 | |
6220 | EE_.compile(CompilationMode::Infer); |
6221 | EE_.run(bindings_); |
6222 | |
6223 | Tensor dest(ElemKind::FloatTy, {1, 2, 2, 2, 3, 3}); |
6224 | bindings_.get(A)->transpose(&dest, {0, 3, 4, 1, 5, 2}); |
6225 | EXPECT_TRUE(bindings_.get(result->getPlaceholder())->isEqual(dest)); |
6226 | } |
6227 | |
6228 | /// Helper to check if the code generation of transposes |
6229 | /// is correct for tensors with 3 dimensions using \p DTy. |
6230 | /// Note: This assumes that Tensor::transpose is correct. |
6231 | template <typename DataType> |
6232 | static void testTranspose3Dims(glow::PlaceholderBindings &bindings, |
6233 | glow::Module &mod, glow::Function *F, |
6234 | glow::ExecutionEngine &EE, ElemKind DTy) { |
6235 | constexpr dim_t dims[] = {20, 13, 7}; |
6236 | auto *A = createPlaceholderConditionallyQuantized(mod, DTy, dims, "A" , false); |
6237 | bindings.allocate(A)->getHandle<DataType>().randomize(-3.0, 3.0, |
6238 | mod.getPRNG()); |
6239 | |
6240 | int nbOfShuffle = 0; |
6241 | SaveNode *savedTransposes[6]; |
6242 | unsigned_t shuffles[6][3]; |
6243 | |
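  // Build and save a transpose for every one of the 3! = 6 permutations
  // (i, j, k) of the three dimensions.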
6244 | for (unsigned_t i = 0; i < 3; ++i) { |
6245 | for (unsigned_t j = 0; j < 3; ++j) { |
6246 | if (j == i) { |
6247 | continue; |
6248 | } |
6249 | for (unsigned_t k = 0; k < 3; ++k) { |
6250 | if (k == j || k == i) { |
6251 | continue; |
6252 | } |
6253 | shuffles[nbOfShuffle][0] = i; |
6254 | shuffles[nbOfShuffle][1] = j; |
6255 | shuffles[nbOfShuffle][2] = k; |
6256 | auto *tr = F->createTranspose("tr" , A, shuffles[nbOfShuffle]); |
6257 | savedTransposes[nbOfShuffle] = F->createSave("saveTranspose" , tr); |
6258 | bindings.allocate(savedTransposes[nbOfShuffle]->getPlaceholder()); |
6259 | ++nbOfShuffle; |
6260 | } |
6261 | } |
6262 | } |
6263 | |
6264 | // We should have exactly 6 possible permutations for 3 dimensions. |
6265 | EXPECT_EQ(6, nbOfShuffle); |
6266 | |
6267 | EE.compile(CompilationMode::Infer); |
6268 | EE.run(bindings); |
6269 | |
6270 | for (int i = 0; i < 6; ++i) { |
6271 | auto dest = createTensorConditionallyQuantized( |
6272 | DTy, |
6273 | {dims[shuffles[i][0]], dims[shuffles[i][1]], dims[shuffles[i][2]]}); |
6274 | bindings.get(A)->transpose(&dest, shuffles[i]); |
6275 | EXPECT_TRUE( |
6276 | bindings.get(savedTransposes[i]->getPlaceholder())->isEqual(dest)); |
6277 | } |
6278 | } |
6279 | |
6280 | /// Test Transpose3Dims with Float. |
6281 | TEST_P(OperatorTest, Transpose3Dims_Float) { |
6282 | CHECK_IF_ENABLED(); |
6283 | testTranspose3Dims<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
6284 | } |
6285 | |
6286 | /// Test Transpose3Dims with Float16. |
6287 | TEST_P(OperatorTest, Transpose3Dims_Float16) { |
6288 | CHECK_IF_ENABLED(); |
6289 | testTranspose3Dims<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
6290 | } |
6291 | |
6292 | /// Test Transpose3Dims with BFloat16. |
6293 | TEST_P(OperatorTest, Transpose3Dims_BFloat16) { |
6294 | CHECK_IF_ENABLED(); |
6295 | testTranspose3Dims<bfloat16_t>(bindings_, mod_, F_, EE_, |
6296 | ElemKind::BFloat16Ty); |
6297 | } |
6298 | |
6299 | /// Test Transpose3Dims with Int8. |
6300 | TEST_P(OperatorTest, Transpose3Dims_Int8) { |
6301 | CHECK_IF_ENABLED(); |
6302 | testTranspose3Dims<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
6303 | } |
6304 | |
6305 | /// Test that Transpose optimization into Reshape yields expected results. |
6306 | TEST_P(OperatorTest, TransposeIntoReshapeOptim) { |
6307 | CHECK_IF_ENABLED(); |
6308 | auto *batch = mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 2, 4}, "batch" , |
6309 | false, "NHWC" ); |
6310 | auto IH = bindings_.allocate(batch)->getHandle(); |
6311 | for (size_t i = 0; i < 24; i++) { |
6312 | IH.raw(i) = i + 1; |
6313 | } |
6314 | |
6315 | Node *T = F_->createTranspose("transpose" , batch, {1, 2, 0, 3}, "HWNC" ); |
6316 | Node *R = F_->createBatchedReduceMean("reduce.mean" , T, {2, 3}); |
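  // Reducing the HWNC transpose over dims {2, 3} (N and C) leaves shape
  // {3, 2}; each output is the mean of 4 consecutive NHWC values, e.g.
  // mean(1..4) = 2.5.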
6317 | SaveNode *O = F_->createSave("ret" , R); |
6318 | bindings_.allocate(mod_.getPlaceholders()); |
6319 | EE_.compile(CompilationMode::Infer); |
6320 | EE_.run(bindings_); |
6321 | |
6322 | auto result = bindings_.get(O->getPlaceholder())->getHandle(); |
6323 | std::vector<dim_t> expectedDims = {3, 2}; |
6324 | EXPECT_TRUE(result.dims().vec() == expectedDims); |
6325 | |
6326 | std::vector<float> expectedValues = {2.5f, 6.5f, 10.5f, 14.5f, 18.5f, 22.5f}; |
6327 | for (size_t i = 0; i < 3 * 2; i++) { |
6328 | EXPECT_EQ(result.raw(i), expectedValues[i]); |
6329 | } |
6330 | } |
6331 | |
6332 | /// Helper to check the code generation for flip nodes. |
6333 | template <typename elemType> |
6334 | static void testFlip(glow::PlaceholderBindings &bindings, glow::Module &mod, |
6335 | glow::Function *F, glow::ExecutionEngine &EE, |
6336 | std::vector<elemType> inputData, |
6337 | std::vector<elemType> expectedData, |
6338 | llvm::ArrayRef<dim_t> dims, dim_t axis, |
6339 | ElemKind elemKind = ElemKind::FloatTy) { |
6340 | |
6341 | // Create network. |
6342 | auto *input = |
6343 | createPlaceholderConditionallyQuantized(mod, elemKind, dims, "input" , |
6344 | /* isTrainable */ false); |
6345 | auto *flip = F->createFlip("flip" , input, axis); |
6346 | Placeholder *output = F->createSave("save" , flip)->getPlaceholder(); |
6347 | |
6348 | // Allocate input/output and initialize input. |
6349 | auto inputH = bindings.allocate(input)->getHandle<elemType>(); |
6350 | auto outputH = bindings.allocate(output)->getHandle<elemType>(); |
6351 | inputH = inputData; |
6352 | |
6353 | // Compile and run. |
6354 | EE.compile(CompilationMode::Infer); |
6355 | EE.run(bindings); |
6356 | |
6357 | // Compare output with reference. |
6358 | EXPECT_EQ(outputH.size(), expectedData.size()); |
6359 | for (size_t i = 0; i < expectedData.size(); i++) { |
6360 | EXPECT_EQ(outputH.raw(i), expectedData[i]); |
6361 | } |
6362 | } |
6363 | |
6364 | /// Test Flip 1D with Int8. |
6365 | TEST_P(OperatorTest, Flip1D_Int8) { |
6366 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6367 | testFlip<int8_t>(bindings_, mod_, F_, EE_, {1, 2, 3, 4}, {4, 3, 2, 1}, {4}, 0, |
6368 | ElemKind::Int8QTy); |
6369 | } |
6370 | |
6371 | /// Test Flip 1D with Int32. |
6372 | TEST_P(OperatorTest, Flip1D_Int32) { |
6373 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6374 | testFlip<int32_t>(bindings_, mod_, F_, EE_, {1, 2, 3, 4}, {4, 3, 2, 1}, {4}, |
6375 | 0, ElemKind::Int32QTy); |
6376 | } |
6377 | |
6378 | /// Test Flip 1D with Int64. |
6379 | TEST_P(OperatorTest, Flip1D_Int64) { |
6380 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6381 | testFlip<int64_t>(bindings_, mod_, F_, EE_, {1, 2, 3, 4}, {4, 3, 2, 1}, {4}, |
6382 | 0, ElemKind::Int64ITy); |
6383 | } |
6384 | |
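// Expected outputs for flipping a tensor of consecutive values along each
// axis: FLIP_kD_AXISn is FLIP_kD_INPUT (shape 2x...x2, k dims) reversed along
// axis n.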
6385 | #define FLIP_3D_INPUT \ |
6386 | { 1, 2, 3, 4, 5, 6, 7, 8 } |
6387 | #define FLIP_3D_AXIS0 \ |
6388 | { 5, 6, 7, 8, 1, 2, 3, 4 } |
6389 | #define FLIP_3D_AXIS1 \ |
6390 | { 3, 4, 1, 2, 7, 8, 5, 6 } |
6391 | #define FLIP_3D_AXIS2 \ |
6392 | { 2, 1, 4, 3, 6, 5, 8, 7 } |
6393 | |
6394 | #define FLIP_4D_INPUT \ |
6395 | { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 } |
6396 | #define FLIP_4D_AXIS0 \ |
6397 | { 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8 } |
6398 | #define FLIP_4D_AXIS1 \ |
6399 | { 5, 6, 7, 8, 1, 2, 3, 4, 13, 14, 15, 16, 9, 10, 11, 12 } |
6400 | #define FLIP_4D_AXIS2 \ |
6401 | { 3, 4, 1, 2, 7, 8, 5, 6, 11, 12, 9, 10, 15, 16, 13, 14 } |
6402 | #define FLIP_4D_AXIS3 \ |
6403 | { 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15 } |
6404 | |
6405 | #define FLIP_5D_INPUT \ |
6406 | { \ |
6407 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, \ |
6408 | 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 \ |
6409 | } |
6410 | #define FLIP_5D_AXIS0 \ |
6411 | { \ |
6412 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, \ |
6413 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 \ |
6414 | } |
6415 | #define FLIP_5D_AXIS1 \ |
6416 | { \ |
6417 | 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 25, 26, 27, 28, 29, \ |
6418 | 30, 31, 32, 17, 18, 19, 20, 21, 22, 23, 24 \ |
6419 | } |
6420 | #define FLIP_5D_AXIS2 \ |
6421 | { \ |
6422 | 5, 6, 7, 8, 1, 2, 3, 4, 13, 14, 15, 16, 9, 10, 11, 12, 21, 22, 23, 24, 17, \ |
6423 | 18, 19, 20, 29, 30, 31, 32, 25, 26, 27, 28 \ |
6424 | } |
6425 | #define FLIP_5D_AXIS3 \ |
6426 | { \ |
6427 | 3, 4, 1, 2, 7, 8, 5, 6, 11, 12, 9, 10, 15, 16, 13, 14, 19, 20, 17, 18, 23, \ |
6428 | 24, 21, 22, 27, 28, 25, 26, 31, 32, 29, 30 \ |
6429 | } |
6430 | #define FLIP_5D_AXIS4 \ |
6431 | { \ |
6432 | 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, \ |
6433 | 21, 24, 23, 26, 25, 28, 27, 30, 29, 32, 31 \ |
6434 | } |
6435 | |
6436 | #define FLIP_6D_INPUT \ |
6437 | { \ |
6438 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, \ |
6439 | 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, \ |
6440 | 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, \ |
6441 | 56, 57, 58, 59, 60, 61, 62, 63, 64 \ |
6442 | } |
6443 | #define FLIP_6D_AXIS0 \ |
6444 | { \ |
6445 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, \ |
6446 | 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 1, 2, 3, 4, 5, \ |
6447 | 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, \ |
6448 | 24, 25, 26, 27, 28, 29, 30, 31, 32 \ |
6449 | } |
6450 | #define FLIP_6D_AXIS1 \ |
6451 | { \ |
6452 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, \ |
6453 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 49, 50, 51, 52, 53, 54, \ |
6454 | 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 33, 34, 35, 36, 37, 38, 39, \ |
6455 | 40, 41, 42, 43, 44, 45, 46, 47, 48 \ |
6456 | } |
6457 | #define FLIP_6D_AXIS2 \ |
6458 | { \ |
6459 | 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 25, 26, 27, 28, 29, \ |
6460 | 30, 31, 32, 17, 18, 19, 20, 21, 22, 23, 24, 41, 42, 43, 44, 45, 46, \ |
6461 | 47, 48, 33, 34, 35, 36, 37, 38, 39, 40, 57, 58, 59, 60, 61, 62, 63, \ |
6462 | 64, 49, 50, 51, 52, 53, 54, 55, 56 \ |
6463 | } |
6464 | #define FLIP_6D_AXIS3 \ |
6465 | { \ |
6466 | 5, 6, 7, 8, 1, 2, 3, 4, 13, 14, 15, 16, 9, 10, 11, 12, 21, 22, 23, 24, 17, \ |
6467 | 18, 19, 20, 29, 30, 31, 32, 25, 26, 27, 28, 37, 38, 39, 40, 33, 34, \ |
6468 | 35, 36, 45, 46, 47, 48, 41, 42, 43, 44, 53, 54, 55, 56, 49, 50, 51, \ |
6469 | 52, 61, 62, 63, 64, 57, 58, 59, 60 \ |
6470 | } |
6471 | #define FLIP_6D_AXIS4 \ |
6472 | { \ |
6473 | 3, 4, 1, 2, 7, 8, 5, 6, 11, 12, 9, 10, 15, 16, 13, 14, 19, 20, 17, 18, 23, \ |
6474 | 24, 21, 22, 27, 28, 25, 26, 31, 32, 29, 30, 35, 36, 33, 34, 39, 40, \ |
6475 | 37, 38, 43, 44, 41, 42, 47, 48, 45, 46, 51, 52, 49, 50, 55, 56, 53, \ |
6476 | 54, 59, 60, 57, 58, 63, 64, 61, 62 \ |
6477 | } |
6478 | #define FLIP_6D_AXIS5 \ |
6479 | { \ |
6480 | 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, \ |
6481 | 21, 24, 23, 26, 25, 28, 27, 30, 29, 32, 31, 34, 33, 36, 35, 38, 37, \ |
6482 | 40, 39, 42, 41, 44, 43, 46, 45, 48, 47, 50, 49, 52, 51, 54, 53, 56, \ |
6483 | 55, 58, 57, 60, 59, 62, 61, 64, 63 \ |
6484 | } |
6485 | |
6486 | /// Test Flip 1D with Float. |
6487 | TEST_P(OperatorTest, Flip1D_Axis0_Float) { |
6488 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6489 | testFlip<float>(bindings_, mod_, F_, EE_, {1, 2}, {2, 1}, {2}, 0); |
6490 | } |
6491 | |
6492 | /// Test Flip 2D with Float. |
6493 | TEST_P(OperatorTest, Flip2D_Axis0_Float) { |
6494 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6495 | testFlip<float>(bindings_, mod_, F_, EE_, {1, 2, 3, 4}, {3, 4, 1, 2}, {2, 2}, |
6496 | 0); |
6497 | } |
6498 | TEST_P(OperatorTest, Flip2D_Axis1_Float) { |
6499 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6500 | testFlip<float>(bindings_, mod_, F_, EE_, {1, 2, 3, 4}, {2, 1, 4, 3}, {2, 2}, |
6501 | 1); |
6502 | } |
6503 | |
6504 | /// Test Flip 3D with Float. |
6505 | TEST_P(OperatorTest, Flip3D_Axis0_Float) { |
6506 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6507 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_3D_INPUT, FLIP_3D_AXIS0, |
6508 | {2, 2, 2}, 0); |
6509 | } |
6510 | TEST_P(OperatorTest, Flip3D_Axis1_Float) { |
6511 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6512 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_3D_INPUT, FLIP_3D_AXIS1, |
6513 | {2, 2, 2}, 1); |
6514 | } |
6515 | TEST_P(OperatorTest, Flip3D_Axis2_Float) { |
6516 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6517 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_3D_INPUT, FLIP_3D_AXIS2, |
6518 | {2, 2, 2}, 2); |
6519 | } |
6520 | |
6521 | /// Test Flip 4D with Float. |
6522 | TEST_P(OperatorTest, Flip4D_Axis0_Float) { |
6523 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6524 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_4D_INPUT, FLIP_4D_AXIS0, |
6525 | {2, 2, 2, 2}, 0); |
6526 | } |
6527 | TEST_P(OperatorTest, Flip4D_Axis1_Float) { |
6528 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6529 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_4D_INPUT, FLIP_4D_AXIS1, |
6530 | {2, 2, 2, 2}, 1); |
6531 | } |
6532 | TEST_P(OperatorTest, Flip4D_Axis2_Float) { |
6533 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6534 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_4D_INPUT, FLIP_4D_AXIS2, |
6535 | {2, 2, 2, 2}, 2); |
6536 | } |
6537 | TEST_P(OperatorTest, Flip4D_Axis3_Float) { |
6538 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6539 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_4D_INPUT, FLIP_4D_AXIS3, |
6540 | {2, 2, 2, 2}, 3); |
6541 | } |
6542 | |
6543 | /// Test Flip 5D with Float. |
6544 | TEST_P(OperatorTest, Flip5D_Axis0_Float) { |
6545 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6546 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_5D_INPUT, FLIP_5D_AXIS0, |
6547 | {2, 2, 2, 2, 2}, 0); |
6548 | } |
6549 | TEST_P(OperatorTest, Flip5D_Axis1_Float) { |
6550 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6551 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_5D_INPUT, FLIP_5D_AXIS1, |
6552 | {2, 2, 2, 2, 2}, 1); |
6553 | } |
6554 | TEST_P(OperatorTest, Flip5D_Axis2_Float) { |
6555 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6556 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_5D_INPUT, FLIP_5D_AXIS2, |
6557 | {2, 2, 2, 2, 2}, 2); |
6558 | } |
6559 | TEST_P(OperatorTest, Flip5D_Axis3_Float) { |
6560 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6561 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_5D_INPUT, FLIP_5D_AXIS3, |
6562 | {2, 2, 2, 2, 2}, 3); |
6563 | } |
6564 | TEST_P(OperatorTest, Flip5D_Axis4_Float) { |
6565 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6566 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_5D_INPUT, FLIP_5D_AXIS4, |
6567 | {2, 2, 2, 2, 2}, 4); |
6568 | } |
6569 | |
6570 | /// Test Flip 6D with Float. |
6571 | TEST_P(OperatorTest, Flip6D_Axis0_Float) { |
6572 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6573 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_6D_INPUT, FLIP_6D_AXIS0, |
6574 | {2, 2, 2, 2, 2, 2}, 0); |
6575 | } |
6576 | TEST_P(OperatorTest, Flip6D_Axis1_Float) { |
6577 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6578 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_6D_INPUT, FLIP_6D_AXIS1, |
6579 | {2, 2, 2, 2, 2, 2}, 1); |
6580 | } |
6581 | TEST_P(OperatorTest, Flip6D_Axis2_Float) { |
6582 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6583 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_6D_INPUT, FLIP_6D_AXIS2, |
6584 | {2, 2, 2, 2, 2, 2}, 2); |
6585 | } |
6586 | TEST_P(OperatorTest, Flip6D_Axis3_Float) { |
6587 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6588 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_6D_INPUT, FLIP_6D_AXIS3, |
6589 | {2, 2, 2, 2, 2, 2}, 3); |
6590 | } |
6591 | TEST_P(OperatorTest, Flip6D_Axis4_Float) { |
6592 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6593 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_6D_INPUT, FLIP_6D_AXIS4, |
6594 | {2, 2, 2, 2, 2, 2}, 4); |
6595 | } |
6596 | TEST_P(OperatorTest, Flip6D_Axis5_Float) { |
6597 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
6598 | testFlip<float>(bindings_, mod_, F_, EE_, FLIP_6D_INPUT, FLIP_6D_AXIS5, |
6599 | {2, 2, 2, 2, 2, 2}, 5); |
6600 | } |
6601 | |
6602 | #undef FLIP_3D_INPUT |
6603 | #undef FLIP_3D_AXIS0 |
6604 | #undef FLIP_3D_AXIS1 |
6605 | #undef FLIP_3D_AXIS2 |
6606 | #undef FLIP_4D_INPUT |
6607 | #undef FLIP_4D_AXIS0 |
6608 | #undef FLIP_4D_AXIS1 |
6609 | #undef FLIP_4D_AXIS2 |
6610 | #undef FLIP_4D_AXIS3 |
6611 | #undef FLIP_5D_INPUT |
6612 | #undef FLIP_5D_AXIS0 |
6613 | #undef FLIP_5D_AXIS1 |
6614 | #undef FLIP_5D_AXIS2 |
6615 | #undef FLIP_5D_AXIS3 |
6616 | #undef FLIP_5D_AXIS4 |
6617 | #undef FLIP_6D_INPUT |
6618 | #undef FLIP_6D_AXIS0 |
6619 | #undef FLIP_6D_AXIS1 |
6620 | #undef FLIP_6D_AXIS2 |
6621 | #undef FLIP_6D_AXIS3 |
6622 | #undef FLIP_6D_AXIS4 |
6623 | #undef FLIP_6D_AXIS5 |
6624 | |
6625 | /// Check that gather on Int64ITy/size_t works. |
6626 | TEST_P(OperatorTest, GatherSizeT) { |
6627 | CHECK_IF_ENABLED(); |
6628 | |
6629 | /* |
6630 | DATA = [ |
6631 | [1, 2], |
6632 | [3, 4], |
6633 | [5, 6], |
6634 | ] |
6635 | INDICES = [ |
6636 | [0, 1, 0, 1], |
6637 | [1, 2, 2, 0], |
6638 | ] |
6639 | OUTPUT = [ |
6640 | [ |
6641 | [1, 2], |
6642 | [3, 4], |
6643 | [1, 2], |
6644 | [3, 4], |
6645 | ], |
6646 | [ |
6647 | [3, 4], |
6648 | [5, 6], |
6649 | [5, 6], |
6650 | [1, 2], |
6651 | ], |
6652 | ] |
6653 | */ |
6654 | auto *data = |
6655 | mod_.createPlaceholder(ElemKind::Int64ITy, {3, 2}, "data" , false); |
6656 | auto *indices = |
6657 | mod_.createPlaceholder(ElemKind::Int64ITy, {2, 4}, "indices" , false); |
6658 | |
6659 | bindings_.allocate(data)->getHandle<int64_t>() = { |
6660 | 1, 2, 3, 4, 5, 6, |
6661 | }; |
6662 | bindings_.allocate(indices)->getHandle<int64_t>() = { |
6663 | 0, 1, 0, 1, 1, 2, 2, 0, |
6664 | }; |
6665 | |
6666 | auto *R = F_->createGather("gather" , data, indices); |
6667 | |
6668 | auto *result = F_->createSave("save" , R); |
6669 | bindings_.allocate(result->getPlaceholder()); |
6670 | |
6671 | EE_.compile(CompilationMode::Infer); |
6672 | EE_.run(bindings_); |
6673 | |
6674 | auto H = bindings_.get(result->getPlaceholder())->getHandle<int64_t>(); |
6675 | |
6676 | EXPECT_EQ(H.at({0, 0, 0}), 1); |
6677 | EXPECT_EQ(H.at({0, 0, 1}), 2); |
6678 | EXPECT_EQ(H.at({0, 1, 0}), 3); |
6679 | EXPECT_EQ(H.at({0, 1, 1}), 4); |
6680 | EXPECT_EQ(H.at({0, 2, 0}), 1); |
6681 | EXPECT_EQ(H.at({0, 2, 1}), 2); |
6682 | EXPECT_EQ(H.at({0, 3, 0}), 3); |
6683 | EXPECT_EQ(H.at({0, 3, 1}), 4); |
6684 | |
6685 | EXPECT_EQ(H.at({1, 0, 0}), 3); |
6686 | EXPECT_EQ(H.at({1, 0, 1}), 4); |
6687 | EXPECT_EQ(H.at({1, 1, 0}), 5); |
6688 | EXPECT_EQ(H.at({1, 1, 1}), 6); |
6689 | EXPECT_EQ(H.at({1, 2, 0}), 5); |
6690 | EXPECT_EQ(H.at({1, 2, 1}), 6); |
6691 | EXPECT_EQ(H.at({1, 3, 0}), 1); |
6692 | EXPECT_EQ(H.at({1, 3, 1}), 2); |
6693 | } |
6694 | |
6695 | TEST_P(OperatorTest, BatchedGather) { |
6696 | CHECK_IF_ENABLED(); |
6697 | |
6698 | /* |
6699 | DATA = [ |
6700 | [1.0, 1.2, 2.4, 4.5], |
6701 | [2.3, 3.4, 3.6, 2.3], |
6702 | [4.5, 5.7, 1.2, 4.5], |
6703 | ] |
6704 | |
6705 | INDICES = [0, 2], |
6706 | |
6707 | OUTPUT = [ |
6708 | [1.0, 2.4], |
6709 | [2.3, 3.6], |
6710 | [4.5, 1.2], |
6711 | ] |
6712 | */ |
6713 | auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {3, 4}, "data" , false); |
6714 | auto *indices = |
6715 | mod_.createPlaceholder(ElemKind::Int64ITy, {2}, "indices" , false); |
6716 | |
6717 | bindings_.allocate(data)->getHandle() = { |
6718 | 1.0f, 1.2f, 2.4f, 4.5f, 2.3f, 3.4f, 3.6f, 2.3f, 4.5f, 5.7f, 1.2f, 4.5f, |
6719 | }; |
  bindings_.allocate(indices)->getHandle<int64_t>() = {0, 2};
6724 | |
6725 | // Create a batched gather (a single batch dimension). |
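  // With batchDims = 1, each of the 3 rows of DATA is indexed independently:
  // OUTPUT[r][j] = DATA[r][INDICES[j]].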
6726 | auto *R = F_->createGather("gather" , data, indices, 1); |
6727 | |
6728 | auto *result = F_->createSave("save" , R); |
6729 | bindings_.allocate(result->getPlaceholder()); |
6730 | |
6731 | EE_.compile(CompilationMode::Infer); |
6732 | EE_.run(bindings_); |
6733 | |
6734 | auto H = bindings_.get(result->getPlaceholder())->getHandle(); |
6735 | EXPECT_FLOAT_EQ(H.at({0, 0}), 1.0); |
6736 | EXPECT_FLOAT_EQ(H.at({0, 1}), 2.4); |
6737 | EXPECT_FLOAT_EQ(H.at({1, 0}), 2.3); |
6738 | EXPECT_FLOAT_EQ(H.at({1, 1}), 3.6); |
6739 | EXPECT_FLOAT_EQ(H.at({2, 0}), 4.5); |
6740 | EXPECT_FLOAT_EQ(H.at({2, 1}), 1.2); |
6741 | } |
6742 | |
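/// Check that ScatterData copies each slice over the row of the data tensor
/// selected by the corresponding index, leaving all other rows untouched.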
6743 | TEST_P(OperatorTest, ScatterData) { |
6744 | CHECK_IF_ENABLED(); |
6745 | |
6746 | auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {5, 2}, "data" , false); |
6747 | auto *indices = |
6748 | mod_.createPlaceholder(ElemKind::Int32ITy, {2, 1}, "indices" , false); |
6749 | auto *slices = |
6750 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2}, "slices" , false); |
6751 | |
6752 | bindings_.allocate(data)->getHandle() = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; |
6753 | bindings_.allocate(indices)->getHandle<int32_t>() = {1, 3}; |
6754 | bindings_.allocate(slices)->getHandle() = {-3, -4, -7, -8}; |
6755 | |
6756 | auto *R = F_->createScatterData("scatterdata" , data, indices, slices); |
6757 | |
6758 | auto *result = F_->createSave("save" , R); |
6759 | bindings_.allocate(result->getPlaceholder()); |
6760 | |
6761 | EE_.compile(CompilationMode::Infer); |
6762 | EE_.run(bindings_); |
6763 | |
6764 | auto H = bindings_.get(result->getPlaceholder())->getHandle(); |
6765 | EXPECT_FLOAT_EQ(H.at({0, 0}), 1.0); |
6766 | EXPECT_FLOAT_EQ(H.at({0, 1}), 2.0); |
6767 | EXPECT_FLOAT_EQ(H.at({1, 0}), -3.0); |
6768 | EXPECT_FLOAT_EQ(H.at({1, 1}), -4.0); |
6769 | EXPECT_FLOAT_EQ(H.at({2, 0}), 5.0); |
6770 | EXPECT_FLOAT_EQ(H.at({2, 1}), 6.0); |
6771 | EXPECT_FLOAT_EQ(H.at({3, 0}), -7.0); |
6772 | EXPECT_FLOAT_EQ(H.at({3, 1}), -8.0); |
6773 | EXPECT_FLOAT_EQ(H.at({4, 0}), 9.0); |
6774 | EXPECT_FLOAT_EQ(H.at({4, 1}), 10.0); |
6775 | } |
6776 | |
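/// Check cumulative ScatterData: slices that target the same row (index 2
/// appears twice below) are added to the existing data instead of
/// overwriting it.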
6777 | TEST_P(OperatorTest, ScatterDataCumulative) { |
6778 | CHECK_IF_ENABLED(); |
6779 | |
6780 | auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {5, 2}, "data" , false); |
6781 | auto *indices = |
6782 | mod_.createPlaceholder(ElemKind::Int32ITy, {4, 1}, "indices" , false); |
6783 | auto *slices = |
6784 | mod_.createPlaceholder(ElemKind::FloatTy, {4, 2}, "slices" , false); |
6785 | |
6786 | bindings_.allocate(data)->getHandle() = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; |
6787 | bindings_.allocate(indices)->getHandle<int32_t>() = {1, 2, 2, 3}; |
6788 | bindings_.allocate(slices)->getHandle() = {1, 2, 3, 4, 5, 6, 7, 8}; |
6789 | |
6790 | auto *R = F_->createScatterData("scatterdata" , data, indices, slices, |
6791 | true /* cumulative */); |
6792 | |
6793 | auto *result = F_->createSave("save" , R); |
6794 | bindings_.allocate(result->getPlaceholder()); |
6795 | |
6796 | EE_.compile(CompilationMode::Infer); |
6797 | EE_.run(bindings_); |
6798 | |
6799 | auto H = bindings_.get(result->getPlaceholder())->getHandle(); |
6800 | |
6801 | EXPECT_FLOAT_EQ(H.at({0, 0}), 0.0); |
6802 | EXPECT_FLOAT_EQ(H.at({0, 1}), 0.0); |
6803 | EXPECT_FLOAT_EQ(H.at({1, 0}), 1.0); |
6804 | EXPECT_FLOAT_EQ(H.at({1, 1}), 2.0); |
6805 | EXPECT_FLOAT_EQ(H.at({2, 0}), 8.0); |
6806 | EXPECT_FLOAT_EQ(H.at({2, 1}), 10.0); |
6807 | EXPECT_FLOAT_EQ(H.at({3, 0}), 7.0); |
6808 | EXPECT_FLOAT_EQ(H.at({3, 1}), 8.0); |
6809 | EXPECT_FLOAT_EQ(H.at({4, 0}), 0.0); |
6810 | EXPECT_FLOAT_EQ(H.at({4, 1}), 0.0); |
6811 | } |
6812 | |
6813 | TEST_P(OperatorTest, ScatterDataQuantized) { |
6814 | CHECK_IF_ENABLED(); |
6815 | |
6816 | auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {5, 2}, "data" , false); |
6817 | auto *indices = |
6818 | mod_.createPlaceholder(ElemKind::Int32ITy, {2, 1}, "indices" , false); |
6819 | auto *slices = |
6820 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2}, "slices" , false); |
6821 | |
6822 | bindings_.allocate(data)->getHandle() = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; |
6823 | bindings_.allocate(indices)->getHandle<int32_t>() = {1, 3}; |
6824 | bindings_.allocate(slices)->getHandle() = {-3, -4, -7, -8}; |
6825 | |
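  // Quantize data and slices with a single scale/offset chosen to cover the
  // range [-11, 11], so the dequantized scatter result stays within the 0.05
  // tolerance checked below.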
6826 | auto qParams = glow::quantization::chooseQuantizationParams({-11, 11}); |
6827 | auto dataTy = |
6828 | mod_.uniqueType(ElemKind::Int8QTy, {5, 2}, qParams.scale, qParams.offset); |
6829 | auto slicesTy = |
6830 | mod_.uniqueType(ElemKind::Int8QTy, {2, 2}, qParams.scale, qParams.offset); |
6831 | |
6832 | auto *dataQ = F_->createQuantize("quantizeQ" , data, dataTy); |
6833 | auto *slicesQ = F_->createQuantize("quantizeS" , slices, slicesTy); |
6834 | auto *SA = F_->createScatterData("scatterdata" , dataQ, indices, slicesQ); |
6835 | auto *DQ = F_->createDequantize("dequantize" , SA, ElemKind::FloatTy); |
6836 | |
6837 | auto *result = F_->createSave("save" , DQ); |
6838 | bindings_.allocate(result->getPlaceholder()); |
6839 | |
6840 | EE_.compile(CompilationMode::Infer); |
6841 | EE_.run(bindings_); |
6842 | |
6843 | auto H = bindings_.get(result->getPlaceholder())->getHandle(); |
6844 | |
6845 | EXPECT_NEAR(H.at({0, 0}), 1.0, 0.05); |
6846 | EXPECT_NEAR(H.at({0, 1}), 2.0, 0.05); |
6847 | EXPECT_NEAR(H.at({1, 0}), -3.0, 0.05); |
6848 | EXPECT_NEAR(H.at({1, 1}), -4.0, 0.05); |
6849 | EXPECT_NEAR(H.at({2, 0}), 5.0, 0.05); |
6850 | EXPECT_NEAR(H.at({2, 1}), 6.0, 0.05); |
6851 | EXPECT_NEAR(H.at({3, 0}), -7.0, 0.05); |
6852 | EXPECT_NEAR(H.at({3, 1}), -8.0, 0.05); |
6853 | EXPECT_NEAR(H.at({4, 0}), 9.0, 0.05); |
6854 | EXPECT_NEAR(H.at({4, 1}), 10.0, 0.05); |
6855 | } |
6856 | |
6857 | TEST_P(OperatorTest, ScatterDataNDimensionalSimple) { |
6858 | CHECK_IF_ENABLED(); |
6859 | |
6860 | // Data = {{1,2},{3,4},{5,6}} |
6861 | // Slices = {-3,-4} |
6862 | // Indices = {{1,0},{1,1}} |
6863 | // Result = {{1,2},{-3,-4},{5,6}} |
6864 | auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {3, 2}, "data" , false); |
6865 | auto *indices = |
6866 | mod_.createPlaceholder(ElemKind::Int32ITy, {2, 2}, "indices" , false); |
6867 | auto *slices = |
6868 | mod_.createPlaceholder(ElemKind::FloatTy, {2}, "slices" , false); |
6869 | |
6870 | // Fill tensor with consecutive data. |
6871 | std::vector<float> init(6); |
6872 | std::iota(init.begin(), init.end(), 1); |
6873 | bindings_.allocate(data)->getHandle() = init; |
6874 | bindings_.allocate(indices)->getHandle<int32_t>() = {1, 0, 1, 1}; |
6875 | bindings_.allocate(slices)->getHandle() = {-3., -4.}; |
6876 | auto *R = F_->createScatterData("scatterdata" , data, indices, slices); |
6877 | |
6878 | auto *result = F_->createSave("save" , R); |
6879 | bindings_.allocate(result->getPlaceholder()); |
6880 | |
6881 | EE_.compile(CompilationMode::Infer); |
6882 | EE_.run(bindings_); |
6883 | |
6884 | std::vector<dim_t> expectedDims = {3, 2}; |
6885 | std::vector<float> expectedValues = {1., 2., -3., -4., 5., 6.}; |
6886 | auto H = bindings_.get(result->getPlaceholder())->getHandle(); |
6887 | EXPECT_TRUE(H.dims().vec() == expectedDims); |
6888 | for (dim_t i = 0; i < expectedValues.size(); i++) { |
6889 | EXPECT_EQ(expectedValues[i], H.raw(i)); |
6890 | } |
6891 | } |
6892 | |
6893 | TEST_P(OperatorTest, ScatterDataNDimensional) { |
6894 | CHECK_IF_ENABLED(); |
6895 | |
6896 | // In tensor 2x4x4x3, make two updates with 2-dimensional slices by |
6897 | // 2-dimensional indices: |
6898 | // 1. By index [0, 3], set [[-1., -2., -3.] |
6899 | // [-4., -5., -6.] |
6900 | // [-7., -8., -9.] |
6901 | // [-10., -11., -12.]]; |
6902 | // |
6903 | // 2. By index [1, 1], set [[-13., -14., -15.] |
6904 | // [-16., -17., -18.] |
6905 | // [-19., -20., -21.] |
6906 | // [-22., -23., -24.]]; |
6907 | // |
6908 | auto *data = |
6909 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 4, 4, 3}, "data" , false); |
6910 | auto *indices = |
6911 | mod_.createPlaceholder(ElemKind::Int32ITy, {2, 2}, "indices" , false); |
6912 | auto *slices = |
6913 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 4, 3}, "slices" , false); |
6914 | |
6915 | // Fill tensor with consecutive data. |
6916 | std::vector<float> init(2 * 4 * 4 * 3); |
6917 | std::iota(init.begin(), init.end(), 0); |
6918 | bindings_.allocate(data)->getHandle() = init; |
6919 | bindings_.allocate(indices)->getHandle<int32_t>() = {0, 3, 1, 1}; |
6920 | std::vector<float> initUpdates; |
6921 | for (int32_t i = -1; i > -25; i--) { |
6922 | initUpdates.push_back(static_cast<float>(i)); |
6923 | } |
6924 | bindings_.allocate(slices)->getHandle() = initUpdates; |
6925 | |
6926 | auto *R = F_->createScatterData("scatterdata" , data, indices, slices); |
6927 | |
6928 | auto *result = F_->createSave("save" , R); |
6929 | bindings_.allocate(result->getPlaceholder()); |
6930 | |
6931 | EE_.compile(CompilationMode::Infer); |
6932 | EE_.run(bindings_); |
6933 | |
6934 | std::vector<dim_t> expectedDims = {2, 4, 4, 3}; |
6935 | std::vector<float> expectedValues = { |
6936 | 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, |
6937 | 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, |
6938 | |
6939 | 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, |
6940 | 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, |
6941 | |
6942 | 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, |
6943 | 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, |
6944 | |
6945 | -1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, |
6946 | -7.0f, -8.0f, -9.0f, -10.0f, -11.0f, -12.0f, |
6947 | |
6948 | 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, |
6949 | 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, |
6950 | |
6951 | -13.0f, -14.0f, -15.0f, -16.0f, -17.0f, -18.0f, |
6952 | -19.0f, -20.0f, -21.0f, -22.0f, -23.0f, -24.0f, |
6953 | |
6954 | 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, |
6955 | 78.0f, 79.0f, 80.0f, 81.0f, 82.0f, 83.0f, |
6956 | |
6957 | 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, |
6958 | 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f}; |
6959 | auto H = bindings_.get(result->getPlaceholder())->getHandle(); |
6960 | EXPECT_TRUE(H.dims().vec() == expectedDims); |
6961 | for (dim_t i = 0; i < expectedValues.size(); i++) { |
6962 | EXPECT_EQ(expectedValues[i], H.raw(i)); |
6963 | } |
6964 | } |
6965 | |
6966 | TEST_P(OperatorTest, ScatterAddQuantized) { |
6967 | CHECK_IF_ENABLED(); |
6968 | |
6969 | auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {5, 2}, "data" , false); |
6970 | auto *indices = |
6971 | mod_.createPlaceholder(ElemKind::Int32ITy, {2, 1}, "indices" , false); |
6972 | auto *slices = |
6973 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2}, "slices" , false); |
6974 | |
6975 | bindings_.allocate(data)->getHandle() = {1, 2, -3, -8, 5, 6, 7, 8, 9, 10}; |
6976 | bindings_.allocate(indices)->getHandle<int32_t>() = {1, 3}; |
6977 | bindings_.allocate(slices)->getHandle() = {3, -8, -7, 8}; |
6978 | |
6979 | auto qParams = glow::quantization::chooseQuantizationParams({-11, 11}); |
6980 | auto dataTy = |
6981 | mod_.uniqueType(ElemKind::Int8QTy, {5, 2}, qParams.scale, qParams.offset); |
6982 | auto slicesTy = |
6983 | mod_.uniqueType(ElemKind::Int8QTy, {2, 2}, qParams.scale, qParams.offset); |
6984 | |
6985 | auto *dataQ = F_->createQuantize("quantizeQ" , data, dataTy); |
6986 | auto *slicesQ = F_->createQuantize("quantizeS" , slices, slicesTy); |
6987 | auto *SA = F_->createScatterData("scatteradd" , dataQ, indices, slicesQ, |
6988 | /*Cumulative*/ true); |
6989 | auto *DQ = F_->createDequantize("dequantize" , SA, ElemKind::FloatTy); |
6990 | |
6991 | auto *result = F_->createSave("save" , DQ); |
6992 | bindings_.allocate(result->getPlaceholder()); |
6993 | |
6994 | EE_.compile(CompilationMode::Infer); |
6995 | EE_.run(bindings_); |
6996 | |
6997 | auto H = bindings_.get(result->getPlaceholder())->getHandle(); |
6998 | |
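  // Cumulative scatter saturates at the limits of the quantized output range:
  // row 1 is {-3 + 3, -8 - 8} = {0, -16} -> clamped to ~-11, and row 3 is
  // {7 - 7, 8 + 8} = {0, 16} -> clamped to ~11.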
6999 | EXPECT_NEAR(H.at({0, 0}), 1.0, 0.05); |
7000 | EXPECT_NEAR(H.at({0, 1}), 2.0, 0.05); |
7001 | EXPECT_NEAR(H.at({1, 0}), 0.0, 0.05); |
7002 | EXPECT_NEAR(H.at({1, 1}), -11.0, 0.05); |
7003 | EXPECT_NEAR(H.at({2, 0}), 5.0, 0.05); |
7004 | EXPECT_NEAR(H.at({2, 1}), 6.0, 0.05); |
7005 | EXPECT_NEAR(H.at({3, 0}), 0.0, 0.05); |
7006 | EXPECT_NEAR(H.at({3, 1}), 11.0, 0.05); |
7007 | EXPECT_NEAR(H.at({4, 0}), 9.0, 0.05); |
7008 | EXPECT_NEAR(H.at({4, 1}), 10.0, 0.05); |
7009 | } |
7010 | |
7011 | TEST_P(OperatorTest, ScatterAddNDimensionalSimple) { |
7012 | CHECK_IF_ENABLED(); |
7013 | // Test that scatter addition works. |
7014 | // Data = {{1,2},{3,4},{5,6}} |
7015 | // Slices = {-3,-4} |
7016 | // Indices = {{1,0},{1,1}} |
7017 | // Result = {{1,2},{0,0},{5,6}} |
7018 | auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {3, 2}, "data" , false); |
7019 | auto *indices = |
7020 | mod_.createPlaceholder(ElemKind::Int32ITy, {2, 2}, "indices" , false); |
7021 | auto *slices = |
7022 | mod_.createPlaceholder(ElemKind::FloatTy, {2}, "slices" , false); |
7023 | |
7024 | // Fill tensor with consecutive data. |
7025 | std::vector<float> init; |
7026 | for (int32_t i = 1; i < 7; i++) { |
7027 | init.push_back(static_cast<float>(i)); |
7028 | } |
7029 | bindings_.allocate(data)->getHandle() = init; |
7030 | bindings_.allocate(indices)->getHandle<int32_t>() = {1, 0, 1, 1}; |
7031 | bindings_.allocate(slices)->getHandle() = {-3., -4.}; |
7032 | auto *R = F_->createScatterData("scatteradd" , data, indices, slices, |
7033 | /*Cumulative*/ true); |
7034 | |
7035 | auto *result = F_->createSave("save" , R); |
7036 | bindings_.allocate(result->getPlaceholder()); |
7037 | |
7038 | EE_.compile(CompilationMode::Infer); |
7039 | EE_.run(bindings_); |
7040 | |
7041 | std::vector<dim_t> expectedDims = {3, 2}; |
7042 | std::vector<float> expectedValues = {1., 2., 0., 0., 5., 6.}; |
7043 | auto H = bindings_.get(result->getPlaceholder())->getHandle(); |
7044 | EXPECT_TRUE(H.dims().vec() == expectedDims); |
7045 | for (dim_t i = 0; i < expectedValues.size(); i++) { |
7046 | EXPECT_EQ(expectedValues[i], H.raw(i)); |
7047 | } |
7048 | } |
7049 | |
7050 | TEST_P(OperatorTest, ScatterAddNDimensionalDuplicatingIndices) { |
7051 | CHECK_IF_ENABLED(); |
7052 | // Test that scatter addition with duplicating indices works. |
7053 | // Data = {{1,2},{3,4},{5,6}} |
7054 | // Slices = {-3,-4,-3,-4} |
  // Indices = {{1,0},{1,1},{1,0},{1,1}}
7056 | // Result = {{1,2},{-3,-4},{5,6}} |
7057 | auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {3, 2}, "data" , false); |
7058 | auto *indices = |
7059 | mod_.createPlaceholder(ElemKind::Int32ITy, {4, 2}, "indices" , false); |
7060 | auto *slices = |
7061 | mod_.createPlaceholder(ElemKind::FloatTy, {4}, "slices" , false); |
7062 | |
7063 | // Fill tensor with consecutive data. |
7064 | std::vector<float> init; |
7065 | for (int32_t i = 1; i < 7; i++) { |
7066 | init.push_back(static_cast<float>(i)); |
7067 | } |
7068 | bindings_.allocate(data)->getHandle() = init; |
7069 | bindings_.allocate(indices)->getHandle<int32_t>() = {1, 0, 1, 1, 1, 0, 1, 1}; |
7070 | bindings_.allocate(slices)->getHandle() = {-3., -4., -3., -4.}; |
7071 | auto *R = F_->createScatterData("scatteradd" , data, indices, slices, |
7072 | /*Cumulative*/ true); |
7073 | |
7074 | auto *result = F_->createSave("save" , R); |
7075 | bindings_.allocate(result->getPlaceholder()); |
7076 | |
7077 | EE_.compile(CompilationMode::Infer); |
7078 | EE_.run(bindings_); |
7079 | |
7080 | std::vector<dim_t> expectedDims = {3, 2}; |
7081 | std::vector<float> expectedValues = {1., 2., -3., -4., 5., 6.}; |
7082 | auto H = bindings_.get(result->getPlaceholder())->getHandle(); |
7083 | EXPECT_TRUE(H.dims().vec() == expectedDims); |
7084 | for (dim_t i = 0; i < expectedValues.size(); i++) { |
7085 | EXPECT_EQ(expectedValues[i], H.raw(i)); |
7086 | } |
7087 | } |
7088 | |
7089 | #define COMPARE_ARITH_FUN(_OP_NAME_) \ |
7090 | static FunctionTensorPair createAndInitBasic##_OP_NAME_##Test( \ |
7091 | glow::PlaceholderBindings &bindings, glow::ExecutionEngine &EE) { \ |
7092 | auto &mod = EE.getModule(); \ |
7093 | Function *F = mod.createFunction("main"); \ |
7094 | \ |
7095 | auto *A = mod.createPlaceholder(ElemKind::FloatTy, {1, 4}, "A", false); \ |
7096 | auto *B = mod.createPlaceholder(ElemKind::FloatTy, {1, 4}, "B", false); \ |
7097 | bindings.allocate(A)->getHandle() = {1.0f, -1.2f, 0.5f, -1.3f}; \ |
7098 | bindings.allocate(B)->getHandle() = {1.8f, -0.2f, -2.4f, 2.7f}; \ |
7099 | \ |
7100 | auto *add = F->create##_OP_NAME_("arith", A, B); \ |
7101 | auto *result = F->createSave("save", add); \ |
7102 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); \ |
7103 | \ |
7104 | return std::make_pair(F, resultTensor); \ |
7105 | } |
7106 | COMPARE_ARITH_FUN(Add) |
7107 | COMPARE_ARITH_FUN(Sub) |
7108 | COMPARE_ARITH_FUN(Mul) |
7109 | COMPARE_ARITH_FUN(Div) |
7110 | COMPARE_ARITH_FUN(FloorDiv) |
7111 | COMPARE_ARITH_FUN(Max) |
7112 | COMPARE_ARITH_FUN(Min) |
7113 | COMPARE_ARITH_FUN(Fmod) |
7114 | #undef COMPARE_ARITH_FUN |
7115 | |
7116 | #define COMPARE_ARITH_FLOAT_VS_INT8(_OP_NAME_) \ |
7117 | TEST_P(OperatorStatelessTest, Basic##_OP_NAME_##NetFloatVsInt8) { \ |
7118 | CHECK_IF_ENABLED(); \ |
7119 | compareAgainstInterpreter( \ |
7120 | getBackendName(), createAndInitBasic##_OP_NAME_##Test, \ |
7121 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.035f, parCloneCountOpt); \ |
7122 | } |
7123 | COMPARE_ARITH_FLOAT_VS_INT8(Add) |
7124 | COMPARE_ARITH_FLOAT_VS_INT8(Sub) |
7125 | COMPARE_ARITH_FLOAT_VS_INT8(Mul) |
7126 | COMPARE_ARITH_FLOAT_VS_INT8(Div) |
7127 | COMPARE_ARITH_FLOAT_VS_INT8(Max) |
7128 | COMPARE_ARITH_FLOAT_VS_INT8(Min) |
7129 | #undef COMPARE_ARITH_FLOAT_VS_INT8 |
7130 | |
7131 | #define COMPARE_ARITH_FLOAT_VS_FLOAT16(_OP_NAME_) \ |
7132 | TEST_P(OperatorStatelessTest, Basic##_OP_NAME_##NetFloatVsFloat16) { \ |
7133 | CHECK_IF_ENABLED(); \ |
7134 | compareAgainstInterpreter( \ |
7135 | getBackendName(), createAndInitBasic##_OP_NAME_##Test, \ |
7136 | ElemKind::FloatTy, ElemKind::Float16Ty, 0.01f, parCloneCountOpt); \ |
7137 | } |
7138 | |
7139 | #define COMPARE_ARITH_FLOAT_VS_BFLOAT16(_OP_NAME_) \ |
7140 | TEST_P(OperatorStatelessTest, Basic##_OP_NAME_##NetFloatVsBFloat16) { \ |
7141 | CHECK_IF_ENABLED(); \ |
7142 | compareAgainstInterpreter( \ |
7143 | getBackendName(), createAndInitBasic##_OP_NAME_##Test, \ |
7144 | ElemKind::FloatTy, ElemKind::BFloat16Ty, 0.01f, parCloneCountOpt); \ |
7145 | } |
7146 | COMPARE_ARITH_FLOAT_VS_FLOAT16(Add) |
7147 | COMPARE_ARITH_FLOAT_VS_FLOAT16(Sub) |
7148 | COMPARE_ARITH_FLOAT_VS_FLOAT16(Mul) |
7149 | COMPARE_ARITH_FLOAT_VS_FLOAT16(Div) |
7150 | COMPARE_ARITH_FLOAT_VS_FLOAT16(FloorDiv) |
7151 | COMPARE_ARITH_FLOAT_VS_FLOAT16(Max) |
7152 | COMPARE_ARITH_FLOAT_VS_FLOAT16(Min) |
7153 | COMPARE_ARITH_FLOAT_VS_FLOAT16(Fmod) |
7154 | |
7155 | COMPARE_ARITH_FLOAT_VS_BFLOAT16(Add) |
7156 | COMPARE_ARITH_FLOAT_VS_BFLOAT16(Sub) |
7157 | COMPARE_ARITH_FLOAT_VS_BFLOAT16(Mul) |
7158 | COMPARE_ARITH_FLOAT_VS_BFLOAT16(Div) |
7159 | COMPARE_ARITH_FLOAT_VS_BFLOAT16(FloorDiv) |
7160 | COMPARE_ARITH_FLOAT_VS_BFLOAT16(Max) |
7161 | COMPARE_ARITH_FLOAT_VS_BFLOAT16(Min) |
7162 | COMPARE_ARITH_FLOAT_VS_BFLOAT16(Fmod) |
7163 | #undef COMPARE_ARITH_FLOAT_VS_FLOAT16 |
7164 | #undef COMPARE_ARITH_FLOAT_VS_BFLOAT16 |
7165 | |
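// _PARENTHESES_ receives either "()" or nothing: functor types such as
// std::plus<T> must be constructed before being invoked
// (std::plus<T>()(a, b)), while function templates such as std::max<T>
// are called directly and therefore pass an empty token.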
7166 | #define ARITH_FUN_IMPL(_OP_NAME_, _REFERENCE_FUNCTION_, _PARENTHESES_) \ |
7167 | template <typename DataType> \ |
7168 | static void testArithmetic##_OP_NAME_##Impl( \ |
7169 | glow::PlaceholderBindings &bindings, glow::Module &mod, \ |
7170 | glow::Function *F, glow::ExecutionEngine &EE, ElemKind DTy) { \ |
7171 | std::vector<DataType> data1 = {3, 17, -7, 23}; \ |
7172 | std::vector<DataType> data2 = {13, -5, 19, 11}; \ |
7173 | auto *A = mod.createPlaceholder(DTy, {1, 4}, "A", false); \ |
7174 | auto *B = mod.createPlaceholder(DTy, {1, 4}, "B", false); \ |
7175 | bindings.allocate(A)->getHandle<DataType>() = data1; \ |
7176 | bindings.allocate(B)->getHandle<DataType>() = data2; \ |
7177 | \ |
7178 | auto *add = F->create##_OP_NAME_("arith", A, B); \ |
7179 | auto *result = F->createSave("save", add); \ |
7180 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); \ |
7181 | \ |
7182 | EE.compile(CompilationMode::Infer); \ |
7183 | EE.run(bindings); \ |
7184 | std::vector<DataType> reference; \ |
7185 | assert(data1.size() == data2.size() && "Size mismatch!"); \ |
7186 | for (size_t i = 0; i < data1.size(); i++) { \ |
7187 | reference.push_back( \ |
7188 | _REFERENCE_FUNCTION_<DataType> _PARENTHESES_(data1[i], data2[i])); \ |
7189 | } \ |
7190 | auto RH = resultTensor->getHandle<DataType>(); \ |
7191 | EXPECT_EQ(reference.size(), RH.size()); \ |
7192 | for (size_t i = 0; i < reference.size(); i++) { \ |
7193 | EXPECT_EQ(reference[i], RH.raw(i)); \ |
7194 | } \ |
7195 | } |
7196 | |
7197 | #define ARITH_FUNC_TEST_TYPED(_OP_NAME_, _DATA_TYPE_, _ELEM_KIND_) \ |
7198 | TEST_P(OperatorTest, Arith##_OP_NAME_##_##_DATA_TYPE_) { \ |
7199 | CHECK_IF_ENABLED(); \ |
7200 | testArithmetic##_OP_NAME_##Impl<_DATA_TYPE_>(bindings_, mod_, F_, EE_, \ |
7201 | _ELEM_KIND_); \ |
7202 | } |
7203 | |
7204 | template <typename DataType> static DataType fMod(DataType a, DataType b) { |
7205 | return static_cast<DataType>( |
7206 | std::fmod(static_cast<float>(a), static_cast<float>(b))); |
7207 | } |
7208 | |
7209 | #define ARITH_FUNC_TEST(_OP_NAME_, _REFERENCE_FUNCTION_, _PARENTHESES_) \ |
7210 | ARITH_FUN_IMPL(_OP_NAME_, _REFERENCE_FUNCTION_, _PARENTHESES_) \ |
7211 | ARITH_FUNC_TEST_TYPED(_OP_NAME_, int32_t, ElemKind::Int32ITy) \ |
7212 | ARITH_FUNC_TEST_TYPED(_OP_NAME_, int64_t, ElemKind::Int64ITy) \ |
7213 | ARITH_FUNC_TEST_TYPED(_OP_NAME_, float, ElemKind::FloatTy) \ |
7214 | ARITH_FUNC_TEST_TYPED(_OP_NAME_, float16_t, ElemKind::Float16Ty) \ |
7215 | ARITH_FUNC_TEST_TYPED(_OP_NAME_, bfloat16_t, ElemKind::BFloat16Ty) |
7216 | |
7217 | ARITH_FUNC_TEST(Add, std::plus, ()) |
7218 | ARITH_FUNC_TEST(Sub, std::minus, ()) |
7219 | ARITH_FUNC_TEST(Mul, std::multiplies, ()) |
7220 | ARITH_FUNC_TEST(Div, std::divides, ()) |
7221 | ARITH_FUNC_TEST(Max, std::max, ) |
7222 | ARITH_FUNC_TEST(Min, std::min, ) |
7223 | ARITH_FUNC_TEST(Fmod, fMod, ) |
7224 | |
7225 | #undef ARITH_FUN_IMPL |
7226 | #undef ARITH_FUNC_TEST_TYPED |
7227 | #undef ARITH_FUNC_TEST |
7228 | |
7229 | /// Reference function for FloorDivide |
7230 | template <typename DataType> |
7231 | static DataType floorDivide(DataType a, DataType b) { |
7232 | return std::floor(static_cast<float>(a) / static_cast<float>(b)); |
7233 | } |
7234 | |
7235 | /// Reference function for TruncDivide |
7236 | template <typename DataType> |
7237 | static DataType truncDivide(DataType a, DataType b) { |
7238 | return std::trunc(static_cast<float>(a) / static_cast<float>(b)); |
7239 | } |
7240 | |
7241 | /// Helper to test FloorDiv using \p DataType. |
7242 | template <typename DataType> |
7243 | static void testFloorDiv(glow::PlaceholderBindings &bindings, glow::Module &mod, |
7244 | glow::Function *F, glow::ExecutionEngine &EE, |
7245 | ElemKind DTy, bool truncate) { |
7246 | std::vector<DataType> data1 = {3, 15, 7, 22}; |
7247 | std::vector<DataType> data2 = {-6, -5, 14, 11}; |
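  // floor and trunc differ for negative quotients: 3 / -6 = -0.5 floors to
  // -1 but truncates to 0, so the first element distinguishes the two modes.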
7248 | float scale = 0.5; |
7249 | int offset = 0; |
7250 | Placeholder *A = nullptr; |
7251 | Placeholder *B = nullptr; |
7252 | if (isQuantizedElemKind(DTy)) { |
7253 | A = mod.createPlaceholder(DTy, {1, 4}, scale, offset, "A" , false); |
7254 | B = mod.createPlaceholder(DTy, {1, 4}, scale, offset, "B" , false); |
7255 | } else { |
7256 | A = mod.createPlaceholder(DTy, {1, 4}, "A" , false); |
7257 | B = mod.createPlaceholder(DTy, {1, 4}, "B" , false); |
7258 | } |
7259 | bindings.allocate(A)->getHandle<DataType>() = data1; |
7260 | bindings.allocate(B)->getHandle<DataType>() = data2; |
7261 | |
7262 | auto *floorDiv = F->createFloorDiv("floorDiv" , A, B, truncate); |
7263 | auto *result = F->createSave("save" , floorDiv); |
7264 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
7265 | |
7266 | EE.compile(CompilationMode::Infer); |
7267 | EE.run(bindings); |
7268 | std::vector<DataType> reference; |
7269 | assert(data1.size() == data2.size() && "Size mismatch!" ); |
7270 | for (size_t i = 0; i < data1.size(); i++) { |
7271 | reference.push_back(truncate ? truncDivide<DataType>(data1[i], data2[i]) |
7272 | : floorDivide<DataType>(data1[i], data2[i])); |
7273 | } |
7274 | auto RH = resultTensor->getHandle<DataType>(); |
7275 | EXPECT_EQ(reference.size(), RH.size()); |
7276 | for (size_t i = 0; i < reference.size(); i++) { |
7277 | if (isQuantizedElemKind(DTy)) { |
7278 | EXPECT_EQ(reference[i], static_cast<DataType>(quantization::dequantize( |
7279 | RH.raw(i), {scale, offset}))); |
7280 | } else { |
7281 | EXPECT_EQ(reference[i], RH.raw(i)); |
7282 | } |
7283 | } |
7284 | } |
7285 | |
7286 | TEST_P(OperatorTest, FloorDiv_FloatTy) { |
7287 | CHECK_IF_ENABLED(); |
7288 | |
7289 | testFloorDiv<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
7290 | /* truncate */ false); |
7291 | } |
7292 | |
7293 | TEST_P(OperatorTest, FloorDiv_Float16Ty) { |
7294 | CHECK_IF_ENABLED(); |
7295 | |
7296 | testFloorDiv<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
7297 | /* truncate */ false); |
7298 | } |
7299 | |
7300 | TEST_P(OperatorTest, FloorDiv_Int64ITy) { |
7301 | CHECK_IF_ENABLED(); |
7302 | |
7303 | testFloorDiv<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy, |
7304 | /* truncate */ false); |
7305 | } |
7306 | |
7307 | TEST_P(OperatorTest, FloorDiv_Int32ITy) { |
7308 | CHECK_IF_ENABLED(); |
7309 | |
7310 | testFloorDiv<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy, |
7311 | /* truncate */ false); |
7312 | } |
7313 | |
7314 | TEST_P(OperatorTest, FloorDiv_Int8QTy) { |
7315 | CHECK_IF_ENABLED(); |
7316 | |
7317 | testFloorDiv<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, |
7318 | /* truncate */ false); |
7319 | } |
7320 | |
7321 | TEST_P(OperatorTest, FloorDiv_Trunc_FloatTy) { |
7322 | CHECK_IF_ENABLED(); |
7323 | |
7324 | testFloorDiv<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
7325 | /* truncate */ true); |
7326 | } |
7327 | |
7328 | TEST_P(OperatorTest, FloorDiv_Trunc_Float16Ty) { |
7329 | CHECK_IF_ENABLED(); |
7330 | |
7331 | testFloorDiv<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
7332 | /* truncate */ true); |
7333 | } |
7334 | |
7335 | TEST_P(OperatorTest, FloorDiv_Trunc_Int64ITy) { |
7336 | CHECK_IF_ENABLED(); |
7337 | |
7338 | testFloorDiv<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy, |
7339 | /* truncate */ true); |
7340 | } |
7341 | |
7342 | TEST_P(OperatorTest, FloorDiv_Trunc_Int32ITy) { |
7343 | CHECK_IF_ENABLED(); |
7344 | |
7345 | testFloorDiv<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy, |
7346 | /* truncate */ true); |
7347 | } |
7348 | |
7349 | TEST_P(OperatorTest, FloorDiv_Trunc_Int8QTy) { |
7350 | CHECK_IF_ENABLED(); |
7351 | |
7352 | testFloorDiv<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, |
7353 | /* truncate */ true); |
7354 | } |
7355 | |
7356 | TEST_P(OperatorTest, IntMatMul) { |
7357 | CHECK_IF_ENABLED(); |
7358 | |
  // The scales below were carefully selected to make sure we don't
  // overflow or underflow the calculation.
7361 | TypeRef resTy = mod_.uniqueType(ElemKind::Int8QTy, {3, 3}, 0.60, 4); |
7362 | TypeRef lhsTy = mod_.uniqueType(ElemKind::Int8QTy, {3, 3}, 0.075, -2); |
7363 | TypeRef rhsTy = mod_.uniqueType(ElemKind::Int8QTy, {3, 3}, 0.075, 2); |
7364 | |
7365 | auto *lhs = mod_.createPlaceholder(ElemKind::FloatTy, {3, 3}, "lhs" , false); |
7366 | auto *rhs = mod_.createPlaceholder(ElemKind::FloatTy, {3, 3}, "rhs" , false); |
7367 | |
7368 | bindings_.allocate(lhs)->getHandle() = { |
7369 | 1.0, 2.0, 3.0, 4.0, 5.0, -5.0, -4.0, -3.0, 9.0, |
7370 | }; |
7371 | |
7372 | bindings_.allocate(rhs)->getHandle() = { |
7373 | 0.1f, -0.2f, 0.3f, 9.0f, -8.0f, 7.0f, 6.0f, 5.0f, 9.0f, |
7374 | }; |
7375 | |
7376 | auto *lhsq = F_->createQuantize("lhs.q" , lhs, lhsTy); |
7377 | auto *rhsq = F_->createQuantize("rhs.q" , rhs, rhsTy); |
7378 | |
7379 | auto *matmulq = F_->createMatMul("matmul.q" , resTy, lhsq, rhsq); |
7380 | |
7381 | auto *rq = F_->createDequantize("dequant" , matmulq, ElemKind::FloatTy); |
7382 | |
7383 | auto *result = F_->createSave("save" , rq); |
7384 | bindings_.allocate(result->getPlaceholder()); |
7385 | |
7386 | EE_.compile(CompilationMode::Infer); |
7387 | EE_.run(bindings_); |
7388 | |
7389 | /* |
7390 | Test the following matrix multiplication: |
7391 | A = [[1.0, 2.0, 3.0], [4.0, 5.0, -5.0], [-4.0, -3.0, 9.0]] |
7392 | B = [[0.1, -0.2, 0.3], [9.0, -8.0, 7.0], [6.0, 5.0, 9.0]] |
7393 | A x B = [36.1, -1.2, 41.3], [15.4, -65.8, -8.8], [26.6, 69.8, 58.8]] |
7394 | */ |
7395 | |
7396 | auto H = bindings_.get(result->getPlaceholder())->getHandle(); |
7397 | EXPECT_NEAR(H.at({0, 0}), 36.1, 1.0); |
7398 | EXPECT_NEAR(H.at({0, 1}), -1.2, 1.0); |
7399 | EXPECT_NEAR(H.at({0, 2}), 41.3, 1.0); |
7400 | EXPECT_NEAR(H.at({1, 0}), 15.4, 1.0); |
7401 | EXPECT_NEAR(H.at({1, 1}), -65.8, 1.0); |
7402 | EXPECT_NEAR(H.at({1, 2}), -8.8, 1.0); |
7403 | EXPECT_NEAR(H.at({2, 0}), 26.6, 1.0); |
7404 | EXPECT_NEAR(H.at({2, 1}), 69.8, 1.0); |
7405 | EXPECT_NEAR(H.at({2, 2}), 58.8, 1.0); |
7406 | } |
7407 | |
7408 | /// Gemm test for quantized case with Int32QTy bias |
7409 | TEST_P(OperatorTest, IntGemm) { |
7410 | CHECK_IF_ENABLED(); |
7411 | |
7412 | TypeRef resTy = mod_.uniqueType(ElemKind::Int8QTy, {1, 2}, 1, 0); |
7413 | |
7414 | auto *inp = |
7415 | mod_.createPlaceholder(ElemKind::Int8QTy, {1, 5}, 1, 0, "inp" , false); |
7416 | bindings_.allocate(inp); |
7417 | auto *weight = |
7418 | mod_.createPlaceholder(ElemKind::Int8QTy, {2, 5}, 1, 0, "weight" , false); |
7419 | bindings_.allocate(weight); |
7420 | auto *bias = |
7421 | mod_.createPlaceholder(ElemKind::Int32QTy, {2}, 1, 0, "bias" , false); |
7422 | bindings_.allocate(bias); |
7423 | |
7424 | bindings_.get(inp)->getHandle<int8_t>() = { |
7425 | 1, 1, 1, 1, 1, |
7426 | }; |
7427 | |
7428 | bindings_.get(weight)->getHandle<int8_t>() = {2, 2, 2, 2, 2, 3, 3, 3, 3, 3}; |
7429 | |
7430 | bindings_.get(bias)->getHandle<int32_t>() = {1, 2}; |
7431 | |
  auto *gemmnode = F_->createGemm("gemm", resTy, inp, weight, bias,
                                  /* alpha */ 1, /* beta */ 1,
                                  /* transA */ 0, /* transB */ 1);
7433 | |
7434 | auto *S = F_->createSave("save" , gemmnode); |
7435 | bindings_.allocate(S->getPlaceholder()); |
7436 | EE_.compile(CompilationMode::Infer); |
7437 | |
7438 | EE_.run(bindings_); |
7439 | |
7440 | auto result = bindings_.get(S->getPlaceholder()); |
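  // With B transposed, each output is dot(inp, weight row) + bias:
  // 5 * 2 + 1 = 11 and 5 * 3 + 2 = 17.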
7441 | Tensor expected(resTy); |
7442 | expected.getHandle<int8_t>() = {11, 17}; |
7443 | EXPECT_TRUE(expected.isEqual(*result)); |
7444 | } |
7445 | |
7446 | TEST_P(OperatorTest, IntBatchedArith) { |
7447 | CHECK_IF_ENABLED(); |
7448 | |
7449 | TypeRef resTy = mod_.uniqueType(ElemKind::Int8QTy, {1, 3, 3}, 0.10, 1.0); |
7450 | TypeRef lhsTy = mod_.uniqueType(ElemKind::Int8QTy, {1, 3, 3}, 0.11, 4.0); |
7451 | TypeRef rhsTy = mod_.uniqueType(ElemKind::Int8QTy, {3, 3}, 0.14, -2.0); |
7452 | |
7453 | auto *lhs = |
7454 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3}, "lhs" , false); |
7455 | bindings_.allocate(lhs); |
7456 | auto *rhs = mod_.createPlaceholder(ElemKind::FloatTy, {3, 3}, "rhs" , false); |
7457 | bindings_.allocate(rhs); |
7458 | |
7459 | bindings_.get(lhs)->getHandle() = { |
7460 | 8.7f, 6.5f, 4.3f, 2.1f, 1.0f, -5.1f, -4.0f, -12.0f, 0.2f, |
7461 | }; |
7462 | |
7463 | bindings_.get(rhs)->getHandle() = { |
7464 | -9.1f, -0.4f, 1.3f, 2.2f, -8.1f, 7.6f, -6.4f, 10.0f, 9.1f, |
7465 | }; |
7466 | |
7467 | auto *lhsq = F_->createQuantize("lhs.q" , lhs, lhsTy); |
7468 | auto *rhsq = F_->createQuantize("rhs.q" , rhs, rhsTy); |
7469 | |
7470 | auto *matmulq = F_->createBatchedAdd("add" , resTy, lhsq, rhsq); |
7471 | |
7472 | auto *rq = F_->createDequantize("dequant" , matmulq, ElemKind::FloatTy); |
7473 | |
7474 | auto *result = F_->createSave("save" , rq); |
7475 | bindings_.allocate(result->getPlaceholder()); |
7476 | EE_.compile(CompilationMode::Infer); |
7477 | |
7478 | EE_.run(bindings_); |
7479 | |
7480 | // A = [8.7, 6.5, 4.3, 2.1, 1.0, -5.1, -4.0, -12.0, 0.2] |
7481 | // B = [-9.1, -0.4, 1.3, 2.2, -8.1, 7.6, -6.4, 10.0, 9.1] |
7482 | // A + B = [-0.4, 6.1, 5.6, 4.3, -7.1, 2.5, -10.4, -2. , 9.3] |
7483 | auto H = bindings_.get(result->getPlaceholder())->getHandle(); |
7484 | constexpr float allowedError = 0.105; |
7485 | EXPECT_NEAR(H.at({0, 0, 0}), -0.4, allowedError); |
7486 | EXPECT_NEAR(H.at({0, 0, 1}), 6.1, allowedError); |
7487 | EXPECT_NEAR(H.at({0, 0, 2}), 5.6, allowedError); |
7488 | EXPECT_NEAR(H.at({0, 1, 0}), 4.3, allowedError); |
7489 | EXPECT_NEAR(H.at({0, 1, 1}), -7.1, allowedError); |
7490 | EXPECT_NEAR(H.at({0, 1, 2}), 2.5, allowedError); |
7491 | EXPECT_NEAR(H.at({0, 2, 0}), -10.4, allowedError); |
7492 | EXPECT_NEAR(H.at({0, 2, 1}), -2, allowedError); |
7493 | EXPECT_NEAR(H.at({0, 2, 2}), 9.3, allowedError); |
7494 | } |
7495 | |
7496 | TEST_P(OperatorTest, convTest) { |
7497 | CHECK_IF_ENABLED(); |
7498 | auto *input = |
7499 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 1}, "input" , false); |
7500 | auto IH = bindings_.allocate(input)->getHandle(); |
7501 | IH = {1, 1, 1, 1, 1, 1, 1, 1, 1}; |
7502 | |
7503 | auto filter = |
7504 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 1}, "filter" , false); |
7505 | auto FH = bindings_.allocate(filter)->getHandle(); |
7506 | FH = {0, 0, 0, 1, 1, 1, 0, 0, 0}; |
7507 | |
7508 | auto *zeroBias = |
7509 | mod_.createPlaceholder(ElemKind::FloatTy, {1}, "bias" , false); |
7510 | bindings_.allocate(zeroBias)->zero(); |
7511 | |
7512 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 3, 3, 1}); |
7513 | |
7514 | ConvolutionNode *CN = |
7515 | F_->createConv("Conv" , input, filter, zeroBias, outTy, 3, 1, 1, 1); |
7516 | SaveNode *S = F_->createSave("save" , CN); |
7517 | bindings_.allocate(S->getPlaceholder()); |
7518 | |
7519 | EE_.compile(CompilationMode::Infer); |
7520 | EE_.run(bindings_); |
7521 | |
7522 | auto result = bindings_.get(S->getPlaceholder()); |
7523 | |
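  // The filter sums the middle row of each 3x3 window. With pad 1, windows
  // centered on the left and right columns cover only two input ones, while
  // those centered on the middle column cover three.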
7524 | Tensor expected(outTy); |
7525 | expected.getHandle() = {2, 3, 2, 2, 3, 2, 2, 3, 2}; |
7526 | |
7527 | EXPECT_TRUE(expected.isEqual(*result)); |
7528 | } |
7529 | |
7530 | // Conv2D test with non-square dilation |
7531 | TEST_P(OperatorTest, NonSquareDilationConv2D) { |
7532 | CHECK_IF_ENABLED(); |
7533 | auto *input = |
7534 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 1}, "input" , false); |
7535 | auto IH = bindings_.allocate(input)->getHandle(); |
7536 | IH = {1, 1, 1, 1, 1, 1, 1, 1, 1}; |
7537 | |
7538 | auto filter = |
7539 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 1}, "filter" , false); |
7540 | auto FH = bindings_.allocate(filter)->getHandle(); |
7541 | FH = {0, 0, 0, 1, 1, 1, 0, 0, 0}; |
7542 | |
7543 | auto *zeroBias = |
7544 | mod_.createPlaceholder(ElemKind::FloatTy, {1}, "bias" , false); |
7545 | bindings_.allocate(zeroBias)->zero(); |
7546 | |
7547 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 1, 3, 1}); |
7548 | |
7549 | ConvolutionNode *CN = F_->createConv( |
7550 | "Conv" , input, filter, zeroBias, outTy, /* kernel */ 3, |
7551 | /* stride */ 1, /* pad */ 1, /* group */ 1, /* dilation */ {2, 1}); |
7552 | SaveNode *S = F_->createSave("save" , CN); |
7553 | bindings_.allocate(S->getPlaceholder()); |
7554 | |
7555 | EE_.compile(CompilationMode::Infer); |
7556 | EE_.run(bindings_); |
7557 | |
7558 | auto result = bindings_.get(S->getPlaceholder()); |
7559 | |
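  // Dilation {2, 1} stretches the kernel to an effective height of
  // 1 + (3 - 1) * 2 = 5; with pad 1 the padded input height is also 5, so
  // only a single output row is produced.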
7560 | Tensor expected(outTy); |
7561 | expected.getHandle() = {2, 3, 2}; |
7562 | |
7563 | EXPECT_TRUE(expected.isEqual(*result)); |
7564 | } |
7565 | |
7566 | TEST_P(OperatorTest, convTest_Float16) { |
7567 | CHECK_IF_ENABLED(); |
7568 | auto *input = |
7569 | mod_.createPlaceholder(ElemKind::Float16Ty, {1, 3, 3, 1}, "input" , false); |
7570 | auto IH = bindings_.allocate(input)->getHandle<float16_t>(); |
7571 | IH = {1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9}; |
7572 | |
7573 | auto filter = mod_.createPlaceholder(ElemKind::Float16Ty, {1, 3, 3, 1}, |
7574 | "filter" , false); |
7575 | auto FH = bindings_.allocate(filter)->getHandle<float16_t>(); |
7576 | FH = {0.25, 0.5, 0.25, 1, 1, 1, 0.25, 0.5, 0.25}; |
7577 | |
7578 | auto *zeroBias = |
7579 | mod_.createPlaceholder(ElemKind::Float16Ty, {1}, "bias" , false); |
7580 | bindings_.allocate(zeroBias)->zero(); |
7581 | |
7582 | auto outTy = mod_.uniqueType(ElemKind::Float16Ty, {1, 3, 3, 1}); |
7583 | |
7584 | ConvolutionNode *CN = |
7585 | F_->createConv("Conv" , input, filter, zeroBias, outTy, 3, 1, 1, 1); |
7586 | SaveNode *S = F_->createSave("save" , CN); |
7587 | bindings_.allocate(S->getPlaceholder()); |
7588 | |
7589 | EE_.compile(CompilationMode::Infer); |
7590 | EE_.run(bindings_); |
7591 | |
7592 | auto result = bindings_.get(S->getPlaceholder())->getHandle<float16_t>(); |
7593 | |
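  // E.g. the center output, where the filter fully overlaps the input:
  // 0.25 * 1.1 + 0.5 * 1.2 + 0.25 * 1.3 + (1.4 + 1.5 + 1.6) +
  // 0.25 * 1.7 + 0.5 * 1.8 + 0.25 * 1.9 = 7.5.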
7594 | Tensor expected(outTy); |
7595 | auto expectedH = expected.getHandle<float16_t>(); |
7596 | expectedH = {3.375, 5.102, 3.676, 5.051, 7.5, 5.449, 4.574, 6.898, 4.875}; |
7597 | |
7598 | for (dim_t x = 0; x < 3; x++) { |
7599 | for (dim_t y = 0; y < 3; y++) { |
7600 | EXPECT_NEAR(result.at({0, x, y, 0}), expectedH.at({0, x, y, 0}), 0.001); |
7601 | } |
7602 | } |
7603 | } |
7604 | |
7605 | TEST_P(OperatorTest, convTest_BFloat16) { |
7606 | CHECK_IF_ENABLED(); |
7607 | auto *input = mod_.createPlaceholder(ElemKind::BFloat16Ty, {1, 3, 3, 1}, |
7608 | "input" , false); |
7609 | auto IH = bindings_.allocate(input)->getHandle<bfloat16_t>(); |
7610 | IH = {1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9}; |
7611 | |
7612 | auto filter = mod_.createPlaceholder(ElemKind::BFloat16Ty, {1, 3, 3, 1}, |
7613 | "filter" , false); |
7614 | auto FH = bindings_.allocate(filter)->getHandle<bfloat16_t>(); |
7615 | FH = {0.25, 0.5, 0.25, 1, 1, 1, 0.25, 0.5, 0.25}; |
7616 | |
7617 | auto *zeroBias = |
7618 | mod_.createPlaceholder(ElemKind::BFloat16Ty, {1}, "bias" , false); |
7619 | bindings_.allocate(zeroBias)->zero(); |
7620 | |
7621 | auto outTy = mod_.uniqueType(ElemKind::BFloat16Ty, {1, 3, 3, 1}); |
7622 | |
7623 | ConvolutionNode *CN = |
7624 | F_->createConv("Conv" , input, filter, zeroBias, outTy, 3, 1, 1, 1); |
7625 | SaveNode *S = F_->createSave("save" , CN); |
7626 | bindings_.allocate(S->getPlaceholder()); |
7627 | |
7628 | EE_.compile(CompilationMode::Infer); |
7629 | EE_.run(bindings_); |
7630 | |
7631 | auto result = bindings_.get(S->getPlaceholder())->getHandle<bfloat16_t>(); |
7632 | |
7633 | Tensor expected(outTy); |
7634 | auto expectedH = expected.getHandle<bfloat16_t>(); |
7635 | expectedH = {3.375, 5.102, 3.676, 5.051, 7.5, 5.449, 4.574, 6.898, 4.875}; |
7636 | |
7637 | for (dim_t x = 0; x < 3; x++) { |
7638 | for (dim_t y = 0; y < 3; y++) { |
7639 | EXPECT_NEAR(result.at({0, x, y, 0}), expectedH.at({0, x, y, 0}), 0.05); |
7640 | } |
7641 | } |
7642 | } |
7643 | |
7644 | template <size_t convDepth> |
7645 | static FunctionTensorPair |
7646 | createAndInitConvDepthTest(glow::PlaceholderBindings &bindings, |
7647 | glow::ExecutionEngine &EE) { |
7648 | auto &mod = EE.getModule(); |
7649 | Function *F = mod.createFunction("main" ); |
7650 | |
7651 | auto *input = |
7652 | mod.createPlaceholder(ElemKind::FloatTy, {1, 10, 10, 3}, "in" , false); |
7653 | auto *conv = F->createConv(bindings, "conv" , input, convDepth, 5, 1, 0, 1); |
7654 | auto *bias = llvm::cast<Placeholder>(conv->getBias().getNode()); |
7655 | |
7656 | bindings.allocate(input)->getHandle().randomize(-1.0, 1.0, mod.getPRNG()); |
7657 | bindings.get(bias)->getHandle().randomize(-2.0, 2.0, mod.getPRNG()); |
7658 | |
7659 | auto *res = F->createSave("save" , conv); |
7660 | ::glow::convertPlaceholdersToConstants(F, bindings, |
7661 | {input, res->getPlaceholder()}); |
7662 | auto *resultTensor = bindings.allocate(res->getPlaceholder()); |
7663 | |
7664 | return std::make_pair(F, resultTensor); |
7665 | } |
7666 | |
7667 | TEST_P(OperatorStatelessTest, Int8ConvolutionDepth10) { |
7668 | CHECK_IF_ENABLED(); |
7669 | compareAgainstInterpreter(getBackendName(), createAndInitConvDepthTest<10>, |
7670 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.045f, |
7671 | parCloneCountOpt); |
7672 | } |
7673 | |
7674 | TEST_P(OperatorStatelessTest, Int16ConvolutionDepth10) { |
7675 | CHECK_IF_ENABLED(); |
7676 | compareAgainstInterpreter(getBackendName(), createAndInitConvDepthTest<10>, |
7677 | ElemKind::FloatTy, ElemKind::Int16QTy, 0.03f, |
7678 | parCloneCountOpt); |
7679 | } |
7680 | |
7681 | TEST_P(OperatorStatelessTest, Int8ConvolutionDepth8) { |
7682 | CHECK_IF_ENABLED(); |
7683 | compareAgainstInterpreter(getBackendName(), createAndInitConvDepthTest<8>, |
7684 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.03f, |
7685 | parCloneCountOpt); |
7686 | } |
7687 | TEST_P(OperatorStatelessTest, Int16ConvolutionDepth8) { |
7688 | CHECK_IF_ENABLED(); |
7689 | compareAgainstInterpreter(getBackendName(), createAndInitConvDepthTest<8>, |
7690 | ElemKind::FloatTy, ElemKind::Int16QTy, 0.03f, |
7691 | parCloneCountOpt); |
7692 | } |
7693 | |
7694 | TEST_P(OperatorStatelessTest, FP16ConvolutionDepth10) { |
7695 | CHECK_IF_ENABLED(); |
7696 | compareAgainstInterpreter(getBackendName(), createAndInitConvDepthTest<10>, |
7697 | ElemKind::FloatTy, ElemKind::Float16Ty, 0.015f, |
7698 | parCloneCountOpt); |
7699 | } |
7700 | |
7701 | TEST_P(OperatorStatelessTest, BFloat16ConvolutionDepth10) { |
7702 | CHECK_IF_ENABLED(); |
7703 | compareAgainstInterpreter(getBackendName(), createAndInitConvDepthTest<10>, |
7704 | ElemKind::FloatTy, ElemKind::BFloat16Ty, 0.015f, |
7705 | parCloneCountOpt); |
7706 | } |
7707 | |
7708 | TEST_P(OperatorStatelessTest, FP16ConvolutionDepth8) { |
7709 | CHECK_IF_ENABLED(); |
7710 | compareAgainstInterpreter(getBackendName(), createAndInitConvDepthTest<8>, |
7711 | ElemKind::FloatTy, ElemKind::Float16Ty, 0.015f, |
7712 | parCloneCountOpt); |
7713 | } |
7714 | |
7715 | TEST_P(OperatorStatelessTest, BFloat16ConvolutionDepth8) { |
7716 | CHECK_IF_ENABLED(); |
7717 | compareAgainstInterpreter(getBackendName(), createAndInitConvDepthTest<8>, |
7718 | ElemKind::FloatTy, ElemKind::BFloat16Ty, 0.015f, |
7719 | parCloneCountOpt); |
7720 | } |
7721 | |
7722 | TEST_P(OperatorStatelessTest, ConvolutionDepth10_Int8_BiasInt8) { |
7723 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
7724 | compareAgainstInterpreter( |
7725 | getBackendName(), createAndInitConvDepthTest<10>, ElemKind::FloatTy, |
7726 | ElemKind::Int8QTy, 0.03f, parCloneCountOpt, |
7727 | /* convertToRowwiseQuantization */ false, |
7728 | quantization::Schema::Asymmetric, ElemKind::Int8QTy); |
7729 | } |
7730 | |
7731 | TEST_P(OperatorStatelessTest, ConvolutionDepth10_Int8_BiasInt32) { |
7732 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
7733 | compareAgainstInterpreter( |
7734 | getBackendName(), createAndInitConvDepthTest<10>, ElemKind::FloatTy, |
7735 | ElemKind::Int8QTy, 0.03f, parCloneCountOpt, |
7736 | /* convertToRowwiseQuantization */ false, |
7737 | quantization::Schema::Asymmetric, ElemKind::Int32QTy); |
7738 | } |
7739 | |
7740 | TEST_P(OperatorStatelessTest, ConvolutionDepth10_Int16_BiasInt16) { |
7741 | ENABLED_BACKENDS("Interpreter" ); |
7742 | compareAgainstInterpreter( |
7743 | getBackendName(), createAndInitConvDepthTest<10>, ElemKind::FloatTy, |
7744 | ElemKind::Int16QTy, 0.0003f, parCloneCountOpt, |
7745 | /* convertToRowwiseQuantization */ false, |
7746 | quantization::Schema::Asymmetric, ElemKind::Int16QTy); |
7747 | } |
7748 | |
7749 | TEST_P(OperatorStatelessTest, ConvolutionDepth10_Int16_BiasInt32) { |
7750 | ENABLED_BACKENDS("Interpreter" ); |
7751 | compareAgainstInterpreter( |
7752 | getBackendName(), createAndInitConvDepthTest<10>, ElemKind::FloatTy, |
7753 | ElemKind::Int16QTy, 0.0003f, parCloneCountOpt, |
7754 | /* convertToRowwiseQuantization */ false, |
7755 | quantization::Schema::Asymmetric, ElemKind::Int32QTy); |
7756 | } |
7757 | |
7758 | static FunctionTensorPair |
7759 | createAndInitBasicConcatTest(glow::PlaceholderBindings &bindings, |
7760 | glow::ExecutionEngine &EE) { |
7761 | auto &mod = EE.getModule(); |
7762 | Function *F = mod.createFunction("main" ); |
7763 | |
7764 | auto *A = mod.createPlaceholder(ElemKind::FloatTy, {3, 3}, "A" , false); |
7765 | auto *B = mod.createPlaceholder(ElemKind::FloatTy, {2, 3}, "B" , false); |
7766 | bindings.allocate(A)->getHandle().randomize(-1.0, 1.0, mod.getPRNG()); |
7767 | bindings.allocate(B)->getHandle().randomize(-1.0, 1.0, mod.getPRNG()); |
7768 | |
7769 | auto *C = F->createConcat("concat" , {A, B}, 0); |
7770 | auto *res = F->createSave("save" , C); |
7771 | auto *resultTensor = bindings.allocate(res->getPlaceholder()); |
7772 | |
7773 | ::glow::convertPlaceholdersToConstants(F, bindings, |
7774 | {A, B, res->getPlaceholder()}); |
7775 | |
7776 | return std::make_pair(F, resultTensor); |
7777 | } |
7778 | |
7779 | TEST_P(OperatorStatelessTest, IntConcat) { |
7780 | CHECK_IF_ENABLED(); |
7781 | compareAgainstInterpreter(getBackendName(), createAndInitBasicConcatTest, |
7782 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.05f, |
7783 | parCloneCountOpt); |
7784 | } |
7785 | |
7786 | TEST_P(OperatorTest, DynamicRowwiseQuantizedFullyConnectedBasic) { |
7787 | CHECK_IF_ENABLED(); |
7788 | auto *input = |
7789 | mod_.createPlaceholder(ElemKind::Float16Ty, {2, 3}, "input" , false); |
7790 | Constant *weights = |
7791 | mod_.createConstant(ElemKind::Int8QTy, {3, 4}, 1000.0, 0.0, "weights" ); |
7792 | Constant *bias = mod_.createConstant(ElemKind::FloatTy, {4}, "bias" ); |
7793 | Constant *scales = |
7794 | mod_.createConstant(ElemKind::FloatTy, {4}, "weight_scales" ); |
7795 | Constant *offsets = |
7796 | mod_.createConstant(ElemKind::Int32ITy, {4}, "weight_offsets" ); |
7797 | bindings_.allocate(input)->getHandle<float16_t>() = {1.0f, 2.0f, 3.0f, |
7798 | 4.0f, 5.0f, 6.0f}; |
7799 | weights->getPayloadMutable().getHandle<int8_t>() = {1, 4, 7, 10, 2, 5, |
7800 | 8, 11, 3, 6, 9, 12}; |
7801 | bias->getPayloadMutable().getHandle<float>() = {1.0f, 2.0f, 3.0f, 4.0f}; |
7802 | scales->getPayloadMutable().getHandle<float>() = {1.0f, 2.0f, 3.0f, 4.0f}; |
7803 | offsets->getPayloadMutable().getHandle<int>() = {0, 0, 0, 0}; |
7804 | |
7805 | auto *DRQFC = F_->createDynamicRowwiseQuantizedFullyConnected( |
7806 | "drqfc" , input, weights, bias, scales, offsets); |
7807 | auto *S = F_->createSave("save" , DRQFC); |
7808 | bindings_.allocate(S->getPlaceholder()); |
7809 | |
7810 | EE_.compile(CompilationMode::Infer); |
7811 | EE_.run(bindings_); |
7812 | |
7813 | auto result = bindings_.get(S->getPlaceholder())->getHandle<float16_t>(); |
7814 | std::vector<dim_t> expectedDimensions = {2, 4}; |
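  // Each weight column j is dequantized with its own scale (j + 1), e.g.
  // row 0, column 1: (1 * 4 + 2 * 5 + 3 * 6) * 2 + bias 2 = 66.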
7815 | std::vector<float> expectedValues = {15.0f, 66.0f, 153.0f, 276.0f, |
7816 | 33.0f, 156.0f, 369.0f, 672.0f}; |
7817 | EXPECT_TRUE(result.dims().vec() == expectedDimensions); |
7818 | for (size_t i = 0; i < 2 * 4; i++) { |
7819 | // DynRQFC's largest error in this unittest is around 1.0 |
7820 | EXPECT_NEAR(result.raw(i), expectedValues[i], 1.5); |
7821 | } |
7822 | } |
7823 | |
7824 | TEST_P(OperatorTest, DynamicQuantizedFullyConnectedBasic) { |
7825 | CHECK_IF_ENABLED(); |
7826 | auto *input = |
7827 | mod_.createPlaceholder(ElemKind::Float16Ty, {2, 3}, "input" , false); |
7828 | Constant *weights = |
7829 | mod_.createConstant(ElemKind::Int8QTy, {3, 4}, 1.0, 0.0, "weights" ); |
7830 | Constant *bias = mod_.createConstant(ElemKind::FloatTy, {4}, "bias" ); |
7831 | bindings_.allocate(input)->getHandle<float16_t>() = {1.0f, 2.0f, 3.0f, |
7832 | 4.0f, 5.0f, 6.0f}; |
7833 | weights->getPayloadMutable().getHandle<int8_t>() = {1, 4, 7, 10, 2, 5, |
7834 | 8, 11, 3, 6, 9, 12}; |
7835 | bias->getPayloadMutable().getHandle<float>() = {1.0f, 2.0f, 3.0f, 4.0f}; |
7836 | |
7837 | auto *DQFC = |
7838 | F_->createDynamicQuantizedFullyConnected("dqfc" , input, weights, bias); |
7839 | auto *S = F_->createSave("save" , DQFC); |
7840 | bindings_.allocate(S->getPlaceholder()); |
7841 | |
7842 | EE_.compile(CompilationMode::Infer); |
7843 | EE_.run(bindings_); |
7844 | |
7845 | auto result = bindings_.get(S->getPlaceholder())->getHandle<float16_t>(); |
7846 | std::vector<dim_t> expectedDimensions = {2, 4}; |
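  // The weight scale is 1.0, so this is plain FC arithmetic, e.g.
  // row 0, column 1: 1 * 4 + 2 * 5 + 3 * 6 + 2 = 34.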
7847 | std::vector<float> expectedValues = {15.0f, 34.0f, 53.0f, 72.0f, |
7848 | 33.0f, 79.0f, 125.0f, 171.0f}; |
7849 | EXPECT_TRUE(result.dims().vec() == expectedDimensions); |
7850 | for (size_t i = 0; i < 2 * 4; i++) { |
7851 | // DynQFC's largest error in this unittest is around 2e-1 |
7852 | EXPECT_NEAR(result.raw(i), expectedValues[i], 3e-1); |
7853 | } |
7854 | } |
7855 | |
7856 | TEST_P(OperatorTest, DynamicQuantizedFullyConnectedStrongWeights) { |
7857 | CHECK_IF_ENABLED(); |
7858 | auto *input = |
7859 | mod_.createPlaceholder(ElemKind::Float16Ty, {3, 4}, "input" , false); |
7860 | Constant *weights = |
7861 | mod_.createConstant(ElemKind::Int8QTy, {4, 2}, 0.5, 0, "weights" ); |
7862 | Constant *bias = mod_.createConstant(ElemKind::FloatTy, {2}, "bias" ); |
7863 | bindings_.allocate(input)->getHandle<float16_t>() = { |
7864 | 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 3.0f, 4.0f, 5.0f, 3.0f, 4.0f, 5.0f, 6.0f}; |
7865 | weights->getPayloadMutable().getHandle<int8_t>() = {1, 4, 2, 3, 3, 2, 4, 1}; |
7866 | bias->getPayloadMutable().getHandle<float>() = {1.0f, 2.0f}; |
7867 | |
7868 | auto *DQFC = |
7869 | F_->createDynamicQuantizedFullyConnected("dqfc" , input, weights, bias); |
7870 | auto *S = F_->createSave("save" , DQFC); |
7871 | bindings_.allocate(S->getPlaceholder()); |
7872 | |
7873 | EE_.compile(CompilationMode::Infer); |
7874 | EE_.run(bindings_); |
7875 | |
7876 | auto result = bindings_.get(S->getPlaceholder())->getHandle<float16_t>(); |
7877 | std::vector<dim_t> expectedDimensions = {3, 2}; |
7878 | std::vector<float> expectedValues = {16.0f, 12.0f, 21.0f, |
7879 | 17.0f, 26.0f, 22.0f}; |
7880 | EXPECT_TRUE(result.dims().vec() == expectedDimensions); |
7881 | for (size_t i = 0; i < 3 * 2; i++) { |
7882 | // DynQFC's largest error in this unittest is around 2e-1 |
7883 | EXPECT_NEAR(result.raw(i), expectedValues[i], 3e-1); |
7884 | } |
7885 | } |
7886 | |
7887 | TEST_P(OperatorTest, FCWithFlatten) { |
7888 | CHECK_IF_ENABLED(); |
7889 | |
7890 | auto *input = |
7891 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 1, 3}, "input" , false); |
7892 | Constant *weights = mod_.createConstant(ElemKind::FloatTy, {3, 4}, "weights" ); |
7893 | Constant *bias = mod_.createConstant(ElemKind::FloatTy, {4}, "bias" ); |
7894 | |
7895 | bindings_.allocate(input)->getHandle() = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; |
7896 | weights->getPayloadMutable().getHandle() = {1.0f, 4.0f, 7.0f, 10.0f, // |
7897 | 2.0f, 5.0f, 8.0f, 11.0f, // |
7898 | 3.0f, 6.0f, 9.0f, 12.0f}; |
7899 | bias->getPayloadMutable().getHandle() = {0.1f, 0.2f, 0.3f, 0.4f}; |
7900 | |
7901 | auto *FC = F_->createFullyConnected("fc" , input, weights, bias); |
7902 | auto *S = F_->createSave("save" , FC); |
7903 | bindings_.allocate(S->getPlaceholder()); |
7904 | |
7905 | EE_.compile(CompilationMode::Infer); |
7906 | EE_.run(bindings_); |
7907 | |
7908 | auto result = bindings_.get(S->getPlaceholder())->getHandle(); |
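  // createFullyConnected flattens the {2, 1, 3} input to {2, 3} before the
  // matmul, e.g. row 0, column 0: 1 * 1 + 2 * 2 + 3 * 3 + 0.1 = 14.1.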
7909 | std::vector<dim_t> expectedDimensions = {2, 4}; |
7910 | std::vector<float> expectedValues = {14.1f, 32.2f, 50.3f, 68.4f, |
7911 | 32.1f, 77.2f, 122.3f, 167.4f}; |
7912 | EXPECT_TRUE(result.dims().vec() == expectedDimensions); |
7913 | for (size_t i = 0; i < 2 * 4; i++) { |
7914 | EXPECT_FLOAT_EQ(result.raw(i), expectedValues[i]); |
7915 | } |
7916 | } |
7917 | |
7918 | TEST_P(OperatorTest, TestFP32Accumulator) { |
7919 | CHECK_IF_ENABLED(); |
7920 | auto *input = |
7921 | mod_.createPlaceholder(ElemKind::Float16Ty, {1, 3}, "input" , false); |
7922 | Constant *weights = |
7923 | mod_.createConstant(ElemKind::Float16Ty, {3, 2}, "weights" ); |
7924 | Constant *bias = mod_.createConstant(ElemKind::Float16Ty, {2}, "bias" ); |
7925 | |
  /* Each update to the first output is 9.7e-4 * 0.5 = 4.85e-4, which is
     below half of the FP16 mantissa step at 1.0 (2^-10 ~= 9.77e-4), so with
     FP16 accumulation both updates round away and the result stays at
     exactly 1.0. With an FP32 accumulator the updates survive and the
     result is 1 + 9.7e-4 ~= 1.00097, slightly larger than 1.
  */
  bindings_.allocate(input)->getHandle<float16_t>() = {1.0f, 9.7e-4f, 9.7e-4f};
7933 | weights->getPayloadMutable().getHandle<float16_t>() = {1.0f, 1.0f, 0.5f, |
7934 | 1.0f, 0.5f, 1.0f}; |
7935 | bias->getPayloadMutable().getHandle<float16_t>() = {0.0f, 0.0f}; |
7936 | |
7937 | auto *FC = F_->createFullyConnected("fc" , input, weights, bias); |
7938 | auto *S = F_->createSave("save" , FC); |
7939 | bindings_.allocate(S->getPlaceholder()); |
7940 | EE_.compile(CompilationMode::Infer); |
7941 | EE_.run(bindings_); |
7942 | auto result = bindings_.get(S->getPlaceholder())->getHandle<float16_t>(); |
7943 | std::vector<dim_t> expectedDimensions = {1, 2}; |
7944 | |
7945 | EXPECT_TRUE(result.dims().vec() == expectedDimensions); |
7946 | float finalResult = result.raw(0); |
7947 | if (finalResult == 1.0) { |
7948 | llvm::outs() << "fp16 accumulator\n" ; |
7949 | } else if (fabs(finalResult - 1.00098) < 1e-3) { |
7950 | llvm::outs() << "fp32 accumulator\n" ; |
7951 | } else { |
7952 | // Unhandled case |
7953 | FAIL() << "unknown " << finalResult; |
7954 | } |
7955 | llvm::outs().flush(); |
7956 | } |
7957 | |
7958 | static FunctionTensorPair |
7959 | createAndInitBasicFCTest(glow::PlaceholderBindings &bindings, |
7960 | glow::ExecutionEngine &EE) { |
7961 | auto &mod = EE.getModule(); |
7962 | Function *F = mod.createFunction("main" ); |
7963 | |
7964 | auto *input = |
7965 | mod.createPlaceholder(ElemKind::FloatTy, {1, 10, 10, 3}, "in" , false); |
7966 | auto *fc = F->createFullyConnected(bindings, "FC" , input, 30); |
7967 | |
7968 | auto *weights = llvm::cast<Placeholder>(fc->getWeights()); |
7969 | auto *bias = llvm::cast<Placeholder>(fc->getBias()); |
7970 | |
7971 | bindings.allocate(input)->getHandle().randomize(-0.5, 0.5, mod.getPRNG()); |
7972 | bindings.get(bias)->getHandle().randomize(0, 0.00001, mod.getPRNG()); |
7973 | bindings.get(weights)->getHandle().randomize(-0.7, 0.7, mod.getPRNG()); |
7974 | |
7975 | auto *res = F->createSave("save" , fc); |
7976 | ::glow::convertPlaceholdersToConstants(F, bindings, |
7977 | {input, res->getPlaceholder()}); |
7978 | auto *resultTensor = bindings.allocate(res->getPlaceholder()); |
7979 | |
7980 | return std::make_pair(F, resultTensor); |
7981 | } |
7982 | |
7983 | TEST_P(OperatorStatelessTest, IntFC) { |
7984 | CHECK_IF_ENABLED(); |
7985 | compareAgainstInterpreter(getBackendName(), createAndInitBasicFCTest, |
7986 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.05f, |
7987 | parCloneCountOpt); |
7988 | } |
7989 | |
7990 | /// Test FC with Float16. |
7991 | TEST_P(OperatorStatelessTest, FC_Float16) { |
7992 | CHECK_IF_ENABLED(); |
7993 | compareAgainstInterpreter(getBackendName(), createAndInitBasicFCTest, |
7994 | ElemKind::FloatTy, ElemKind::Float16Ty, 0.02f, |
7995 | parCloneCountOpt); |
7996 | } |
7997 | |
7998 | /// Test FC with BFloat16. |
7999 | TEST_P(OperatorStatelessTest, FC_BFloat16) { |
8000 | CHECK_IF_ENABLED(); |
8001 | compareAgainstInterpreter(getBackendName(), createAndInitBasicFCTest, |
8002 | ElemKind::FloatTy, ElemKind::BFloat16Ty, 0.02f, |
8003 | parCloneCountOpt); |
8004 | } |
8005 | |
8006 | /// Test Int8 FullyConnected with Int8 bias. |
8007 | TEST_P(OperatorStatelessTest, FullyConnected_Int8_BiasInt8) { |
8008 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
8009 | compareAgainstInterpreter( |
8010 | getBackendName(), createAndInitBasicFCTest, ElemKind::FloatTy, |
8011 | ElemKind::Int8QTy, 0.05f, parCloneCountOpt, |
8012 | /* convertToRowwiseQuantization */ false, |
8013 | quantization::Schema::Asymmetric, ElemKind::Int8QTy); |
8014 | } |
8015 | |
8016 | /// Test Int8 FullyConnected with Int32 bias. |
8017 | TEST_P(OperatorStatelessTest, FullyConnected_Int8_BiasInt32) { |
8018 | ENABLED_BACKENDS("Interpreter" , "CPU" , "NNPI" ); |
8019 | compareAgainstInterpreter( |
8020 | getBackendName(), createAndInitBasicFCTest, ElemKind::FloatTy, |
8021 | ElemKind::Int8QTy, 0.05f, parCloneCountOpt, |
8022 | /* convertToRowwiseQuantization */ false, |
8023 | quantization::Schema::Asymmetric, ElemKind::Int32QTy); |
8024 | } |
8025 | |
8026 | /// Test Int16 FullyConnected with Int16 bias. |
8027 | TEST_P(OperatorStatelessTest, FullyConnected_Int16_BiasInt16) { |
8028 | ENABLED_BACKENDS("Interpreter" ); |
8029 | compareAgainstInterpreter( |
8030 | getBackendName(), createAndInitBasicFCTest, ElemKind::FloatTy, |
8031 | ElemKind::Int16QTy, 0.0005f, parCloneCountOpt, |
8032 | /* convertToRowwiseQuantization */ false, |
8033 | quantization::Schema::Asymmetric, ElemKind::Int16QTy); |
8034 | } |
8035 | |
8036 | /// Test Int16 FullyConnected with Int32 bias. |
8037 | TEST_P(OperatorStatelessTest, FullyConnected_Int16_BiasInt32) { |
8038 | ENABLED_BACKENDS("Interpreter" ); |
8039 | compareAgainstInterpreter( |
8040 | getBackendName(), createAndInitBasicFCTest, ElemKind::FloatTy, |
8041 | ElemKind::Int16QTy, 0.0005f, parCloneCountOpt, |
8042 | /* convertToRowwiseQuantization */ false, |
8043 | quantization::Schema::Asymmetric, ElemKind::Int32QTy); |
8044 | } |
8045 | |
8046 | TEST_P(OperatorTest, EntropyLossTest) { |
8047 | CHECK_IF_ENABLED(); |
8048 | |
8049 | auto *P = mod_.createPlaceholder(ElemKind::FloatTy, {2, 3}, "P" , false); |
8050 | auto *Y = mod_.createPlaceholder(ElemKind::Int64ITy, {2}, "Y" , false); |
8051 | |
8052 | bindings_.allocate(P)->getHandle() = {0.2f, 0.5f, 0.3f, 0.4f, 0.3f, 0.3f}; |
8053 | bindings_.allocate(Y)->getHandle<int64_t>() = {1, 2}; |
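  // CrossEntropyLoss sums -log(p[i][y_i]) over the batch:
  // -log(0.5) - log(0.3) ~= 1.897.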
8054 | auto *ceLoss = F_->createCrossEntropyLoss("CELoss" , P, Y); |
8055 | auto *L = F_->createSave("save" , ceLoss); |
8056 | bindings_.allocate(L->getPlaceholder()); |
8057 | |
8058 | EE_.compile(CompilationMode::Infer); |
8059 | EE_.run(bindings_); |
8060 | |
8061 | auto R = bindings_.get(L->getPlaceholder())->getHandle(); |
8062 | EXPECT_NEAR(R.at({0}), -log(0.5) - log(0.3), 0.1); |
8063 | } |
8064 | |
8065 | /// Check that the max operator works properly with FP16. |
8066 | TEST_P(OperatorTest, FP16Max) { |
8067 | CHECK_IF_ENABLED(); |
8068 | |
8069 | PseudoRNG PRNG; |
8070 | |
8071 | auto *inputA = |
8072 | mod_.createPlaceholder(ElemKind::Float16Ty, {1, 3, 3, 1}, "A" , false); |
8073 | bindings_.allocate(inputA)->getHandle<float16_t>().randomize(-3.0, 3.0, PRNG); |
8074 | auto *inputB = |
8075 | mod_.createPlaceholder(ElemKind::Float16Ty, {1, 3, 3, 1}, "B" , false); |
8076 | bindings_.allocate(inputB)->getHandle<float16_t>().randomize(-3.0, 3.0, PRNG); |
8077 | auto *Max = F_->createMax("max" , inputA, inputB); |
8078 | auto *S = F_->createSave("save" , Max); |
8079 | bindings_.allocate(S->getPlaceholder()); |
8080 | |
8081 | EE_.compile(CompilationMode::Infer); |
8082 | EE_.run(bindings_); |
8083 | |
8084 | auto result = bindings_.get(S->getPlaceholder())->getHandle<float16_t>(); |
8085 | auto handleA = bindings_.get(inputA)->getHandle<float16_t>(); |
8086 | auto handleB = bindings_.get(inputB)->getHandle<float16_t>(); |
8087 | ASSERT_EQ(result.size(), handleA.size()); |
8088 | for (size_t idx = 0, end = result.size(); idx != end; ++idx) { |
8089 | EXPECT_EQ(result.raw(idx), std::max(handleA.raw(idx), handleB.raw(idx))); |
8090 | } |
8091 | } |
8092 | |
/// Check that the max operator works properly with BFloat16.
8094 | TEST_P(OperatorTest, BFloat16Max) { |
8095 | CHECK_IF_ENABLED(); |
8096 | |
8097 | PseudoRNG PRNG; |
8098 | |
8099 | auto *inputA = |
8100 | mod_.createPlaceholder(ElemKind::BFloat16Ty, {1, 3, 3, 1}, "A" , false); |
8101 | bindings_.allocate(inputA)->getHandle<bfloat16_t>().randomize(-3.0, 3.0, |
8102 | PRNG); |
8103 | auto *inputB = |
8104 | mod_.createPlaceholder(ElemKind::BFloat16Ty, {1, 3, 3, 1}, "B" , false); |
8105 | bindings_.allocate(inputB)->getHandle<bfloat16_t>().randomize(-3.0, 3.0, |
8106 | PRNG); |
8107 | auto *Max = F_->createMax("max" , inputA, inputB); |
8108 | auto *S = F_->createSave("save" , Max); |
8109 | bindings_.allocate(S->getPlaceholder()); |
8110 | |
8111 | EE_.compile(CompilationMode::Infer); |
8112 | EE_.run(bindings_); |
8113 | |
8114 | auto result = bindings_.get(S->getPlaceholder())->getHandle<bfloat16_t>(); |
8115 | auto handleA = bindings_.get(inputA)->getHandle<bfloat16_t>(); |
8116 | auto handleB = bindings_.get(inputB)->getHandle<bfloat16_t>(); |
8117 | ASSERT_EQ(result.size(), handleA.size()); |
8118 | for (size_t idx = 0, end = result.size(); idx != end; ++idx) { |
8119 | EXPECT_EQ(result.raw(idx), std::max(handleA.raw(idx), handleB.raw(idx))); |
8120 | } |
8121 | } |
8122 | |
8123 | /// Helper to test Broadcast Max/Min using \p DTy and \p NTy |
8124 | template <typename DataType, typename NodeType> |
8125 | static void testBroadcastMaxMin(glow::PlaceholderBindings &bindings, |
8126 | glow::Module &mod, glow::Function *F, |
8127 | glow::ExecutionEngine &EE, ElemKind DTy) { |
8128 | |
8129 | auto *inputA = mod.createPlaceholder(DTy, {1, 3, 3, 1}, "A" , false); |
8130 | bindings.allocate(inputA)->getHandle<DataType>().randomize(-3.0, 3.0, |
8131 | mod.getPRNG()); |
8132 | auto *inputB = mod.createPlaceholder(DTy, {1, 3, 3, 1}, "B" , false); |
8133 | bindings.allocate(inputB)->getHandle<DataType>().randomize(-3.0, 3.0, |
8134 | mod.getPRNG()); |
8135 | |
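  // An axis of -1 lets createNodeWithBroadcast align the operands' trailing
  // dimensions automatically (a no-op here, since the shapes already match).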
8136 | Node *maxorMinOp = F->createNodeWithBroadcast<NodeType>( |
8137 | "maxormin" , -1 /*axis */, inputA, inputB); |
8138 | |
8139 | auto *S = F->createSave("save" , maxorMinOp); |
8140 | bindings.allocate(S->getPlaceholder()); |
8141 | |
8142 | EE.compile(CompilationMode::Infer); |
8143 | EE.run(bindings); |
8144 | |
8145 | ASSERT_TRUE(F->verify(&EE.getBackend())) |
8146 | << "Function must pass verification." ; |
8147 | |
8148 | auto result = bindings.get(S->getPlaceholder())->getHandle<DataType>(); |
8149 | auto handleA = bindings.get(inputA)->getHandle<DataType>(); |
8150 | auto handleB = bindings.get(inputB)->getHandle<DataType>(); |
8151 | ASSERT_EQ(result.size(), handleA.size()); |
8152 | for (size_t idx = 0, end = result.size(); idx != end; ++idx) { |
8153 | if (std::is_same<NodeType, MaxNode>::value) { |
8154 | EXPECT_EQ(result.raw(idx), std::max(handleA.raw(idx), handleB.raw(idx))); |
8155 | } else { |
8156 | EXPECT_EQ(result.raw(idx), std::min(handleA.raw(idx), handleB.raw(idx))); |
8157 | } |
8158 | } |
8159 | } |
8160 | |
8161 | TEST_P(OperatorTest, BroadCastMax) { |
8162 | CHECK_IF_ENABLED(); |
8163 | testBroadcastMaxMin<int64_t, MaxNode>(bindings_, mod_, F_, EE_, |
8164 | ElemKind::Int64ITy); |
8165 | } |
8166 | |
8167 | TEST_P(OperatorTest, BroadCastMin) { |
8168 | CHECK_IF_ENABLED(); |
8169 | testBroadcastMaxMin<int64_t, MinNode>(bindings_, mod_, F_, EE_, |
8170 | ElemKind::Int64ITy); |
8171 | } |
8172 | |
8173 | TEST_P(OperatorTest, RescaleNode) { |
8174 | CHECK_IF_ENABLED(); |
8175 | |
8176 | // Check the outputs of the RescaleQuantized operation. |
8177 | auto *input = mod_.createPlaceholder(ElemKind::Int8QTy, {4, 10}, 0.4, -3, |
8178 | "input" , false); |
8179 | bindings_.allocate(input)->init(Tensor::InitKind::Broadcast, 40, |
8180 | mod_.getPRNG()); |
8181 | |
8182 | auto T1 = mod_.uniqueType(ElemKind::Int8QTy, {4, 10}, 0.7, 5); |
8183 | auto T2 = mod_.uniqueType(ElemKind::Int8QTy, {4, 10}, 0.3, -4); |
8184 | auto resTy = mod_.uniqueType(ElemKind::Int8QTy, {4, 10}, 0.4, -4); |
8185 | |
8186 | // Test a sequence of rescale operations that the optimizer may try to |
8187 | // optimize at some point. |
8188 | auto *X = F_->createRescaleQuantized("R1" , input, T1); |
8189 | auto *Y = F_->createRescaleQuantized("R2" , X, T2); |
8190 | auto *Z = F_->createRescaleQuantized("R3" , Y, resTy); |
8191 | |
8192 | auto *output = F_->createSave("save" , Z); |
8193 | bindings_.allocate(output->getPlaceholder()); |
8194 | |
8195 | EE_.compile(CompilationMode::Infer); |
8196 | EE_.run(bindings_); |
8197 | |
8198 | auto RI = bindings_.get(input)->getHandle<int8_t>(); |
8199 | auto RO = bindings_.get(output->getPlaceholder())->getHandle<int8_t>(); |
8200 | |
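  // Raw value 40 at (scale 0.4, offset -3) represents (40 + 3) * 0.4 = 17.2;
  // requantized at (0.4, -4) it becomes round(17.2 / 0.4) - 4 = 39, within 1
  // of the original raw value.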
  EXPECT_EQ(RI.raw(0), 40);
  EXPECT_NEAR(RO.raw(0), 40, 1);
}

TEST_P(OperatorTest, QuantizedArithmeticRescaled) {
  CHECK_IF_ENABLED();

  const dim_t len = 100;

  // In this test we check the correctness of the quantized Max, Min, Add,
  // Sub, Mul, and Div nodes as well as how they interact with the rescaling
  // node.
  auto *A = mod_.createPlaceholder(ElemKind::FloatTy, {len}, "A", false);
  auto *B = mod_.createPlaceholder(ElemKind::FloatTy, {len}, "B", false);
  auto *C = mod_.createPlaceholder(ElemKind::FloatTy, {len}, "C", false);

  auto AH = bindings_.allocate(A)->getHandle();
  auto BH = bindings_.allocate(B)->getHandle();
  auto CH = bindings_.allocate(C)->getHandle();

  AH.randomize(-10, 10, mod_.getPRNG());
  BH.randomize(-10, 10, mod_.getPRNG());
  // Below, randomize between 1 and 10 to avoid division by 0 later.
  CH.randomize(1, 10, mod_.getPRNG());

  auto TA = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.2, 0);
  auto TB = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.1, 0);
  auto TC = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.3, 0);

  auto TI1 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.1, 0);
  auto TI2 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.8, 0);
  auto TI3 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.9, 0);
  auto TI4 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.0, 0);
  auto TI5 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.2, 0);
  auto TI6 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.7, 0);

  auto TO1 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.0, 0);
  auto TO2 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.9, 0);
  auto TO3 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.1, 0);
  auto TO4 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.2, 0);
  auto TO5 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.0, 0);
  auto TO6 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.1, 0);

  // Quantize input vars and apply max/min/add/sub/mul/div quantized.
  auto *QA = F_->createQuantize("QA", A, TA);
  auto *QB = F_->createQuantize("QB", B, TB);
  auto *QC = F_->createQuantize("QC", C, TC);

  Node *max = F_->createMax("max", TI1, QA, QB);
  Node *min = F_->createMin("min", TI2, QA, QB);
  Node *add = F_->createAdd("add", TI3, QA, QB);
  Node *sub = F_->createSub("sub", TI4, QA, QB);
  Node *mul = F_->createMul("mul", TI5, QA, QB);
  Node *div = F_->createDiv("div", TI6, QB, QC);

  // Rescale quantized results.
  max = F_->createRescaleQuantized("rescaleMax", max, TO1);
  min = F_->createRescaleQuantized("rescaleMin", min, TO2);
  add = F_->createRescaleQuantized("rescaleAdd", add, TO3);
  sub = F_->createRescaleQuantized("rescaleSub", sub, TO4);
  mul = F_->createRescaleQuantized("rescaleMul", mul, TO5);
  div = F_->createRescaleQuantized("rescaleDiv", div, TO6);

  // Dequantize results back to floating-point.
  max = F_->createDequantize("maxDQ", max, ElemKind::FloatTy);
  min = F_->createDequantize("minDQ", min, ElemKind::FloatTy);
  add = F_->createDequantize("addDQ", add, ElemKind::FloatTy);
  sub = F_->createDequantize("subDQ", sub, ElemKind::FloatTy);
  mul = F_->createDequantize("mulDQ", mul, ElemKind::FloatTy);
  div = F_->createDequantize("divDQ", div, ElemKind::FloatTy);

  // Save results of the operations.
  auto *O1 = F_->createSave("saveMax", max);
  auto *O2 = F_->createSave("saveMin", min);
  auto *O3 = F_->createSave("saveAdd", add);
  auto *O4 = F_->createSave("saveSub", sub);
  auto *O5 = F_->createSave("saveMul", mul);
  auto *O6 = F_->createSave("saveDiv", div);

  bindings_.allocate(O1->getPlaceholder());
  bindings_.allocate(O2->getPlaceholder());
  bindings_.allocate(O3->getPlaceholder());
  bindings_.allocate(O4->getPlaceholder());
  bindings_.allocate(O5->getPlaceholder());
  bindings_.allocate(O6->getPlaceholder());

  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);

  for (dim_t i = 0; i < len; i++) {
    auto max = std::max(AH.at({i}), BH.at({i}));
    auto min = std::min(AH.at({i}), BH.at({i}));
    auto add = AH.at({i}) + BH.at({i});
    auto sub = AH.at({i}) - BH.at({i});
    auto mul = AH.at({i}) * BH.at({i});
    auto div = BH.at({i}) / CH.at({i});

    // The inputs are in [-10, 10], so results can reach ~100 in magnitude;
    // a difference of 2 (~2%) is therefore reasonable.
    EXPECT_NEAR(max, bindings_.get(O1->getPlaceholder())->getHandle().at({i}),
                2.0);
    EXPECT_NEAR(min, bindings_.get(O2->getPlaceholder())->getHandle().at({i}),
                2.0);
    EXPECT_NEAR(add, bindings_.get(O3->getPlaceholder())->getHandle().at({i}),
                2.0);
    EXPECT_NEAR(sub, bindings_.get(O4->getPlaceholder())->getHandle().at({i}),
                2.0);
    EXPECT_NEAR(mul, bindings_.get(O5->getPlaceholder())->getHandle().at({i}),
                2.0);
    EXPECT_NEAR(div, bindings_.get(O6->getPlaceholder())->getHandle().at({i}),
                2.0);
  }
}

static FunctionTensorPair
createAndInitTransposeNet(glow::PlaceholderBindings &bindings,
                          glow::ExecutionEngine &EE) {
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  auto *A = mod.createPlaceholder(ElemKind::FloatTy, {2, 3}, "A", false);
  bindings.allocate(A)->getHandle() = {1, 1.2f, 0.5f, 1.3f, 2.7f, 3.1f};
  auto *tr = F->createTranspose("Tr", A, {1, 0});
  auto *result = F->createSave("Ret", tr);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  return std::make_pair(F, resultTensor);
}

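// compareAgainstInterpreter (used below) builds this network twice: once in
// FloatTy on the Interpreter as a reference, and once in Int8QTy on the
// tested backend, then requires the two results to agree within the given
// tolerance.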
TEST_P(OperatorStatelessTest, QuantizedTranspose) {
  CHECK_IF_ENABLED();
  compareAgainstInterpreter(getBackendName(), createAndInitTransposeNet,
                            ElemKind::FloatTy, ElemKind::Int8QTy, 0.0045f,
                            parCloneCountOpt);
}

TEST_P(OperatorTest, QuantizedArithmeticUnrescaled) {
  CHECK_IF_ENABLED();

  const dim_t len = 1000;

  // In this test we check the correctness of the quantized Max, Min, Add,
  // Sub, Mul, and Div operations.
  auto TQA = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.1, -1);
  auto TQB = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.9, 2);
  // For TQC, set the offset to -11 so that even the smallest random value
  // (-10) dequantizes to a positive number, avoiding division by 0 later.
  auto TQC = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.2, -11);
  auto TO1 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.4, 3);
  auto TO2 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.8, 2);
  auto TO3 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.7, 5);
  auto TO4 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.3, -7);
  auto TO5 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.2, 3);
  auto TO6 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.0, -2);

  auto *QA = mod_.createPlaceholder(ElemKind::Int8QTy, {len}, TQA->getScale(),
                                    TQA->getOffset(), "QA", false);
  auto *QB = mod_.createPlaceholder(ElemKind::Int8QTy, {len}, TQB->getScale(),
                                    TQB->getOffset(), "QB", false);
  auto *QC = mod_.createPlaceholder(ElemKind::Int8QTy, {len}, TQC->getScale(),
                                    TQC->getOffset(), "QC", false);

  bindings_.allocate(QA)->getHandle<int8_t>().randomize(-10, 10,
                                                        mod_.getPRNG());
  bindings_.allocate(QB)->getHandle<int8_t>().randomize(-10, 10,
                                                        mod_.getPRNG());
  bindings_.allocate(QC)->getHandle<int8_t>().randomize(-10, 10,
                                                        mod_.getPRNG());

  // Apply max/min/add/sub/mul/div quantized.
  Node *max = F_->createMax("max", TO1, QA, QB);
  Node *min = F_->createMin("min", TO2, QA, QB);
  Node *add = F_->createAdd("add", TO3, QA, QB);
  Node *sub = F_->createSub("sub", TO4, QA, QB);
  Node *mul = F_->createMul("mul", TO5, QA, QB);
  Node *div = F_->createDiv("div", TO6, QB, QC);

  // Save results of the operations.
  auto *O1 = F_->createSave("saveMax", max);
  auto *O2 = F_->createSave("saveMin", min);
  auto *O3 = F_->createSave("saveAdd", add);
  auto *O4 = F_->createSave("saveSub", sub);
  auto *O5 = F_->createSave("saveMul", mul);
  auto *O6 = F_->createSave("saveDiv", div);

  bindings_.allocate(O1->getPlaceholder());
  bindings_.allocate(O2->getPlaceholder());
  bindings_.allocate(O3->getPlaceholder());
  bindings_.allocate(O4->getPlaceholder());
  bindings_.allocate(O5->getPlaceholder());
  bindings_.allocate(O6->getPlaceholder());

  auto QAH = bindings_.get(QA)->getHandle<int8_t>();
  auto QBH = bindings_.get(QB)->getHandle<int8_t>();
  auto QCH = bindings_.get(QC)->getHandle<int8_t>();
  auto O1H = bindings_.get(O1->getPlaceholder())->getHandle<int8_t>();
  auto O2H = bindings_.get(O2->getPlaceholder())->getHandle<int8_t>();
  auto O3H = bindings_.get(O3->getPlaceholder())->getHandle<int8_t>();
  auto O4H = bindings_.get(O4->getPlaceholder())->getHandle<int8_t>();
  auto O5H = bindings_.get(O5->getPlaceholder())->getHandle<int8_t>();
  auto O6H = bindings_.get(O6->getPlaceholder())->getHandle<int8_t>();

  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);

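  // Verify against a float reference: dequantize each operand as
  // real = scale * (q - offset), apply the float op, and requantize into the
  // corresponding output type as q' = real / scale + offset.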
  for (dim_t i = 0; i < len; i++) {
    float a = TQA->getScale() * (QAH.at({i}) - TQA->getOffset());
    float b = TQB->getScale() * (QBH.at({i}) - TQB->getOffset());
    float c = TQC->getScale() * (QCH.at({i}) - TQC->getOffset());
    float max = std::max(a, b) / TO1->getScale() + TO1->getOffset();
    float min = std::min(a, b) / TO2->getScale() + TO2->getOffset();
    float add = (a + b) / TO3->getScale() + TO3->getOffset();
    float sub = (a - b) / TO4->getScale() + TO4->getOffset();
    float mul = (a * b) / TO5->getScale() + TO5->getOffset();
    float div = (b / c) / TO6->getScale() + TO6->getOffset();

    EXPECT_NEAR(std::round(max), O1H.at({i}), 1.0);
    EXPECT_NEAR(std::round(min), O2H.at({i}), 1.0);
    EXPECT_NEAR(std::round(add), O3H.at({i}), 1.0);
    EXPECT_NEAR(std::round(sub), O4H.at({i}), 1.0);
    EXPECT_NEAR(std::round(mul), O5H.at({i}), 1.0);
    EXPECT_NEAR(std::round(div), O6H.at({i}), 1.0);
  }
}

TEST_P(OperatorTest, QuantizedCmpLTEAndSelect) {
  CHECK_IF_ENABLED();

  // In this test we check the correctness of the quantized
  // less-than-or-equal-to comparison and of the select that consumes its
  // result.
  const dim_t len = 1000;
  auto TQA = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.1, -3);
  auto TQB = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.9, 5);
  auto TQC = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.8, 3);
  auto TQD = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.2, -4);
  auto OT = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.5, -2);

  auto *QA = mod_.createPlaceholder(ElemKind::Int8QTy, {len}, TQA->getScale(),
                                    TQA->getOffset(), "QA", false);
  auto *QB = mod_.createPlaceholder(ElemKind::Int8QTy, {len}, TQB->getScale(),
                                    TQB->getOffset(), "QB", false);
  auto *QC = mod_.createPlaceholder(ElemKind::Int8QTy, {len}, TQC->getScale(),
                                    TQC->getOffset(), "QC", false);
  auto *QD = mod_.createPlaceholder(ElemKind::Int8QTy, {len}, TQD->getScale(),
                                    TQD->getOffset(), "QD", false);

  auto QAH = bindings_.allocate(QA)->getHandle<int8_t>();
  auto QBH = bindings_.allocate(QB)->getHandle<int8_t>();
  auto QCH = bindings_.allocate(QC)->getHandle<int8_t>();
  auto QDH = bindings_.allocate(QD)->getHandle<int8_t>();

  QAH.randomize(-128, 127, mod_.getPRNG());
  QBH.randomize(-128, 127, mod_.getPRNG());
  QCH.randomize(-128, 127, mod_.getPRNG());
  QDH.randomize(-128, 127, mod_.getPRNG());

  // Apply comparison and selection quantized.
  Node *cmpLTE = F_->createCmpLTE("cmpLTE", QA, QB);
  Node *select = F_->createSelect("select", OT, cmpLTE, QC, QD);

  // Save result of the operation.
  auto *out = F_->createSave("save", select);
  auto OH = bindings_.allocate(out->getPlaceholder())->getHandle<int8_t>();

  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);

  int count_strict = 0;
  int count = 0;
  for (dim_t i = 0; i < len; i++) {
    float a = TQA->getScale() * (QAH.at({i}) - TQA->getOffset());
    float b = TQB->getScale() * (QBH.at({i}) - TQB->getOffset());
    float c = TQC->getScale() * (QCH.at({i}) - TQC->getOffset());
    float d = TQD->getScale() * (QDH.at({i}) - TQD->getOffset());
    float tmp = (a <= b) ? c : d;
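    // Quantize the expected float result with the output type's parameters
    // (scale = 1.5, offset = -2) and clip it to the int8 range, mirroring
    // what the backend does when it writes the select output.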
    int32_t q = std::round(tmp / 1.5 - 2);
    int8_t select = quantization::clip<int32_t, int8_t>(q);

    if (OH.at({i}) != select) {
      count_strict++;
      if (std::abs(OH.at({i}) - select) > 1) {
        count++;
      }
    }
  }
  // Allow at most 0.6% of the results to differ from the reference at all,
  // and at most 0.4% to be off by more than 1.
  EXPECT_LE(count_strict, 6);
  EXPECT_LE(count, 4);
}

TEST_P(OperatorTest, TestQuantizedRescaleSequence) {
  CHECK_IF_ENABLED();

  const dim_t len = 100;

  auto *A = mod_.createPlaceholder(ElemKind::FloatTy, {len}, "A", false);

  auto AH = bindings_.allocate(A)->getHandle();

  // Note that the range below matches what is representable with the scale
  // factors in T3 and T4 (0.1 * 127 ~= 12.7). If we increase the size of the
  // range we may start losing some values.
  AH.randomize(-12, 12, mod_.getPRNG());

  auto T1 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 1.0, 0);
  auto T2 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.9, 2);
  auto T3 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.1, -3);
  auto T4 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.1, 7);
  auto T5 = mod_.uniqueType(ElemKind::Int8QTy, {len}, 0.3, -3);

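  // A rescale only changes the scale/offset pair used to encode the tensor;
  // the real value it represents, scale * (q - offset), stays the same up to
  // rounding. The whole chain below should therefore be close to an identity.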
  Node *R = F_->createQuantize("R", A, T1);
  // Check that a sequence of type conversions does not change the result.
  R = F_->createRescaleQuantized("R", R, T1);
  R = F_->createRescaleQuantized("R", R, T2);
  R = F_->createRescaleQuantized("R", R, T3);
  // Check that adding the quantized zero does not change the result.
  auto *G = F_->createSplat("splatZero", T3, 0.0);
  R = F_->createAdd("addZero", G, R);
  R = F_->createRescaleQuantized("R", R, T4);
  R = F_->createRescaleQuantized("R", R, T5);
  R = F_->createRescaleQuantized("R", R, T1);
  auto *DQ = F_->createDequantize("DQ", R, ElemKind::FloatTy);

  // Save the result of the conversion sequence.
  auto *result = F_->createSave("save", DQ);
  auto OH = bindings_.allocate(result->getPlaceholder())->getHandle();
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);

  for (dim_t i = 0; i < len; i++) {
    EXPECT_NEAR(AH.at({i}), OH.at({i}), 1.0);
  }
}

/// Helper to test concatVectors using \p DTy.
template <typename DataType>
static void testConcatVectors(glow::PlaceholderBindings &bindings,
                              glow::Module &mod, glow::Function *F,
                              glow::ExecutionEngine &EE, ElemKind DTy) {
  F->setName("concatVectors");

  auto *V1 =
      createPlaceholderConditionallyQuantized(mod, DTy, {10}, "V1", false);
  auto *V2 =
      createPlaceholderConditionallyQuantized(mod, DTy, {20}, "V2", false);
  auto *V3 =
      createPlaceholderConditionallyQuantized(mod, DTy, {30}, "V3", false);

  bindings.allocate(V1);
  bindings.allocate(V2);
  bindings.allocate(V3);

  Node *L = F->createConcat("concat", {V1, V2, V3}, 0);
  auto *result = F->createSave("ret", L);
  bindings.allocate(result->getPlaceholder());

  auto I1 = createTensorConditionallyQuantized(DTy, {10});
  auto I2 = createTensorConditionallyQuantized(DTy, {20});
  auto I3 = createTensorConditionallyQuantized(DTy, {30});

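  // Fill the inputs so that concatenating {V1, V2, V3} along dimension 0
  // yields the consecutive values 0..59, which makes the check below trivial.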
  for (dim_t i = 0; i < 10; i++) {
    I1.getHandle<DataType>().at({i}) = i;

    I2.getHandle<DataType>().at({i}) = i + 10;
    I2.getHandle<DataType>().at({i + 10}) = i + 20;
    I3.getHandle<DataType>().at({i}) = i + 30;
    I3.getHandle<DataType>().at({i + 10}) = i + 40;
    I3.getHandle<DataType>().at({i + 20}) = i + 50;
  }

  EE.compile(CompilationMode::Infer);

  // Testing the output vector.
  updateInputPlaceholders(bindings, {V1, V2, V3}, {&I1, &I2, &I3});
  EE.run(bindings);

  auto RNWH = bindings.get(result->getPlaceholder())->getHandle<DataType>();

  for (dim_t i = 0; i < 60; i++) {
    EXPECT_NEAR(RNWH.at({i}), static_cast<DataType>(i), 0.001);
  }
}

/// Test concatenating vectors that are Int64ITy.
TEST_P(OperatorTest, concatVectors_Int64) {
  CHECK_IF_ENABLED();
  testConcatVectors<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy);
}

/// Test concatenating vectors that are Int32ITy.
TEST_P(OperatorTest, concatVectors_Int32) {
  CHECK_IF_ENABLED();
  testConcatVectors<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy);
}

/// Test concatenating vectors that are Int8QTy.
TEST_P(OperatorTest, concatVectors_Int8) {
  CHECK_IF_ENABLED();
  testConcatVectors<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy);
}

/// Test concatenating vectors that are BoolTy.
TEST_P(OperatorTest, concatVectors_Bool) {
  CHECK_IF_ENABLED();
  testConcatVectors<bool>(bindings_, mod_, F_, EE_, ElemKind::BoolTy);
}

/// Test concatenating vectors that are FloatTy.
TEST_P(OperatorTest, concatVectors_Float) {
  CHECK_IF_ENABLED();
  testConcatVectors<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy);
}

/// Test concatenating vectors that are Float16Ty.
TEST_P(OperatorTest, concatVectors_Float16) {
  CHECK_IF_ENABLED();
  testConcatVectors<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty);
}

/// Test concatenating vectors that are BFloat16Ty.
TEST_P(OperatorTest, concatVectors_BFloat16) {
  CHECK_IF_ENABLED();
  testConcatVectors<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty);
}

/// Helper to test ConcatVectorsRepeated using \p DTy.
template <typename DataType>
static void testConcatVectorsRepeated(glow::PlaceholderBindings &bindings,
                                      glow::Module &mod, glow::Function *F,
                                      glow::ExecutionEngine &EE, ElemKind DTy) {
  F->setName("concatVectorsRepeated");

  auto *V1 =
      createPlaceholderConditionallyQuantized(mod, DTy, {10}, "V1", false);
  auto *V2 =
      createPlaceholderConditionallyQuantized(mod, DTy, {20}, "V2", false);
  bindings.allocate(V1);
  bindings.allocate(V2);

  // Alternate adding sequences of V1 and V2, so that the IRGen'd
  // InsertTensors have different counts.
  Node *L = F->createConcat("concat", {V2, V1, V1, V1, V2, V2, V1, V1, V2}, 0);
  auto *result = F->createSave("ret", L);
  bindings.allocate(result->getPlaceholder());

  auto I1 = createTensorConditionallyQuantized(DTy, {10});
  auto I2 = createTensorConditionallyQuantized(DTy, {20});
  auto I1H = I1.getHandle<DataType>();
  auto I2H = I2.getHandle<DataType>();
  for (dim_t i = 0; i < 10; i++) {
    I1H.at({i}) = 1;

    I2H.at({i}) = 2;
    I2H.at({i + 10}) = 2;
  }

  EE.compile(CompilationMode::Infer);

  // Testing the output vector.
  updateInputPlaceholders(bindings, {V1, V2}, {&I1, &I2});
  EE.run(bindings);

  auto outH = bindings.get(result->getPlaceholder())->getHandle<DataType>();

  // Simply verify here that the values are in their correct places, based on
  // the number of times/order V1 and V2 are concatenated and their sizes.
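  // With the concat order {V2, V1, V1, V1, V2, V2, V1, V1, V2}, the 2-valued
  // V2 copies occupy indices [0, 20), [50, 90), and [110, 130), and the
  // 1-valued V1 copies occupy everything in between.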
  for (dim_t i = 0; i < 130; i++) {
    if ((i < 20) || (i >= 50 && i < 90) || (i >= 110)) {
      EXPECT_EQ(outH.at({i}), static_cast<DataType>(2));
    } else {
      EXPECT_EQ(outH.at({i}), static_cast<DataType>(1));
    }
  }
}

/// Check that concatenating two tensors repeatedly is correct. This is
/// intended to verify that IRGen to InsertTensor instructions with axis/count
/// works correctly. Testing Int64ITy data.
TEST_P(OperatorTest, concatVectorsRepeated_Int64) {
  CHECK_IF_ENABLED();
  testConcatVectorsRepeated<int64_t>(bindings_, mod_, F_, EE_,
                                     ElemKind::Int64ITy);
}

/// Check that concatenating two tensors repeatedly is correct. This is
/// intended to verify that IRGen to InsertTensor instructions with axis/count
/// works correctly. Testing Int32ITy data.
TEST_P(OperatorTest, concatVectorsRepeated_Int32) {
  CHECK_IF_ENABLED();
  testConcatVectorsRepeated<int32_t>(bindings_, mod_, F_, EE_,
                                     ElemKind::Int32ITy);
}

/// Check that concatenating two tensors repeatedly is correct. This is
/// intended to verify that IRGen to InsertTensor instructions with axis/count
/// works correctly. Testing Int8QTy data.
TEST_P(OperatorTest, concatVectorsRepeated_Int8) {
  CHECK_IF_ENABLED();
  testConcatVectorsRepeated<int8_t>(bindings_, mod_, F_, EE_,
                                    ElemKind::Int8QTy);
}

/// Check that concatenating two tensors repeatedly is correct. This is
/// intended to verify that IRGen to InsertTensor instructions with axis/count
/// works correctly. Testing BoolTy data.
TEST_P(OperatorTest, concatVectorsRepeated_Bool) {
  CHECK_IF_ENABLED();
  testConcatVectorsRepeated<bool>(bindings_, mod_, F_, EE_, ElemKind::BoolTy);
}

/// Check that concatenating two tensors repeatedly is correct. This is
/// intended to verify that IRGen to InsertTensor instructions with axis/count
/// works correctly. Testing FloatTy data.
TEST_P(OperatorTest, concatVectorsRepeated_Float) {
  CHECK_IF_ENABLED();
  testConcatVectorsRepeated<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy);
}

/// Check that concatenating two tensors repeatedly is correct. This is
/// intended to verify that IRGen to InsertTensor instructions with axis/count
/// works correctly. Testing Float16Ty data.
TEST_P(OperatorTest, concatVectorsRepeated_Float16) {
  CHECK_IF_ENABLED();
  testConcatVectorsRepeated<float16_t>(bindings_, mod_, F_, EE_,
                                       ElemKind::Float16Ty);
}

/// Check that concatenating two tensors repeatedly is correct. This is
/// intended to verify that IRGen to InsertTensor instructions with axis/count
/// works correctly. Testing BFloat16Ty data.
TEST_P(OperatorTest, concatVectorsRepeated_BFloat16) {
  CHECK_IF_ENABLED();
  testConcatVectorsRepeated<bfloat16_t>(bindings_, mod_, F_, EE_,
                                        ElemKind::BFloat16Ty);
}

/// Helper to test SliceVectors using \p DTy.
template <typename DataType>
static void testSliceVectors(glow::PlaceholderBindings &bindings,
                             glow::Module &mod, glow::Function *F,
                             glow::ExecutionEngine &EE, ElemKind DTy) {
  F->setName("sliceVectors");

  auto *V =
      createPlaceholderConditionallyQuantized(mod, DTy, {3, 30}, "V", false);
  bindings.allocate(V);

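  // createSlice takes inclusive start and exclusive end coordinates, so
  // slice1 is rows 0..2 with columns 10..12 (shape {3, 3}), slice2 is row 1
  // in full (shape {1, 30}), and slice3 is row 2, columns 10..11 (shape
  // {1, 2}).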
  Node *S1 = F->createSlice("slice1", V, {0, 10}, {3, 13});
  Node *S2 = F->createSlice("slice2", V, {1, 0}, {2, 30});
  Node *S3 = F->createSlice("slice3", V, {2, 10}, {3, 12});

  auto *result1 = F->createSave("ret1", S1);
  auto *result2 = F->createSave("ret2", S2);
  auto *result3 = F->createSave("ret3", S3);

  bindings.allocate(result1->getPlaceholder());
  bindings.allocate(result2->getPlaceholder());
  bindings.allocate(result3->getPlaceholder());

  auto I = createTensorConditionallyQuantized(DTy, {3, 30});
  auto IH = I.getHandle<DataType>();
  for (dim_t j = 0; j < 30; j++) {
    IH.at({0, j}) = j;
    IH.at({1, j}) = j + 30;
    IH.at({2, j}) = j + 60;
  }

  EE.compile(CompilationMode::Infer);

  // Testing the output slices.
  updateInputPlaceholders(bindings, {V}, {&I});
  EE.run(bindings);

  auto RNWH1 = bindings.get(result1->getPlaceholder())->getHandle<DataType>();
  auto RNWH2 = bindings.get(result2->getPlaceholder())->getHandle<DataType>();
  auto RNWH3 = bindings.get(result3->getPlaceholder())->getHandle<DataType>();

  EXPECT_EQ(3, RNWH1.dims()[0]);
  EXPECT_EQ(3, RNWH1.dims()[1]);
  for (dim_t i = 0; i < 3; i++) {
    for (dim_t j = 10; j < 13; j++) {
      EXPECT_NEAR(RNWH1.at({i, j - 10}), j + i * 30, 0.001);
    }
  }
  EXPECT_EQ(1, RNWH2.dims()[0]);
  EXPECT_EQ(30, RNWH2.dims()[1]);
  for (dim_t j = 0; j < 30; j++) {
    EXPECT_NEAR(RNWH2.at({0, j}), j + 30, 0.001);
  }
  EXPECT_EQ(1, RNWH3.dims()[0]);
  EXPECT_EQ(2, RNWH3.dims()[1]);
  for (dim_t j = 10; j < 12; j++) {
    EXPECT_NEAR(RNWH3.at({0, j - 10}), j + 60, 0.001);
  }
}

/// Test slicing with Int64ITy.
TEST_P(OperatorTest, sliceVectors_Int64) {
  CHECK_IF_ENABLED();
  testSliceVectors<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy);
}

/// Test slicing with FloatTy.
TEST_P(OperatorTest, sliceVectors_Float) {
  CHECK_IF_ENABLED();
  testSliceVectors<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy);
}

/// Test slicing with Float16Ty.
TEST_P(OperatorTest, sliceVectors_Float16) {
  CHECK_IF_ENABLED();
  testSliceVectors<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty);
}

/// Test slicing with BFloat16Ty.
TEST_P(OperatorTest, sliceVectors_BFloat16) {
  CHECK_IF_ENABLED();
  testSliceVectors<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty);
}

/// Test slicing with Int8QTy.
TEST_P(OperatorTest, sliceVectors_Int8) {
  CHECK_IF_ENABLED();
  testSliceVectors<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy);
}

/// Test slicing with Int32QTy.
TEST_P(OperatorTest, sliceVectors_Int32Q) {
  CHECK_IF_ENABLED();
  testSliceVectors<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32QTy);
}

/// Test slicing with Int32ITy.
TEST_P(OperatorTest, sliceVectors_Int32I) {
  CHECK_IF_ENABLED();
  testSliceVectors<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy);
}

/// Test slicing with BoolTy.
TEST_P(OperatorTest, sliceVectors_BoolTy) {
  CHECK_IF_ENABLED();
  auto *input = mod_.createPlaceholder(ElemKind::BoolTy, {5}, "inp", false);
  bindings_.allocate(input)->getHandle<bool>() = {false, true, false, true,
                                                  true};

  Node *S1 = F_->createSlice("slice1", input, {0}, {2});
  Node *S2 = F_->createSlice("slice2", input, {2}, {5});
  auto *save1 = F_->createSave("save", S1);
  auto *save2 = F_->createSave("save", S2);
  auto *out1 = bindings_.allocate(save1->getPlaceholder());
  auto *out2 = bindings_.allocate(save2->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH1 = out1->getHandle<bool>();
  auto outH2 = out2->getHandle<bool>();
  EXPECT_EQ(outH1.size(), 2);
  EXPECT_EQ(outH2.size(), 3);
  EXPECT_EQ(outH1.raw(0), false);
  EXPECT_EQ(outH1.raw(1), true);
  EXPECT_EQ(outH2.raw(0), false);
  EXPECT_EQ(outH2.raw(1), true);
  EXPECT_EQ(outH2.raw(2), true);
}

/// Helper to test SliceConcatVectors using \p DTy.
template <typename DataType>
static void testSliceConcatVectors(glow::PlaceholderBindings &bindings,
                                   glow::Module &mod, glow::Function *F,
                                   glow::ExecutionEngine &EE, ElemKind DTy) {
  F->setName("sliceConcatVectors");

  auto *V =
      createPlaceholderConditionallyQuantized(mod, DTy, {5, 4}, "V", false);
  bindings.allocate(V);

  auto I = createTensorConditionallyQuantized(DTy, {5, 4});
  auto IH = I.getHandle<DataType>();
  for (dim_t i = 0; i < 5; i++) {
    for (dim_t j = 0; j < 4; j++) {
      IH.at({i, j}) = i * 10 + j;
    }
  }

  Node *S0 = F->createSlice("slice0", V, {1, 0}, {5, 4});
  Node *S1 = F->createSlice("slice1", S0, {0, 0}, {2, 4});
  Node *S2 = F->createSlice("slice2", S0, {2, 0}, {4, 4});
  Node *S3 = F->createSlice("slice3", S0, {0, 0}, {2, 2});
  Node *S4 = F->createSlice("slice4", S0, {2, 2}, {4, 4});
  Node *S5 = F->createSlice("slice5", V, {0, 0}, {1, 4});

  Node *C0 = F->createConcat("concat0", {S5, S1}, 0);
  Node *C1 = F->createConcat("concat1", {S3, S4}, 1);
  Node *C2 = F->createConcat("concat2", {S2, C1, C0}, 0);

  auto *result = F->createSave("ret", C2);
  bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {V}, {&I});
  EE.run(bindings);

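  // Expected layout: S0 drops row 0 of V, so C2 stacks S2 (rows {30..33} and
  // {40..43}), C1 (rows {10,11,32,33} and {20,21,42,43}, from joining S3 and
  // S4 along dim 1), and C0 (row {0..3} followed by rows {10..13}, {20..23}).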
  const DataType expected[7][4] = {
      {30, 31, 32, 33}, {40, 41, 42, 43}, {10, 11, 32, 33}, {20, 21, 42, 43},
      {0, 1, 2, 3},     {10, 11, 12, 13}, {20, 21, 22, 23}};

  auto resultH = bindings.get(result->getPlaceholder())->getHandle<DataType>();
  EXPECT_EQ(7, resultH.dims()[0]);
  EXPECT_EQ(4, resultH.dims()[1]);
  for (dim_t i = 0; i < 7; i++) {
    for (dim_t j = 0; j < 4; j++) {
      EXPECT_EQ(resultH.at({i, j}), expected[i][j]);
    }
  }
}

/// Test a combination of slicing and concating, in Int64ITy.
TEST_P(OperatorTest, sliceConcatVectors_Int64) {
  CHECK_IF_ENABLED();
  testSliceConcatVectors<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy);
}

/// Test a combination of slicing and concating, in Int8QTy.
TEST_P(OperatorTest, sliceConcatVectors_Int8) {
  CHECK_IF_ENABLED();
  testSliceConcatVectors<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy);
}

/// Test a combination of slicing and concating, in FloatTy.
TEST_P(OperatorTest, sliceConcatVectors_Float) {
  CHECK_IF_ENABLED();
  testSliceConcatVectors<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy);
}

/// Test a combination of slicing and concating, in Float16Ty.
TEST_P(OperatorTest, sliceConcatVectors_Float16) {
  CHECK_IF_ENABLED();
  testSliceConcatVectors<float16_t>(bindings_, mod_, F_, EE_,
                                    ElemKind::Float16Ty);
}

/// Test a combination of slicing and concating, in BFloat16Ty.
TEST_P(OperatorTest, sliceConcatVectors_BFloat16) {
  CHECK_IF_ENABLED();
  testSliceConcatVectors<bfloat16_t>(bindings_, mod_, F_, EE_,
                                     ElemKind::BFloat16Ty);
}

TEST_P(OperatorTest, Tile) {
  CHECK_IF_ENABLED();

  F_->setName("tile");

  auto *V = mod_.createPlaceholder(ElemKind::FloatTy, {4, 5}, "V", false);
  bindings_.allocate(V);

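  // Tiling the {4, 5} tensor 3 times along axis 0 repeats its rows, giving a
  // {12, 5} result; tiling along axis 1 repeats its columns, giving {4, 15}.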
  Node *T0 = F_->createTile("tile0", V, /* tiles */ 3, /* axis */ 0);
  auto *result0 = F_->createSave("res0", T0);
  bindings_.allocate(result0->getPlaceholder());

  Node *T1 = F_->createTile("tile1", V, /* tiles */ 3, /* axis */ 1);
  auto *result1 = F_->createSave("res1", T1);
  bindings_.allocate(result1->getPlaceholder());

  Tensor VT(ElemKind::FloatTy, {4, 5});

  for (dim_t i = 0; i < 4; i++) {
    for (dim_t j = 0; j < 5; j++) {
      VT.getHandle<float>().at({i, j}) = i * 5 + j;
    }
  }

  EE_.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings_, {V}, {&VT});
  EE_.run(bindings_);

  // Testing the output vector with axis 0.
  auto res0 = bindings_.get(result0->getPlaceholder())->getHandle<float>();
  for (dim_t i = 0; i < res0.dims()[0]; i++) {
    for (dim_t j = 0; j < res0.dims()[1]; j++) {
      EXPECT_EQ(res0.at({i, j}), (i % 4) * 5 + j);
    }
  }

  // Testing the output vector with axis 1.
  auto res1 = bindings_.get(result1->getPlaceholder())->getHandle<float>();
  for (dim_t i = 0; i < res1.dims()[0]; i++) {
    for (dim_t j = 0; j < res1.dims()[1]; j++) {
      EXPECT_EQ(res1.at({i, j}), i * 5 + (j % 5));
    }
  }
}

TEST_P(OperatorTest, QuantizedTile) {
  CHECK_IF_ENABLED();

  F_->setName("quantizedTile");

  auto *V = mod_.createPlaceholder(ElemKind::FloatTy, {4, 5}, "V", false);
  bindings_.allocate(V);

  auto quantizationParams =
      glow::quantization::chooseQuantizationParams({0, 20});
  auto quantizeTy =
      mod_.uniqueType(ElemKind::Int8QTy, {4, 5}, quantizationParams.scale,
                      quantizationParams.offset);
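  // chooseQuantizationParams maps the [0, 20] range onto int8, so the scale
  // is roughly 20 / 255 ~= 0.078 and the worst-case rounding error
  // (scale / 2 ~= 0.04) stays within the 0.05 tolerance used below.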
  auto *Q = F_->createQuantize("quantize", V, quantizeTy);

  Node *T0 = F_->createTile("tile0", Q, /* tiles */ 3, /* axis */ 0);
  auto *DQ0 = F_->createDequantize("dequantize0", T0, ElemKind::FloatTy);
  auto *result0 = F_->createSave("res0", DQ0);
  bindings_.allocate(result0->getPlaceholder());

  Node *T1 = F_->createTile("tile1", Q, /* tiles */ 3, /* axis */ 1);
  auto *DQ1 = F_->createDequantize("dequantize1", T1, ElemKind::FloatTy);
  auto *result1 = F_->createSave("res1", DQ1);
  bindings_.allocate(result1->getPlaceholder());

  Tensor VT(ElemKind::FloatTy, {4, 5});

  for (dim_t i = 0; i < 4; i++) {
    for (dim_t j = 0; j < 5; j++) {
      VT.getHandle<float>().at({i, j}) = i * 5 + j;
    }
  }

  EE_.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings_, {V}, {&VT});
  EE_.run(bindings_);

  // Testing the output vector with axis 0.
  auto res0 = bindings_.get(result0->getPlaceholder())->getHandle<float>();
  for (dim_t i = 0; i < res0.dims()[0]; i++) {
    for (dim_t j = 0; j < res0.dims()[1]; j++) {
      EXPECT_NEAR(res0.at({i, j}), (i % 4) * 5 + j, 0.05);
    }
  }

  // Testing the output vector with axis 1.
  auto res1 = bindings_.get(result1->getPlaceholder())->getHandle<float>();
  for (dim_t i = 0; i < res1.dims()[0]; i++) {
    for (dim_t j = 0; j < res1.dims()[1]; j++) {
      EXPECT_NEAR(res1.at({i, j}), i * 5 + (j % 5), 0.05);
    }
  }
}

TEST_P(OperatorTest, Clip) {
  CHECK_IF_ENABLED();

  auto *X = mod_.createPlaceholder(ElemKind::FloatTy, {5, 5}, "X", false);
  auto xHandle = bindings_.allocate(X)->getHandle();
  xHandle = {45.0, 16.0, 59.0, 99.0, 48.0, 12.0, 44.0, 46.0, 82.0,
             28.0, 1.0,  91.0, 18.0, 9.0,  71.0, 24.0, 37.0, 61.0,
             12.0, 81.0, 36.0, 38.0, 30.0, 84.0, 40.0};

  float min = 20.0;
  float max = 60.0;
  auto *node = F_->createClip("clip", X, min, max);
  auto *save = F_->createSave("save", node);
  auto *saveTensor = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);

  auto result = saveTensor->getHandle();
  std::vector<dim_t> expectedDims = {5, 5};
  std::vector<float> expectedValues = {45.0, 20.0, 59.0, 60.0, 48.0, 20.0, 44.0,
                                       46.0, 60.0, 28.0, 20.0, 60.0, 20.0, 20.0,
                                       60.0, 24.0, 37.0, 60.0, 20.0, 60.0, 36.0,
                                       38.0, 30.0, 60.0, 40.0};
  EXPECT_TRUE(result.dims().vec() == expectedDims);
  for (size_t i = 0; i < 5 * 5; i++) {
    EXPECT_FLOAT_EQ(result.raw(i), expectedValues[i]);
  }
}

TEST_P(OperatorTest, LeakyRelu_FloatTy) {
  CHECK_IF_ENABLED();
  auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {3}, "inp", false);
  bindings_.allocate(inp)->getHandle<float>() = {-2, 0.0, 2};
  auto *node = F_->createLeakyRELU("leaky_relu", inp, /* alpha */ 0.5);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<float>();
  EXPECT_EQ(outH.size(), 3);
  EXPECT_FLOAT_EQ(outH.raw(0), -1.0);
  EXPECT_FLOAT_EQ(outH.raw(1), 0.0);
  EXPECT_FLOAT_EQ(outH.raw(2), 2.0);
}

TEST_P(OperatorTest, LeakyRelu_Int8QTy) {
  CHECK_IF_ENABLED();
  auto *inp =
      mod_.createPlaceholder(ElemKind::Int8QTy, {5}, 0.5, 0, "inp", false);
  bindings_.allocate(inp)->getHandle<int8_t>() = {-4, -2, 0, 2, 4};
  auto *node = F_->createLeakyRELU("leaky_relu", inp, /* alpha */ 0.5);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<int8_t>();
  EXPECT_EQ(outH.size(), 5);
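  // With scale 0.5 the raw inputs represent {-2, -1, 0, 1, 2}; alpha = 0.5
  // scales the negatives to {-1, -0.5, 0, 1, 2}, which requantize at the
  // same scale to the raw values checked below.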
  EXPECT_EQ(outH.raw(0), -2);
  EXPECT_EQ(outH.raw(1), -1);
  EXPECT_EQ(outH.raw(2), 0);
  EXPECT_EQ(outH.raw(3), 2);
  EXPECT_EQ(outH.raw(4), 4);
}

TEST_P(OperatorTest, Not) {
  CHECK_IF_ENABLED();
  auto *input = mod_.createPlaceholder(ElemKind::BoolTy, {2}, "inp", false);
  bindings_.allocate(input)->getHandle<bool>() = {false, true};
  auto *node = F_->createNot("not", input);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<bool>();
  EXPECT_EQ(outH.size(), 2);
  EXPECT_EQ(outH.raw(0), true);
  EXPECT_EQ(outH.raw(1), false);
}

TEST_P(OperatorTest, And) {
  CHECK_IF_ENABLED();
  auto *LHS = mod_.createPlaceholder(ElemKind::BoolTy, {4}, "LHS", false);
  auto *RHS = mod_.createPlaceholder(ElemKind::BoolTy, {4}, "RHS", false);
  bindings_.allocate(LHS)->getHandle<bool>() = {false, true, false, true};
  bindings_.allocate(RHS)->getHandle<bool>() = {false, false, true, true};
  auto *node = F_->createAnd("and", LHS, RHS);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<bool>();
  EXPECT_EQ(outH.size(), 4);
  EXPECT_EQ(outH.raw(0), false);
  EXPECT_EQ(outH.raw(1), false);
  EXPECT_EQ(outH.raw(2), false);
  EXPECT_EQ(outH.raw(3), true);
}

TEST_P(OperatorTest, Or) {
  CHECK_IF_ENABLED();
  auto *LHS = mod_.createPlaceholder(ElemKind::BoolTy, {4}, "LHS", false);
  auto *RHS = mod_.createPlaceholder(ElemKind::BoolTy, {4}, "RHS", false);
  bindings_.allocate(LHS)->getHandle<bool>() = {false, true, false, true};
  bindings_.allocate(RHS)->getHandle<bool>() = {false, false, true, true};
  auto *node = F_->createOr("or", LHS, RHS);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<bool>();
  EXPECT_EQ(outH.size(), 4);
  EXPECT_EQ(outH.raw(0), false);
  EXPECT_EQ(outH.raw(1), true);
  EXPECT_EQ(outH.raw(2), true);
  EXPECT_EQ(outH.raw(3), true);
}

TEST_P(OperatorTest, Xor) {
  CHECK_IF_ENABLED();
  auto *LHS = mod_.createPlaceholder(ElemKind::BoolTy, {4}, "LHS", false);
  auto *RHS = mod_.createPlaceholder(ElemKind::BoolTy, {4}, "RHS", false);
  bindings_.allocate(LHS)->getHandle<bool>() = {false, true, false, true};
  bindings_.allocate(RHS)->getHandle<bool>() = {false, false, true, true};
  auto *node = F_->createXor("xor", LHS, RHS);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<bool>();
  EXPECT_EQ(outH.size(), 4);
  EXPECT_EQ(outH.raw(0), false);
  EXPECT_EQ(outH.raw(1), true);
  EXPECT_EQ(outH.raw(2), true);
  EXPECT_EQ(outH.raw(3), false);
}

TEST_P(OperatorTest, Abs_FloatTy) {
  CHECK_IF_ENABLED();
  auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {2}, "inp", false);
  bindings_.allocate(inp)->getHandle<float>() = {-1.0, 1.0};
  auto *node = F_->createAbs("abs", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<float>();
  EXPECT_EQ(outH.size(), 2);
  EXPECT_FLOAT_EQ(outH.raw(0), 1.0);
  EXPECT_FLOAT_EQ(outH.raw(1), 1.0);
}

TEST_P(OperatorTest, Abs_Int8QTy) {
  CHECK_IF_ENABLED();
  auto *inp =
      mod_.createPlaceholder(ElemKind::Int8QTy, {2}, 1.0, 0, "inp", false);
  bindings_.allocate(inp)->getHandle<int8_t>() = {-1, 1};
  auto *node = F_->createAbs("abs", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<int8_t>();
  EXPECT_EQ(outH.size(), 2);
  EXPECT_EQ(outH.raw(0), 1);
  EXPECT_EQ(outH.raw(1), 1);
}

TEST_P(OperatorTest, Neg_FloatTy) {
  CHECK_IF_ENABLED();
  auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {2}, "inp", false);
  bindings_.allocate(inp)->getHandle<float>() = {1.0, -1.0};
  auto *node = F_->createNeg("neg", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<float>();
  EXPECT_EQ(outH.size(), 2);
  EXPECT_FLOAT_EQ(outH.raw(0), -1.0);
  EXPECT_FLOAT_EQ(outH.raw(1), 1.0);
}

TEST_P(OperatorTest, Neg_Int32ITy) {
  CHECK_IF_ENABLED();
  auto *inp = mod_.createPlaceholder(ElemKind::Int32ITy, {3}, "inp", false);
  bindings_.allocate(inp)->getHandle<int32_t>() = {1, 0, -1};
  auto *node = F_->createNeg("neg", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<int32_t>();
  EXPECT_EQ(outH.size(), 3);
  EXPECT_EQ(outH.raw(0), -1);
  EXPECT_EQ(outH.raw(1), 0);
  EXPECT_EQ(outH.raw(2), 1);
}

TEST_P(OperatorTest, Neg_Int8QTy) {
  CHECK_IF_ENABLED();
  auto *inp =
      mod_.createPlaceholder(ElemKind::Int8QTy, {2}, 1.0, 0, "inp", false);
  bindings_.allocate(inp)->getHandle<int8_t>() = {-1, 1};
  auto *node = F_->createNeg("neg", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<int8_t>();
  EXPECT_EQ(outH.size(), 2);
  EXPECT_EQ(outH.raw(0), 1);
  EXPECT_EQ(outH.raw(1), -1);
}

TEST_P(OperatorTest, Floor_FloatTy) {
  CHECK_IF_ENABLED();
  auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {3}, "inp", false);
  bindings_.allocate(inp)->getHandle<float>() = {-0.2, 1.0, 1.99};
  auto *node = F_->createFloor("floor", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<float>();
  EXPECT_EQ(outH.size(), 3);
  EXPECT_FLOAT_EQ(outH.raw(0), -1.0);
  EXPECT_FLOAT_EQ(outH.raw(1), 1.0);
  EXPECT_FLOAT_EQ(outH.raw(2), 1.0);
}

TEST_P(OperatorTest, Floor_Int8QTy) {
  CHECK_IF_ENABLED();
  auto *inp =
      mod_.createPlaceholder(ElemKind::Int8QTy, {5}, 0.5, 0, "inp", false);
  bindings_.allocate(inp)->getHandle<int8_t>() = {-2, -1, 0, 1, 2};
  auto *node = F_->createFloor("floor", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<int8_t>();
  EXPECT_EQ(outH.size(), 5);
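  // With scale 0.5 the raw inputs represent {-1, -0.5, 0, 0.5, 1}; floor
  // gives {-1, -1, 0, 0, 1}, which requantizes to the raw values below.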
  EXPECT_EQ(outH.raw(0), -2);
  EXPECT_EQ(outH.raw(1), -2);
  EXPECT_EQ(outH.raw(2), 0);
  EXPECT_EQ(outH.raw(3), 0);
  EXPECT_EQ(outH.raw(4), 2);
}

TEST_P(OperatorTest, Sign_FloatTy) {
  CHECK_IF_ENABLED();
  auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {3}, "inp", false);
  bindings_.allocate(inp)->getHandle<float>() = {-1.0, 0.0, 1.0};
  auto *node = F_->createSign("Sign", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<float>();
  EXPECT_EQ(outH.size(), 3);
  EXPECT_FLOAT_EQ(outH.raw(0), -1.0);
  EXPECT_FLOAT_EQ(outH.raw(1), 0.0);
  EXPECT_FLOAT_EQ(outH.raw(2), 1.0);
}

TEST_P(OperatorTest, Sign_Int8QTy) {
  CHECK_IF_ENABLED();

  auto qParams = glow::quantization::chooseQuantizationParams({-100, 100});
  auto *inp = mod_.createPlaceholder(ElemKind::Int8QTy, {3}, qParams.scale,
                                     qParams.offset, "input", false);
  bindings_.allocate(inp)->getHandle<int8_t>() = {-100, 0, 100};

  auto *node = F_->createSign("Sign", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<int8_t>();
  EXPECT_EQ(outH.size(), 3);
  EXPECT_EQ(outH.raw(0), -1);
  EXPECT_EQ(outH.raw(1), 0);
  EXPECT_EQ(outH.raw(2), 1);
}

TEST_P(OperatorTest, Ceil_FloatTy) {
  CHECK_IF_ENABLED();
  auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {3}, "inp", false);
  bindings_.allocate(inp)->getHandle<float>() = {-0.2, 1.0, 1.99};
  auto *node = F_->createCeil("ceil", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<float>();
  EXPECT_EQ(outH.size(), 3);
  EXPECT_FLOAT_EQ(outH.raw(0), 0.0);
  EXPECT_FLOAT_EQ(outH.raw(1), 1.0);
  EXPECT_FLOAT_EQ(outH.raw(2), 2.0);
}

TEST_P(OperatorTest, Ceil_Int8QTy) {
  CHECK_IF_ENABLED();
  auto *inp =
      mod_.createPlaceholder(ElemKind::Int8QTy, {5}, 0.5, 0, "inp", false);
  bindings_.allocate(inp)->getHandle<int8_t>() = {-2, -1, 0, 1, 2};
  auto *node = F_->createCeil("ceil", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<int8_t>();
  EXPECT_EQ(outH.size(), 5);
  EXPECT_EQ(outH.raw(0), -2);
  EXPECT_EQ(outH.raw(1), 0);
  EXPECT_EQ(outH.raw(2), 0);
  EXPECT_EQ(outH.raw(3), 2);
  EXPECT_EQ(outH.raw(4), 2);
}

TEST_P(OperatorTest, Round_FloatTy) {
  CHECK_IF_ENABLED();
  auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {5}, "inp", false);
  bindings_.allocate(inp)->getHandle<float>() = {0.9, 2.5, 2.3, 1.5, -4.5};
  auto *node = F_->createRound("round", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<float>();
  EXPECT_EQ(outH.size(), 5);
  // The rounding mode required by ONNX, NumPy, and TensorFlow is
  // round-half-to-even: values whose fractional part is exactly 0.5 are
  // rounded to the nearest even integer.
  EXPECT_FLOAT_EQ(outH.raw(0), 1.0);
  EXPECT_FLOAT_EQ(outH.raw(1), 2.0);
  EXPECT_FLOAT_EQ(outH.raw(2), 2.0);
  EXPECT_FLOAT_EQ(outH.raw(3), 2.0);
  EXPECT_FLOAT_EQ(outH.raw(4), -4.0);
}

TEST_P(OperatorTest, Round_Int8QTy) {
  CHECK_IF_ENABLED();
  auto *inp =
      mod_.createPlaceholder(ElemKind::Int8QTy, {5}, 0.1, 0, "inp", false);
  bindings_.allocate(inp)->getHandle<int8_t>() = {-8, -2, 0, 2, 8};
  auto *node = F_->createRound("round", inp);
  auto *save = F_->createSave("save", node);
  auto *outT = bindings_.allocate(save->getPlaceholder());
  EE_.compile(CompilationMode::Infer);
  EE_.run(bindings_);
  auto outH = outT->getHandle<int8_t>();
  EXPECT_EQ(outH.size(), 5);
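  // With scale 0.1 the raw inputs represent {-0.8, -0.2, 0, 0.2, 0.8};
  // rounding gives {-1, 0, 0, 0, 1}, which requantizes at the same scale to
  // the raw values below.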
9403 | EXPECT_EQ(outH.raw(0), -10); |
9404 | EXPECT_EQ(outH.raw(1), 0); |
9405 | EXPECT_EQ(outH.raw(2), 0); |
9406 | EXPECT_EQ(outH.raw(3), 0); |
9407 | EXPECT_EQ(outH.raw(4), 10); |
9408 | } |
9409 | |
9410 | /// Helper to test Truncate using floating point \p elemKind. |
9411 | template <typename ElemType> |
9412 | static void testTruncateFloat(glow::PlaceholderBindings &bindings, |
9413 | glow::Module &mod, glow::Function *F, |
9414 | glow::ExecutionEngine &EE, ElemKind elemKind) { |
9415 | auto *inp = mod.createPlaceholder(elemKind, {3}, "inp" , false); |
9416 | bindings.allocate(inp)->getHandle<ElemType>() = {-0.2, 1.0, 1.99}; |
9417 | auto *node = F->createTruncate("truncate" , inp); |
9418 | auto *save = F->createSave("save" , node); |
9419 | auto *outT = bindings.allocate(save->getPlaceholder()); |
9420 | EE.compile(CompilationMode::Infer); |
9421 | EE.run(bindings); |
9422 | auto outH = outT->getHandle<ElemType>(); |
9423 | EXPECT_EQ(outH.size(), 3); |
9424 | EXPECT_FLOAT_EQ(outH.raw(0), 0); |
9425 | EXPECT_FLOAT_EQ(outH.raw(1), 1.0); |
9426 | EXPECT_FLOAT_EQ(outH.raw(2), 1.0); |
9427 | } |
9428 | |
9429 | TEST_P(OperatorTest, Truncate_FloatTy) { |
9430 | CHECK_IF_ENABLED(); |
9431 | testTruncateFloat<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
9432 | } |
9433 | |
9434 | TEST_P(OperatorTest, Truncate_Float16Ty) { |
9435 | CHECK_IF_ENABLED(); |
9436 | testTruncateFloat<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
9437 | } |
9438 | |
9439 | TEST_P(OperatorTest, Truncate_Int8QTy) { |
9440 | CHECK_IF_ENABLED(); |
9441 | auto *inp = |
9442 | mod_.createPlaceholder(ElemKind::Int8QTy, {5}, 0.5, 0, "inp" , false); |
9443 | bindings_.allocate(inp)->getHandle<int8_t>() = {-3, -2, 0, 1, 2}; |
9444 | auto *node = F_->createTruncate("truncate" , inp); |
9445 | auto *save = F_->createSave("save" , node); |
9446 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9447 | EE_.compile(CompilationMode::Infer); |
9448 | EE_.run(bindings_); |
9449 | auto outH = outT->getHandle<int8_t>(); |
9450 | EXPECT_EQ(outH.size(), 5); |
9451 | EXPECT_EQ(outH.raw(0), -2); |
9452 | EXPECT_EQ(outH.raw(1), -2); |
9453 | EXPECT_EQ(outH.raw(2), 0); |
9454 | EXPECT_EQ(outH.raw(3), 0); |
9455 | EXPECT_EQ(outH.raw(4), 2); |
9456 | } |
9457 | |
9458 | TEST_P(OperatorTest, Sqrt_FloatTy) { |
9459 | CHECK_IF_ENABLED(); |
9460 | auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {4}, "inp" , false); |
9461 | bindings_.allocate(inp)->getHandle<float>() = {0.0, 1.0, 4.0, 9.0}; |
9462 | auto *node = F_->createSqrt("sqrt" , inp); |
9463 | auto *save = F_->createSave("save" , node); |
9464 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9465 | EE_.compile(CompilationMode::Infer); |
9466 | EE_.run(bindings_); |
9467 | auto outH = outT->getHandle<float>(); |
9468 | EXPECT_EQ(outH.size(), 4); |
9469 | EXPECT_FLOAT_EQ(outH.raw(0), 0.0); |
9470 | EXPECT_FLOAT_EQ(outH.raw(1), 1.0); |
9471 | EXPECT_FLOAT_EQ(outH.raw(2), 2.0); |
9472 | EXPECT_FLOAT_EQ(outH.raw(3), 3.0); |
9473 | } |
9474 | |
9475 | TEST_P(OperatorTest, Sqrt_Int8QTy) { |
9476 | CHECK_IF_ENABLED(); |
9477 | auto *inp = |
9478 | mod_.createPlaceholder(ElemKind::Int8QTy, {4}, 1.0, 0, "inp" , false); |
9479 | bindings_.allocate(inp)->getHandle<int8_t>() = {0, 1, 4, 9}; |
9480 | auto *node = F_->createSqrt("sqrt" , inp); |
9481 | auto *save = F_->createSave("save" , node); |
9482 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9483 | EE_.compile(CompilationMode::Infer); |
9484 | EE_.run(bindings_); |
9485 | auto outH = outT->getHandle<int8_t>(); |
9486 | EXPECT_EQ(outH.size(), 4); |
9487 | EXPECT_EQ(outH.raw(0), 0); |
9488 | EXPECT_EQ(outH.raw(1), 1); |
9489 | EXPECT_EQ(outH.raw(2), 2); |
9490 | EXPECT_EQ(outH.raw(3), 3); |
9491 | } |
9492 | |
9493 | TEST_P(OperatorTest, Rsqrt_FloatTy) { |
9494 | CHECK_IF_ENABLED(); |
9495 | auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {4}, "inp" , false); |
9496 | bindings_.allocate(inp)->getHandle<float>() = {1.0, 4.0, 16.0, 64.0}; |
9497 | auto *node = F_->createRsqrt("rsqrt" , inp); |
9498 | auto *save = F_->createSave("save" , node); |
9499 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9500 | EE_.compile(CompilationMode::Infer); |
9501 | EE_.run(bindings_); |
9502 | auto outH = outT->getHandle<float>(); |
9503 | EXPECT_EQ(outH.size(), 4); |
9504 | EXPECT_FLOAT_EQ(outH.raw(0), 1.0); |
9505 | EXPECT_FLOAT_EQ(outH.raw(1), 0.5); |
9506 | EXPECT_FLOAT_EQ(outH.raw(2), 0.25); |
9507 | EXPECT_FLOAT_EQ(outH.raw(3), 0.125); |
9508 | } |
9509 | |
9510 | TEST_P(OperatorTest, Rsqrt_Int8QTy) { |
9511 | CHECK_IF_ENABLED(); |
9512 | auto *inp = |
9513 | mod_.createPlaceholder(ElemKind::Int8QTy, {4}, 1.0, 0, "inp" , false); |
9514 | bindings_.allocate(inp)->getHandle<int8_t>() = {1, 4, 16, 64}; |
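  // rsqrt of {1, 4, 16, 64} is {1, 0.5, 0.25, 0.125}; an output scale of 1/8
  // represents these values exactly as {8, 4, 2, 1}.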
9515 | auto outTy = mod_.uniqueType(ElemKind::Int8QTy, {4}, 1.0 / 8.0, 0); |
9516 | auto *node = F_->createRsqrt("rsqrt" , outTy, inp); |
9517 | auto *save = F_->createSave("save" , node); |
9518 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9519 | EE_.compile(CompilationMode::Infer); |
9520 | EE_.run(bindings_); |
9521 | auto outH = outT->getHandle<int8_t>(); |
9522 | EXPECT_EQ(outH.size(), 4); |
9523 | EXPECT_EQ(outH.raw(0), 8); |
9524 | EXPECT_EQ(outH.raw(1), 4); |
9525 | EXPECT_EQ(outH.raw(2), 2); |
9526 | EXPECT_EQ(outH.raw(3), 1); |
9527 | } |
9528 | |
9529 | TEST_P(OperatorTest, Reciprocal_FloatTy) { |
9530 | CHECK_IF_ENABLED(); |
9531 | auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {4}, "inp" , false); |
9532 | bindings_.allocate(inp)->getHandle<float>() = {1.0, 2.0, 4.0, 8.0}; |
9533 | auto *node = F_->createReciprocal("reciprocal" , inp); |
9534 | auto *save = F_->createSave("save" , node); |
9535 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9536 | EE_.compile(CompilationMode::Infer); |
9537 | EE_.run(bindings_); |
9538 | auto outH = outT->getHandle<float>(); |
9539 | EXPECT_EQ(outH.size(), 4); |
9540 | EXPECT_FLOAT_EQ(outH.raw(0), 1.0); |
9541 | EXPECT_FLOAT_EQ(outH.raw(1), 0.5); |
9542 | EXPECT_FLOAT_EQ(outH.raw(2), 0.25); |
9543 | EXPECT_FLOAT_EQ(outH.raw(3), 0.125); |
9544 | } |
9545 | |
9546 | TEST_P(OperatorTest, Reciprocal_Int8QTy) { |
9547 | CHECK_IF_ENABLED(); |
9548 | auto *inp = |
9549 | mod_.createPlaceholder(ElemKind::Int8QTy, {4}, 1.0, 0, "inp" , false); |
9550 | bindings_.allocate(inp)->getHandle<int8_t>() = {1, 2, 4, 8}; |
9551 | auto outTy = mod_.uniqueType(ElemKind::Int8QTy, {4}, 1.0 / 8.0, 0); |
9552 | auto *node = F_->createReciprocal("reciprocal" , outTy, inp); |
9553 | auto *save = F_->createSave("save" , node); |
9554 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9555 | EE_.compile(CompilationMode::Infer); |
9556 | EE_.run(bindings_); |
9557 | auto outH = outT->getHandle<int8_t>(); |
9558 | EXPECT_EQ(outH.size(), 4); |
9559 | EXPECT_EQ(outH.raw(0), 8); |
9560 | EXPECT_EQ(outH.raw(1), 4); |
9561 | EXPECT_EQ(outH.raw(2), 2); |
9562 | EXPECT_EQ(outH.raw(3), 1); |
9563 | } |
9564 | |
9565 | TEST_P(OperatorTest, Sin_FloatTy) { |
9566 | CHECK_IF_ENABLED(); |
9567 | auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {4}, "inp" , false); |
9568 | bindings_.allocate(inp)->getHandle<float>() = {-1.0, 0.0, 1.0, 2.0}; |
9569 | auto *node = F_->createSin("sin" , inp); |
9570 | auto *save = F_->createSave("save" , node); |
9571 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9572 | EE_.compile(CompilationMode::Infer); |
9573 | EE_.run(bindings_); |
9574 | auto outH = outT->getHandle<float>(); |
9575 | EXPECT_EQ(outH.size(), 4); |
9576 | EXPECT_FLOAT_EQ(outH.raw(0), std::sin(-1.0)); |
9577 | EXPECT_FLOAT_EQ(outH.raw(1), std::sin(0.0)); |
9578 | EXPECT_FLOAT_EQ(outH.raw(2), std::sin(1.0)); |
9579 | EXPECT_FLOAT_EQ(outH.raw(3), std::sin(2.0)); |
9580 | } |
9581 | |
9582 | TEST_P(OperatorTest, Sin_Int8QTy) { |
9583 | CHECK_IF_ENABLED(); |
9584 | auto *inp = |
9585 | mod_.createPlaceholder(ElemKind::Int8QTy, {4}, 1.0, 0, "inp" , false); |
9586 | bindings_.allocate(inp)->getHandle<int8_t>() = {-1, 0, 1, 2}; |
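  // sin(x) lies in [-1, 1], so an output scale of 1/127 spans the full int8
  // range; the expectations below apply the same quantization.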
9587 | auto outTy = mod_.uniqueType(ElemKind::Int8QTy, {4}, 1.0 / 127.0, 0); |
9588 | auto *node = F_->createSin("sin" , outTy, inp); |
9589 | auto *save = F_->createSave("save" , node); |
9590 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9591 | EE_.compile(CompilationMode::Infer); |
9592 | EE_.run(bindings_); |
9593 | auto outH = outT->getHandle<int8_t>(); |
9594 | EXPECT_EQ(outH.size(), 4); |
9595 | EXPECT_EQ(outH.raw(0), static_cast<int8_t>(std::round(std::sin(-1) * 127))); |
9596 | EXPECT_EQ(outH.raw(1), static_cast<int8_t>(std::round(std::sin(0) * 127))); |
9597 | EXPECT_EQ(outH.raw(2), static_cast<int8_t>(std::round(std::sin(1) * 127))); |
9598 | EXPECT_EQ(outH.raw(3), static_cast<int8_t>(std::round(std::sin(2) * 127))); |
9599 | } |
9600 | |
9601 | TEST_P(OperatorTest, Cos_FloatTy) { |
9602 | CHECK_IF_ENABLED(); |
9603 | auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {4}, "inp" , false); |
9604 | bindings_.allocate(inp)->getHandle<float>() = {-1.0, 0.0, 1.0, 2.0}; |
9605 | auto *node = F_->createCos("cos" , inp); |
9606 | auto *save = F_->createSave("save" , node); |
9607 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9608 | EE_.compile(CompilationMode::Infer); |
9609 | EE_.run(bindings_); |
9610 | auto outH = outT->getHandle<float>(); |
9611 | EXPECT_EQ(outH.size(), 4); |
9612 | EXPECT_FLOAT_EQ(outH.raw(0), std::cos(-1.0)); |
9613 | EXPECT_FLOAT_EQ(outH.raw(1), std::cos(0.0)); |
9614 | EXPECT_FLOAT_EQ(outH.raw(2), std::cos(1.0)); |
9615 | EXPECT_FLOAT_EQ(outH.raw(3), std::cos(2.0)); |
9616 | } |
9617 | |
9618 | TEST_P(OperatorTest, Cos_Int8QTy) { |
9619 | CHECK_IF_ENABLED(); |
9620 | auto *inp = |
9621 | mod_.createPlaceholder(ElemKind::Int8QTy, {4}, 1.0, 0, "inp" , false); |
9622 | bindings_.allocate(inp)->getHandle<int8_t>() = {-1, 0, 1, 2}; |
9623 | auto outTy = mod_.uniqueType(ElemKind::Int8QTy, {4}, 1.0 / 127.0, 0); |
9624 | auto *node = F_->createCos("cos" , outTy, inp); |
9625 | auto *save = F_->createSave("save" , node); |
9626 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9627 | EE_.compile(CompilationMode::Infer); |
9628 | EE_.run(bindings_); |
9629 | auto outH = outT->getHandle<int8_t>(); |
9630 | EXPECT_EQ(outH.size(), 4); |
9631 | EXPECT_EQ(outH.raw(0), static_cast<int8_t>(std::round(std::cos(-1) * 127))); |
9632 | EXPECT_EQ(outH.raw(1), static_cast<int8_t>(std::round(std::cos(0) * 127))); |
9633 | EXPECT_EQ(outH.raw(2), static_cast<int8_t>(std::round(std::cos(1) * 127))); |
9634 | EXPECT_EQ(outH.raw(3), static_cast<int8_t>(std::round(std::cos(2) * 127))); |
9635 | } |
9636 | |
9637 | TEST_P(OperatorTest, Erf_FloatTy) { |
9638 | CHECK_IF_ENABLED(); |
9639 | auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {4}, "inp" , false); |
9640 | bindings_.allocate(inp)->getHandle<float>() = {-1.0, 0.0, 1.0, 2.0}; |
9641 | auto *node = F_->createErf("erf" , inp); |
9642 | auto *save = F_->createSave("save" , node); |
9643 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9644 | EE_.compile(CompilationMode::Infer); |
9645 | EE_.run(bindings_); |
9646 | auto outH = outT->getHandle<float>(); |
9647 | EXPECT_EQ(outH.size(), 4); |
9648 | EXPECT_FLOAT_EQ(outH.raw(0), std::erf(-1.0)); |
9649 | EXPECT_FLOAT_EQ(outH.raw(1), std::erf(0.0)); |
9650 | EXPECT_FLOAT_EQ(outH.raw(2), std::erf(1.0)); |
9651 | EXPECT_FLOAT_EQ(outH.raw(3), std::erf(2.0)); |
9652 | } |
9653 | |
9654 | TEST_P(OperatorTest, Erf_Int8QTy) { |
9655 | CHECK_IF_ENABLED(); |
9656 | auto *inp = |
9657 | mod_.createPlaceholder(ElemKind::Int8QTy, {4}, 1.0, 0, "inp" , false); |
9658 | bindings_.allocate(inp)->getHandle<int8_t>() = {-1, 0, 1, 2}; |
9659 | auto outTy = mod_.uniqueType(ElemKind::Int8QTy, {4}, 1.0 / 127.0, 0); |
9660 | auto *node = F_->createErf("erf" , outTy, inp); |
9661 | auto *save = F_->createSave("save" , node); |
9662 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9663 | EE_.compile(CompilationMode::Infer); |
9664 | EE_.run(bindings_); |
9665 | auto outH = outT->getHandle<int8_t>(); |
9666 | EXPECT_EQ(outH.size(), 4); |
9667 | EXPECT_EQ(outH.raw(0), static_cast<int8_t>(std::round(std::erf(-1) * 127))); |
9668 | EXPECT_EQ(outH.raw(1), static_cast<int8_t>(std::round(std::erf(0) * 127))); |
9669 | EXPECT_EQ(outH.raw(2), static_cast<int8_t>(std::round(std::erf(1) * 127))); |
9670 | EXPECT_EQ(outH.raw(3), static_cast<int8_t>(std::round(std::erf(2) * 127))); |
9671 | } |
9672 | |
9673 | TEST_P(OperatorTest, HardSwish_FloatTy) { |
9674 | CHECK_IF_ENABLED(); |
9675 | |
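  // Reference formula: hardswish(x) = x * relu6(x + 3) / 6, with 0.166666667
  // approximating 1/6.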
9676 | auto hardSwish = [](float x) { |
9677 | return x * std::min(std::max(x + (float)3, (float)0.), (float)6.) * |
9678 | (float)0.166666667; |
9679 | }; |
9680 | |
9681 | auto *inp = mod_.createPlaceholder(ElemKind::FloatTy, {4}, "inp" , false); |
9682 | bindings_.allocate(inp)->getHandle<float>() = {-1.0, 0.0, 1.0, 2.0}; |
9683 | auto *node = F_->createHardSwish("hardSwish" , inp); |
9684 | auto *save = F_->createSave("save" , node); |
9685 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
9686 | EE_.compile(CompilationMode::Infer); |
9687 | EE_.run(bindings_); |
9688 | auto outH = outT->getHandle<float>(); |
9689 | EXPECT_EQ(outH.size(), 4); |
9690 | EXPECT_FLOAT_EQ(outH.raw(0), hardSwish(-1.0)); |
9691 | EXPECT_FLOAT_EQ(outH.raw(1), hardSwish(0.0)); |
9692 | EXPECT_FLOAT_EQ(outH.raw(2), hardSwish(1.0)); |
9693 | EXPECT_FLOAT_EQ(outH.raw(3), hardSwish(2.0)); |
9694 | } |
9695 | |
9696 | /// Helper to test CmpNEQ using \p elemKind. |
9697 | template <typename ElemType> |
9698 | static void testCmpNEQ(glow::PlaceholderBindings &bindings, glow::Module &mod, |
9699 | glow::Function *F, glow::ExecutionEngine &EE, |
9700 | ElemKind elemKind) { |
9701 | auto *LHS = |
9702 | createPlaceholderConditionallyQuantized(mod, elemKind, {2}, "LHS" , false); |
9703 | auto *RHS = |
9704 | createPlaceholderConditionallyQuantized(mod, elemKind, {2}, "RHS" , false); |
9705 | bindings.allocate(LHS)->getHandle<ElemType>() = {1, 1}; |
9706 | bindings.allocate(RHS)->getHandle<ElemType>() = {1, 2}; |
9707 | auto *node = F->createCmpNEQ("cmpNEQ" , LHS, RHS); |
9708 | auto *save = F->createSave("save" , node); |
9709 | auto *outT = bindings.allocate(save->getPlaceholder()); |
9710 | EE.compile(CompilationMode::Infer); |
9711 | EE.run(bindings); |
9712 | auto outH = outT->getHandle<bool>(); |
9713 | EXPECT_EQ(outH.size(), 2); |
9714 | EXPECT_EQ(outH.raw(0), false); |
9715 | EXPECT_EQ(outH.raw(1), true); |
9716 | } |
9717 | |
9718 | TEST_P(OperatorTest, CmpNEQ_FloatTy) { |
9719 | CHECK_IF_ENABLED(); |
9720 | testCmpNEQ<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
9721 | } |
9722 | |
9723 | TEST_P(OperatorTest, CmpNEQ_Int8QTy) { |
9724 | CHECK_IF_ENABLED(); |
9725 | testCmpNEQ<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
9726 | } |
9727 | |
9728 | TEST_P(OperatorTest, CmpNEQ_Int16QTy) { |
9729 | CHECK_IF_ENABLED(); |
9730 | testCmpNEQ<int16_t>(bindings_, mod_, F_, EE_, ElemKind::Int16QTy); |
9731 | } |
9732 | |
9733 | TEST_P(OperatorTest, CmpNEQ_Int32ITy) { |
9734 | CHECK_IF_ENABLED(); |
9735 | testCmpNEQ<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
9736 | } |
9737 | |
9738 | TEST_P(OperatorTest, CmpNEQ_Int64ITy) { |
9739 | CHECK_IF_ENABLED(); |
9740 | testCmpNEQ<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
9741 | } |
9742 | |
9743 | /// Helper to test CmpGT using \p elemKind. |
9744 | template <typename ElemType> |
9745 | static void testCmpGT(glow::PlaceholderBindings &bindings, glow::Module &mod, |
9746 | glow::Function *F, glow::ExecutionEngine &EE, |
9747 | ElemKind elemKind) { |
9748 | auto *LHS = |
9749 | createPlaceholderConditionallyQuantized(mod, elemKind, {3}, "LHS" , false); |
9750 | auto *RHS = |
9751 | createPlaceholderConditionallyQuantized(mod, elemKind, {3}, "RHS" , false); |
9752 | bindings.allocate(LHS)->getHandle<ElemType>() = {1, 1, 2}; |
9753 | bindings.allocate(RHS)->getHandle<ElemType>() = {1, 2, 1}; |
9754 | auto *node = F->createCmpGT("cmpGT" , LHS, RHS); |
9755 | auto *save = F->createSave("save" , node); |
9756 | auto *outT = bindings.allocate(save->getPlaceholder()); |
9757 | EE.compile(CompilationMode::Infer); |
9758 | EE.run(bindings); |
9759 | auto outH = outT->getHandle<bool>(); |
9760 | EXPECT_EQ(outH.size(), 3); |
9761 | EXPECT_EQ(outH.raw(0), false); |
9762 | EXPECT_EQ(outH.raw(1), false); |
9763 | EXPECT_EQ(outH.raw(2), true); |
9764 | } |
9765 | |
9766 | TEST_P(OperatorTest, CmpGT_FloatTy) { |
9767 | CHECK_IF_ENABLED(); |
9768 | testCmpGT<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
9769 | } |
9770 | |
9771 | TEST_P(OperatorTest, CmpGT_Int8QTy) { |
9772 | CHECK_IF_ENABLED(); |
9773 | testCmpGT<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
9774 | } |
9775 | |
9776 | TEST_P(OperatorTest, CmpGT_Int32ITy) { |
9777 | CHECK_IF_ENABLED(); |
9778 | testCmpGT<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
9779 | } |
9780 | |
9781 | TEST_P(OperatorTest, CmpGT_Int64ITy) { |
9782 | CHECK_IF_ENABLED(); |
9783 | testCmpGT<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
9784 | } |
9785 | |
9786 | /// Helper to test CmpGTE using \p elemKind. |
9787 | template <typename ElemType> |
9788 | static void testCmpGTE(glow::PlaceholderBindings &bindings, glow::Module &mod, |
9789 | glow::Function *F, glow::ExecutionEngine &EE, |
9790 | ElemKind elemKind) { |
9791 | auto *LHS = |
9792 | createPlaceholderConditionallyQuantized(mod, elemKind, {3}, "LHS" , false); |
9793 | auto *RHS = |
9794 | createPlaceholderConditionallyQuantized(mod, elemKind, {3}, "RHS" , false); |
9795 | bindings.allocate(LHS)->getHandle<ElemType>() = {1, 1, 2}; |
9796 | bindings.allocate(RHS)->getHandle<ElemType>() = {1, 2, 1}; |
9797 | auto *node = F->createCmpGTE("cmpGTE" , LHS, RHS); |
9798 | auto *save = F->createSave("save" , node); |
9799 | auto *outT = bindings.allocate(save->getPlaceholder()); |
9800 | EE.compile(CompilationMode::Infer); |
9801 | EE.run(bindings); |
9802 | auto outH = outT->getHandle<bool>(); |
9803 | EXPECT_EQ(outH.size(), 3); |
9804 | EXPECT_EQ(outH.raw(0), true); |
9805 | EXPECT_EQ(outH.raw(1), false); |
9806 | EXPECT_EQ(outH.raw(2), true); |
9807 | } |
9808 | |
9809 | TEST_P(OperatorTest, CmpGTE_FloatTy) { |
9810 | CHECK_IF_ENABLED(); |
9811 | testCmpGTE<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
9812 | } |
9813 | |
9814 | TEST_P(OperatorTest, CmpGTE_Int8QTy) { |
9815 | CHECK_IF_ENABLED(); |
9816 | testCmpGTE<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
9817 | } |
9818 | |
9819 | TEST_P(OperatorTest, CmpGTE_Int32ITy) { |
9820 | CHECK_IF_ENABLED(); |
9821 | testCmpGTE<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
9822 | } |
9823 | |
9824 | TEST_P(OperatorTest, CmpGTE_Int64ITy) { |
9825 | CHECK_IF_ENABLED(); |
9826 | testCmpGTE<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
9827 | } |
9828 | |
9829 | /// Helper to test CmpLT using \p elemKind. |
9830 | template <typename ElemType> |
9831 | static void testCmpLT(glow::PlaceholderBindings &bindings, glow::Module &mod, |
9832 | glow::Function *F, glow::ExecutionEngine &EE, |
9833 | ElemKind elemKind) { |
9834 | auto *LHS = |
9835 | createPlaceholderConditionallyQuantized(mod, elemKind, {3}, "LHS" , false); |
9836 | auto *RHS = |
9837 | createPlaceholderConditionallyQuantized(mod, elemKind, {3}, "RHS" , false); |
9838 | bindings.allocate(LHS)->getHandle<ElemType>() = {1, 1, 2}; |
9839 | bindings.allocate(RHS)->getHandle<ElemType>() = {1, 2, 1}; |
9840 | auto *node = F->createCmpLT("cmpLT" , LHS, RHS); |
9841 | auto *save = F->createSave("save" , node); |
9842 | auto *outT = bindings.allocate(save->getPlaceholder()); |
9843 | EE.compile(CompilationMode::Infer); |
9844 | EE.run(bindings); |
9845 | auto outH = outT->getHandle<bool>(); |
9846 | EXPECT_EQ(outH.size(), 3); |
9847 | EXPECT_EQ(outH.raw(0), false); |
9848 | EXPECT_EQ(outH.raw(1), true); |
9849 | EXPECT_EQ(outH.raw(2), false); |
9850 | } |
9851 | |
9852 | TEST_P(OperatorTest, CmpLT_FloatTy) { |
9853 | CHECK_IF_ENABLED(); |
9854 | testCmpLT<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
9855 | } |
9856 | |
9857 | TEST_P(OperatorTest, CmpLT_Int8QTy) { |
9858 | CHECK_IF_ENABLED(); |
9859 | testCmpLT<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
9860 | } |
9861 | |
9862 | TEST_P(OperatorTest, CmpLT_Int32ITy) { |
9863 | CHECK_IF_ENABLED(); |
9864 | testCmpLT<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
9865 | } |
9866 | |
9867 | TEST_P(OperatorTest, CmpLT_Int64ITy) { |
9868 | CHECK_IF_ENABLED(); |
9869 | testCmpLT<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
9870 | } |
9871 | |
9872 | /// Helper to test CmpLTE using \p elemKind. |
9873 | template <typename ElemType> |
9874 | static void testCmpLTE(glow::PlaceholderBindings &bindings, glow::Module &mod, |
9875 | glow::Function *F, glow::ExecutionEngine &EE, |
9876 | ElemKind elemKind) { |
9877 | auto *LHS = |
9878 | createPlaceholderConditionallyQuantized(mod, elemKind, {3}, "LHS" , false); |
9879 | auto *RHS = |
9880 | createPlaceholderConditionallyQuantized(mod, elemKind, {3}, "RHS" , false); |
9881 | bindings.allocate(LHS)->getHandle<ElemType>() = {1, 1, 2}; |
9882 | bindings.allocate(RHS)->getHandle<ElemType>() = {1, 2, 1}; |
9883 | auto *node = F->createCmpLTE("cmpLTE" , LHS, RHS); |
9884 | auto *save = F->createSave("save" , node); |
9885 | auto *outT = bindings.allocate(save->getPlaceholder()); |
9886 | EE.compile(CompilationMode::Infer); |
9887 | EE.run(bindings); |
9888 | auto outH = outT->getHandle<bool>(); |
9889 | EXPECT_EQ(outH.size(), 3); |
9890 | EXPECT_EQ(outH.raw(0), true); |
9891 | EXPECT_EQ(outH.raw(1), true); |
9892 | EXPECT_EQ(outH.raw(2), false); |
9893 | } |
9894 | |
9895 | TEST_P(OperatorTest, CmpLTE_FloatTy) { |
9896 | CHECK_IF_ENABLED(); |
9897 | testCmpLTE<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
9898 | } |
9899 | |
9900 | TEST_P(OperatorTest, CmpLTE_Int8QTy) { |
9901 | CHECK_IF_ENABLED(); |
9902 | testCmpLTE<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
9903 | } |
9904 | |
9905 | TEST_P(OperatorTest, CmpLTE_Int32ITy) { |
9906 | CHECK_IF_ENABLED(); |
9907 | testCmpLTE<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
9908 | } |
9909 | |
9910 | TEST_P(OperatorTest, CmpLTE_Int64ITy) { |
9911 | CHECK_IF_ENABLED(); |
9912 | testCmpLTE<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
9913 | } |
9914 | |
9915 | TEST_P(OperatorTest, simpleCmpSelectPredication) { |
9916 | CHECK_IF_ENABLED(); |
9917 | |
  // A simple test that checks predication of values using the compare-select
  // pair of instructions. Each value is doubled on every iteration until its
  // per-element counter reaches zero.
9921 | |
9922 | auto *inputs = |
9923 | mod_.createPlaceholder(ElemKind::FloatTy, {10}, "inputs" , false); |
9924 | auto *counters = |
9925 | mod_.createPlaceholder(ElemKind::FloatTy, {10}, "counters" , false); |
9926 | |
9927 | bindings_.allocate(counters)->getHandle() = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; |
9928 | bindings_.allocate(inputs)->getHandle().clear(1); |
9929 | |
9930 | Node *cnt = counters; |
9931 | NodeValue data = inputs; |
9932 | Node *const1 = F_->createSplat("const1" , counters->getType(), 1.0); |
9933 | Node *const0 = F_->createSplat("const0" , counters->getType(), 0.0); |
9934 | |
9935 | for (int i = 0; i < 10; i++) { |
9936 | cnt = F_->createSub("sub1" , cnt, const1); |
9937 | Node *pred = F_->createCmpLTE("cmp" , const0, cnt); |
9938 | |
9939 | Node *const2 = F_->createSplat("const2" , data.getType(), 2.0); |
9940 | Node *newData = F_->createMul("mul2x" , data, const2); |
9941 | |
9942 | data = F_->createSelect("select" , pred, newData, data); |
9943 | } |
9944 | |
9945 | auto *SN = F_->createSave("ret" , data); |
9946 | bindings_.allocate(SN->getPlaceholder()); |
9947 | |
9948 | EE_.compile(CompilationMode::Infer); |
9949 | EE_.run(bindings_); |
9950 | |
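  // Element i has counter i and thus survives i doubling steps, so the
  // expected result is 2^i.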
9951 | auto H = bindings_.get(SN->getPlaceholder())->getHandle(); |
9952 | ASSERT_NEAR(H.at(0), 1, 0.001); |
9953 | ASSERT_NEAR(H.at(1), 2, 0.001); |
9954 | ASSERT_NEAR(H.at(2), 4, 0.001); |
9955 | ASSERT_NEAR(H.at(3), 8, 0.001); |
9956 | ASSERT_NEAR(H.at(4), 16, 0.001); |
9957 | ASSERT_NEAR(H.at(5), 32, 0.001); |
9958 | ASSERT_NEAR(H.at(6), 64, 0.001); |
9959 | ASSERT_NEAR(H.at(7), 128, 0.001); |
9960 | ASSERT_NEAR(H.at(8), 256, 0.001); |
9961 | ASSERT_NEAR(H.at(9), 512, 0.001); |
9962 | } |
9963 | |
9964 | TEST_P(OperatorTest, simplePredication) { |
9965 | CHECK_IF_ENABLED(); |
9966 | |
9967 | auto *inputs = |
9968 | mod_.createPlaceholder(ElemKind::FloatTy, {10, 10, 10}, "inputs" , false); |
9969 | auto *counters = |
9970 | mod_.createPlaceholder(ElemKind::FloatTy, {10}, "counters" , false); |
9971 | |
9972 | bindings_.allocate(counters)->getHandle() = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; |
9973 | bindings_.allocate(inputs)->getHandle().randomize(-10, 10, mod_.getPRNG()); |
9974 | |
9975 | Node *C5 = F_->createSplat("C5" , counters->getType(), 5.0); |
9976 | Node *pred = F_->createCmpLTE("cmp" , C5, counters); |
9977 | |
9978 | auto *FC0 = F_->createFullyConnected(bindings_, "FC0" , inputs, 128); |
9979 | auto *RL0 = F_->createRELU("RL0" , FC0); |
9980 | auto *FC1 = F_->createFullyConnected(bindings_, "FC1" , RL0, 64); |
9981 | auto *RL1 = F_->createRELU("RL1" , FC1); |
9982 | auto *FC2 = F_->createFullyConnected(bindings_, "FC2" , RL1, 32); |
9983 | auto *RL2 = F_->createRELU("RL2" , FC2); |
9984 | |
9985 | auto *save = F_->createSave("ret" , RL2); |
9986 | bindings_.allocate(save->getPlaceholder()); |
9987 | |
9988 | FC0->setPredicate(pred); |
9989 | FC1->setPredicate(pred); |
9990 | FC2->setPredicate(pred); |
9991 | |
9992 | ::glow::convertPlaceholdersToConstants( |
9993 | F_, bindings_, {inputs, counters, save->getPlaceholder()}); |
9994 | EE_.compile(CompilationMode::Infer); |
9995 | EE_.run(bindings_); |
9996 | } |
9997 | |
9998 | TEST_P(OperatorTest, ChannelShuffle) { |
9999 | CHECK_IF_ENABLED(); |
10000 | |
10001 | auto *inputs = |
10002 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 12, 1, 1}, "inputs" , false); |
10003 | bindings_.allocate(inputs)->getHandle() = {1, 2, 3, 4, 5, 6, |
10004 | 7, 8, 9, 10, 11, 12}; |
10005 | |
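  // Shuffle 12 channels with group = 3 along axis 1: the channels are viewed
  // as a 3x4 matrix, transposed to 4x3, and flattened, interleaving the three
  // groups as {1, 5, 9, 2, 6, 10, ...}.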
10006 | Node *CS = F_->createChannelShuffle("CS" , inputs, 3, 1); |
10007 | SaveNode *S = F_->createSave("save" , CS); |
10008 | bindings_.allocate(S->getPlaceholder()); |
10009 | |
10010 | EE_.compile(CompilationMode::Infer); |
10011 | EE_.run(bindings_); |
10012 | |
10013 | auto results = bindings_.get(S->getPlaceholder())->getHandle(); |
10014 | |
10015 | EXPECT_EQ(results.size(), 12); |
10016 | std::vector<float> expected = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12}; |
10017 | for (dim_t i = 0; i < expected.size(); i++) |
10018 | EXPECT_FLOAT_EQ(results.at({0, i, 0, 0}), expected[i]); |
10019 | } |
10020 | |
10021 | TEST_P(OperatorTest, SqueezeOneAxis) { |
10022 | CHECK_IF_ENABLED(); |
10023 | |
10024 | auto *inputs = |
10025 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 5}, "inputs" , false); |
10026 | bindings_.allocate(inputs)->getHandle() = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; |
10027 | |
10028 | std::vector<float> expectedValues = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; |
10029 | |
10030 | std::vector<dim_t> axes = {0}; |
10031 | Node *SQZ = F_->createSqueeze("SQZ" , inputs, axes); |
10032 | SaveNode *S = F_->createSave("save" , SQZ); |
10033 | bindings_.allocate(S->getPlaceholder()); |
10034 | |
10035 | EE_.compile(CompilationMode::Infer); |
10036 | EE_.run(bindings_); |
10037 | |
10038 | auto results = bindings_.get(S->getPlaceholder())->getHandle(); |
10039 | std::vector<dim_t> expectedDims = {2, 1, 5}; |
10040 | EXPECT_TRUE(results.dims().vec() == expectedDims); |
10041 | for (size_t i = 0; i < 10; i++) |
10042 | EXPECT_FLOAT_EQ(results.raw(i), expectedValues[i]); |
10043 | } |
10044 | |
10045 | TEST_P(OperatorTest, SqueezeTwoAxes) { |
10046 | CHECK_IF_ENABLED(); |
10047 | |
10048 | auto mod = &EE_.getModule(); |
10049 | auto *inputs = |
10050 | mod->createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 5}, "inputs" , false); |
10051 | bindings_.allocate(inputs)->getHandle() = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; |
10052 | |
10053 | std::vector<float> expectedValues = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; |
10054 | |
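  // Axis 2 is listed twice; duplicate axes are tolerated, and the result
  // squeezes both singleton dimensions (axes 0 and 2).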
10055 | std::vector<dim_t> axes = {0, 2, 2}; |
10056 | Node *SQZ = F_->createSqueeze("SQZ" , inputs, axes); |
10057 | SaveNode *S = F_->createSave("save" , SQZ); |
10058 | bindings_.allocate(S->getPlaceholder()); |
10059 | |
10060 | EE_.compile(CompilationMode::Infer); |
10061 | EE_.run(bindings_); |
10062 | |
10063 | auto results = bindings_.get(S->getPlaceholder())->getHandle(); |
10064 | std::vector<dim_t> expectedDims = {2, 5}; |
10065 | EXPECT_TRUE(results.dims().vec() == expectedDims); |
10066 | for (size_t i = 0; i < 10; i++) |
10067 | EXPECT_FLOAT_EQ(results.raw(i), expectedValues[i]); |
10068 | } |
10069 | |
10070 | TEST_P(OperatorTest, SqueezeExpand) { |
10071 | CHECK_IF_ENABLED(); |
10072 | |
10073 | auto mod = &EE_.getModule(); |
10074 | auto *inputs = |
10075 | mod->createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 5}, "inputs" , false); |
10076 | bindings_.allocate(inputs)->getHandle() = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; |
10077 | auto *emptyInput = |
10078 | mod->createPlaceholder(ElemKind::FloatTy, {1}, "emptyInput" , false); |
10079 | bindings_.allocate(emptyInput)->getHandle() = {42.0}; |
10080 | |
10081 | std::vector<float> expectedValues = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; |
10082 | |
10083 | std::vector<dim_t> axes = {0}; |
10084 | Node *SQZ = F_->createSqueeze("SQZ" , emptyInput, axes); |
  SaveNode *S1 = F_->createSave("save1", SQZ);
  Node *UnSQZ = F_->createExpandDims("UnSQZ", SQZ, axes);
  SaveNode *S2 = F_->createSave("save2", UnSQZ);
10088 | |
10089 | bindings_.allocate(S1->getPlaceholder()); |
10090 | bindings_.allocate(S2->getPlaceholder()); |
10091 | |
10092 | EE_.compile(CompilationMode::Infer); |
10093 | EE_.run(bindings_); |
10094 | |
10095 | auto res1 = bindings_.get(S1->getPlaceholder())->getHandle(); |
10096 | EXPECT_TRUE(res1.dims().vec() == std::vector<dim_t>()); |
10097 | EXPECT_FLOAT_EQ(res1.raw(0), 42.0); |
10098 | auto res2 = bindings_.get(S2->getPlaceholder())->getHandle(); |
10099 | EXPECT_TRUE(res2.dims().vec() == std::vector<dim_t>(1, 1)); |
10100 | EXPECT_FLOAT_EQ(res2.raw(0), 42.0); |
10101 | } |
10102 | |
10103 | /// Helper to test ExpandDims using \p DTy. |
10104 | template <typename DataType> |
10105 | static void testExpandDims(glow::PlaceholderBindings &bindings, |
10106 | glow::Module &mod, glow::Function *F, |
10107 | glow::ExecutionEngine &EE, ElemKind DTy) { |
10108 | auto *inputs = createPlaceholderConditionallyQuantized(mod, DTy, {2, 2}, |
10109 | "inputs" , false); |
10110 | auto IH = bindings.allocate(inputs)->getHandle<DataType>(); |
10111 | IH = {1, 2, 3, 4}; |
10112 | |
10113 | // This should be uniqued and sorted, so should become {0, 1, 3, 5}. |
10114 | std::vector<dim_t> axes = {3, 0, 5, 1, 3}; |
10115 | Node *EDN = F->createExpandDims("expand" , inputs, axes); |
10116 | SaveNode *S = F->createSave("save" , EDN); |
10117 | bindings.allocate(S->getPlaceholder()); |
10118 | |
10119 | EE.compile(CompilationMode::Infer); |
10120 | EE.run(bindings); |
10121 | |
10122 | // Expected dims based on the axes above; inserted new dimensions of 1 in |
10123 | // every unique axes location, based on the output tensor shape. |
10124 | std::vector<dim_t> expectedDims = {1, 1, 2, 1, 2, 1}; |
10125 | auto results = bindings.get(S->getPlaceholder())->getHandle<DataType>(); |
10126 | EXPECT_TRUE(results.dims().vec() == expectedDims); |
10127 | |
10128 | // The data should be the same, as this was just a reshape. |
10129 | for (size_t i = 0; i < 4; i++) { |
10130 | EXPECT_FLOAT_EQ(results.raw(i), IH.raw(i)); |
10131 | } |
10132 | } |
10133 | |
10134 | /// Check that the expand dims operator works, which is implemented with a |
10135 | /// reshape, in FloatTy. |
10136 | TEST_P(OperatorTest, ExpandDims_Float) { |
10137 | CHECK_IF_ENABLED(); |
10138 | testExpandDims<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
10139 | } |
10140 | |
10141 | /// Check that the expand dims operator works, which is implemented with a |
10142 | /// reshape, in Float16Ty. |
10143 | TEST_P(OperatorTest, ExpandDims_Float16) { |
10144 | CHECK_IF_ENABLED(); |
10145 | testExpandDims<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
10146 | } |
10147 | |
10148 | /// Check that the expand dims operator works, which is implemented with a |
10149 | /// reshape, in BFloat16Ty. |
10150 | TEST_P(OperatorTest, ExpandDims_BFloat16) { |
10151 | CHECK_IF_ENABLED(); |
10152 | testExpandDims<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
10153 | } |
10154 | |
10155 | /// Check that the expand dims operator works, which is implemented with a |
10156 | /// reshape, in Int8QTy. |
10157 | TEST_P(OperatorTest, ExpandDims_Int8) { |
10158 | CHECK_IF_ENABLED(); |
10159 | testExpandDims<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
10160 | } |
10161 | |
10162 | /// Helper to test Split using \p DTy. |
10163 | template <typename DataType> |
10164 | static void testSplit(glow::PlaceholderBindings &bindings, glow::Module &mod, |
10165 | glow::Function *F, glow::ExecutionEngine &EE, |
10166 | ElemKind DTy) { |
10167 | auto *inputs = createPlaceholderConditionallyQuantized(mod, DTy, {1, 2, 6}, |
10168 | "inputs" , false); |
10169 | bindings.allocate(inputs)->getHandle<DataType>() = {1, 2, 3, 4, 5, 6, |
10170 | 7, 8, 9, 10, 11, 12}; |
10171 | |
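  // An empty split list divides the axis evenly into outputNum slices; an
  // explicit list such as {2, 4} produces slices of those widths.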
10172 | std::vector<SliceNode *> outputs1; |
10173 | F->createSplit("Split1" , inputs, /*outputNum = */ 2, /*axis = */ 2, |
10174 | /*split = */ {}, outputs1); |
10175 | std::vector<SliceNode *> outputs2; |
10176 | F->createSplit("Split2" , inputs, /*outputNum = */ 2, /*axis = */ 2, |
10177 | /*split = */ {2, 4}, outputs2); |
10178 | auto *S1 = F->createSave("save1" , outputs1[0]); |
10179 | auto *S2 = F->createSave("save2" , outputs1[1]); |
10180 | auto *S3 = F->createSave("save3" , outputs2[0]); |
10181 | auto *S4 = F->createSave("save4" , outputs2[1]); |
10182 | |
10183 | auto *result1 = bindings.allocate(S1->getPlaceholder()); |
10184 | auto *result2 = bindings.allocate(S2->getPlaceholder()); |
10185 | auto *result3 = bindings.allocate(S3->getPlaceholder()); |
10186 | auto *result4 = bindings.allocate(S4->getPlaceholder()); |
10187 | |
10188 | EE.compile(CompilationMode::Infer); |
10189 | EE.run(bindings); |
10190 | |
10191 | Tensor expected1 = createTensorConditionallyQuantized(DTy, {1, 2, 3}); |
10192 | expected1.getHandle<DataType>() = {1, 2, 3, 7, 8, 9}; |
10193 | EXPECT_TRUE(result1->isEqual(expected1)); |
10194 | |
10195 | Tensor expected2 = createTensorConditionallyQuantized(DTy, {1, 2, 3}); |
10196 | expected2.getHandle<DataType>() = {4, 5, 6, 10, 11, 12}; |
10197 | EXPECT_TRUE(result2->isEqual(expected2)); |
10198 | |
10199 | Tensor expected3 = createTensorConditionallyQuantized(DTy, {1, 2, 2}); |
10200 | expected3.getHandle<DataType>() = {1, 2, 7, 8}; |
10201 | EXPECT_TRUE(result3->isEqual(expected3)); |
10202 | |
10203 | Tensor expected4 = createTensorConditionallyQuantized(DTy, {1, 2, 4}); |
10204 | expected4.getHandle<DataType>() = {3, 4, 5, 6, 9, 10, 11, 12}; |
10205 | EXPECT_TRUE(result4->isEqual(expected4)); |
10206 | } |
10207 | |
10208 | /// Test that Split is correctly supported in FloatTy. |
10209 | TEST_P(OperatorTest, Split_Float) { |
10210 | CHECK_IF_ENABLED(); |
10211 | testSplit<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
10212 | } |
10213 | |
10214 | /// Test that Split is correctly supported in Float16Ty. |
10215 | TEST_P(OperatorTest, Split_Float16) { |
10216 | CHECK_IF_ENABLED(); |
10217 | testSplit<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
10218 | } |
10219 | |
10220 | /// Test that Split is correctly supported in BFloat16Ty. |
10221 | TEST_P(OperatorTest, Split_BFloat16) { |
10222 | CHECK_IF_ENABLED(); |
10223 | testSplit<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
10224 | } |
10225 | |
10226 | /// Test that Split is correctly supported in Int8QTy. |
10227 | TEST_P(OperatorTest, Split_Int8) { |
10228 | CHECK_IF_ENABLED(); |
10229 | testSplit<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
10230 | } |
10231 | |
10232 | /// Test Relu with Int8QTy. |
10233 | TEST_P(OperatorTest, Relu_Int8) { |
10234 | CHECK_IF_ENABLED(); |
10235 | |
10236 | std::vector<float> inputVals = {-2.0, -1.0, 0.0, 1.0, 2.0}; |
10237 | dim_t size = inputVals.size(); |
10238 | const float inputScale = 1.0; |
10239 | const int32_t inputOffset = 5; |
10240 | const float outputScale = 0.5; |
10241 | const int32_t outputOffset = -128; |
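  // The expected ReLU outputs {0, 0, 0, 1, 2} are exactly representable with
  // output scale 0.5, so the dequantized results can be compared with
  // EXPECT_EQ below.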
10242 | |
10243 | auto *inputTy = |
10244 | mod_.uniqueType(ElemKind::Int8QTy, {size}, inputScale, inputOffset); |
10245 | auto *outputTy = |
10246 | mod_.uniqueType(ElemKind::Int8QTy, {size}, outputScale, outputOffset); |
10247 | auto *input = mod_.createPlaceholder(inputTy, "input" , false); |
10248 | auto *relu = F_->createRELU("relu" , input, outputTy); |
10249 | auto *dequantize = |
10250 | F_->createDequantize("dequantize" , relu, ElemKind::FloatTy); |
10251 | auto *save = F_->createSave("save" , dequantize); |
10252 | bindings_.allocate(mod_.getPlaceholders()); |
10253 | |
10254 | auto inputH = bindings_.get(input)->getHandle<int8_t>(); |
10255 | for (dim_t idx = 0; idx < size; idx++) { |
10256 | inputH.raw(idx) = |
10257 | quantization::quantize(inputVals[idx], {inputScale, inputOffset}); |
10258 | } |
10259 | |
10260 | EE_.compile(CompilationMode::Infer); |
10261 | EE_.run(bindings_); |
10262 | |
10263 | auto outputH = bindings_.get(save->getPlaceholder())->getHandle(); |
10264 | for (dim_t idx = 0; idx < size; idx++) { |
10265 | float expectedValue = std::max(0.0f, inputVals[idx]); |
10266 | EXPECT_EQ(expectedValue, outputH.raw(idx)); |
10267 | } |
10268 | } |
10269 | |
// Test for elementwise FloorDiv with quantization and broadcast support.
10271 | TEST_P(OperatorTest, IntFloorDivBroadcast) { |
10272 | CHECK_IF_ENABLED(); |
10273 | |
10274 | const float in1Scale = 0.9; |
10275 | const float in2Scale = 1.2; |
10276 | const float outScale = 1; |
10277 | const int32_t in1Offset = 2; |
10278 | const int32_t in2Offset = -11; |
10279 | const int32_t outOffset = -2; |
10280 | const dim_t N = 2; |
10281 | const dim_t C = 3; |
10282 | const dim_t H = 4; |
10283 | const dim_t W = 5; |
10284 | |
10285 | auto in1Ty = |
10286 | mod_.uniqueType(ElemKind::Int8QTy, {N, C, H, W}, in1Scale, in1Offset); |
10287 | auto in2Ty = mod_.uniqueType(ElemKind::Int8QTy, {W}, in2Scale, in2Offset); |
10288 | auto outTy = |
10289 | mod_.uniqueType(ElemKind::Int8QTy, {N, C, H, W}, outScale, outOffset); |
10290 | |
10291 | auto *in1 = mod_.createPlaceholder(in1Ty, "in1" , false); |
10292 | auto *in2 = mod_.createPlaceholder(in2Ty, "in2" , false); |
10293 | |
10294 | bindings_.allocate(in1)->getHandle<int8_t>().randomize(-10, 10, |
10295 | mod_.getPRNG()); |
10296 | bindings_.allocate(in2)->getHandle<int8_t>().randomize(-10, 10, |
10297 | mod_.getPRNG()); |
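  // Note: with in2Offset = -11, the dequantized divisor is (q + 11) * 1.2 >=
  // 1.2 for q in [-10, 10], so the floor division never divides by zero.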
10298 | constexpr int axis = -1; |
10299 | auto *floorDivBroadcast = F_->createFloorDivWithBroadcast( |
10300 | "floorDivBroadcast" , axis, outTy, in1, in2); |
10301 | |
10302 | auto *saveFloorDiv = F_->createSave("saveFloorDiv" , floorDivBroadcast); |
10303 | |
10304 | bindings_.allocate(saveFloorDiv->getPlaceholder()); |
10305 | |
10306 | auto Qin1H = bindings_.get(in1)->getHandle<int8_t>(); |
10307 | auto Qin2H = bindings_.get(in2)->getHandle<int8_t>(); |
10308 | |
10309 | EE_.compile(CompilationMode::Infer); |
10310 | EE_.run(bindings_); |
10311 | |
10312 | auto resultFloorDiv = |
10313 | bindings_.get(saveFloorDiv->getPlaceholder())->getHandle<int8_t>(); |
10314 | |
10315 | for (dim_t w = 0; w < W; w++) { |
10316 | float b = quantization::dequantize(Qin2H.at({w}), {in2Scale, in2Offset}); |
10317 | for (dim_t n = 0; n < N; n++) { |
10318 | for (dim_t c = 0; c < C; c++) { |
10319 | for (dim_t h = 0; h < H; h++) { |
10320 | float a = quantization::dequantize(Qin1H.at({n, c, h, w}), |
10321 | {in1Scale, in1Offset}); |
10322 | int8_t floorDiv = |
10323 | quantization::quantize(std::floor(a / b), {outScale, outOffset}); |
10324 | |
10325 | EXPECT_NEAR(floorDiv, resultFloorDiv.at({n, c, h, w}), 1); |
10326 | } |
10327 | } |
10328 | } |
10329 | } |
10330 | } |
10331 | |
// Test for elementwise ops (Add, Sub, Mul, Div, Min, Max) with quantization
// and broadcast support.
10333 | TEST_P(OperatorTest, IntElementWiseBroadcast) { |
10334 | CHECK_IF_ENABLED(); |
10335 | |
10336 | const float in1Scale = 0.9; |
10337 | const float in2Scale = 1.2; |
10338 | const float outScale = 1; |
10339 | const int32_t in1Offset = 2; |
10340 | const int32_t in2Offset = -11; |
10341 | const int32_t outOffset = -2; |
10342 | const dim_t N = 2; |
10343 | const dim_t C = 3; |
10344 | const dim_t H = 4; |
10345 | const dim_t W = 5; |
10346 | |
10347 | auto in1Ty = |
10348 | mod_.uniqueType(ElemKind::Int8QTy, {N, C, H, W}, in1Scale, in1Offset); |
10349 | auto in2Ty = mod_.uniqueType(ElemKind::Int8QTy, {W}, in2Scale, in2Offset); |
10350 | auto outTy = |
10351 | mod_.uniqueType(ElemKind::Int8QTy, {N, C, H, W}, outScale, outOffset); |
10352 | |
10353 | auto *in1 = mod_.createPlaceholder(in1Ty, "in1" , false); |
10354 | auto *in2 = mod_.createPlaceholder(in2Ty, "in2" , false); |
10355 | |
10356 | bindings_.allocate(in1)->getHandle<int8_t>().randomize(-10, 10, |
10357 | mod_.getPRNG()); |
10358 | bindings_.allocate(in2)->getHandle<int8_t>().randomize(-10, 10, |
10359 | mod_.getPRNG()); |
10360 | constexpr int axis = -1; |
10361 | auto *addBroadcast = F_->createNodeWithBroadcastOutTy<AddNode>( |
10362 | "addBroadcast" , axis, outTy, in1, in2); |
10363 | |
10364 | auto *subBroadcast = F_->createNodeWithBroadcastOutTy<SubNode>( |
10365 | "subBroadcast" , axis, outTy, in1, in2); |
10366 | |
10367 | auto *mulBroadcast = F_->createNodeWithBroadcastOutTy<MulNode>( |
10368 | "mulBroadcast" , axis, outTy, in1, in2); |
10369 | |
10370 | auto *divBroadcast = F_->createNodeWithBroadcastOutTy<DivNode>( |
10371 | "divBroadcast" , axis, outTy, in1, in2); |
10372 | |
10373 | auto *minBroadcast = F_->createNodeWithBroadcastOutTy<MinNode>( |
10374 | "minBroadcast" , axis, outTy, in1, in2); |
10375 | |
10376 | auto *maxBroadcast = F_->createNodeWithBroadcastOutTy<MaxNode>( |
10377 | "maxBroadcast" , axis, outTy, in1, in2); |
10378 | |
10379 | auto *saveAdd = F_->createSave("saveAdd" , addBroadcast); |
10380 | auto *saveSub = F_->createSave("saveSub" , subBroadcast); |
10381 | auto *saveMul = F_->createSave("saveMul" , mulBroadcast); |
10382 | auto *saveDiv = F_->createSave("saveDiv" , divBroadcast); |
10383 | auto *saveMin = F_->createSave("saveMin" , minBroadcast); |
10384 | auto *saveMax = F_->createSave("saveMax" , maxBroadcast); |
10385 | |
10386 | bindings_.allocate(saveAdd->getPlaceholder()); |
10387 | bindings_.allocate(saveSub->getPlaceholder()); |
10388 | bindings_.allocate(saveMul->getPlaceholder()); |
10389 | bindings_.allocate(saveDiv->getPlaceholder()); |
10390 | bindings_.allocate(saveMin->getPlaceholder()); |
10391 | bindings_.allocate(saveMax->getPlaceholder()); |
10392 | |
10393 | auto Qin1H = bindings_.get(in1)->getHandle<int8_t>(); |
10394 | auto Qin2H = bindings_.get(in2)->getHandle<int8_t>(); |
10395 | |
10396 | EE_.compile(CompilationMode::Infer); |
10397 | EE_.run(bindings_); |
10398 | |
10399 | auto resultAdd = |
10400 | bindings_.get(saveAdd->getPlaceholder())->getHandle<int8_t>(); |
10401 | auto resultSub = |
10402 | bindings_.get(saveSub->getPlaceholder())->getHandle<int8_t>(); |
10403 | auto resultMul = |
10404 | bindings_.get(saveMul->getPlaceholder())->getHandle<int8_t>(); |
10405 | auto resultDiv = |
10406 | bindings_.get(saveDiv->getPlaceholder())->getHandle<int8_t>(); |
10407 | auto resultMin = |
10408 | bindings_.get(saveMin->getPlaceholder())->getHandle<int8_t>(); |
10409 | auto resultMax = |
10410 | bindings_.get(saveMax->getPlaceholder())->getHandle<int8_t>(); |
10411 | |
10412 | for (dim_t w = 0; w < W; w++) { |
10413 | float b = quantization::dequantize(Qin2H.at({w}), {in2Scale, in2Offset}); |
10414 | for (dim_t n = 0; n < N; n++) { |
10415 | for (dim_t c = 0; c < C; c++) { |
10416 | for (dim_t h = 0; h < H; h++) { |
10417 | float a = quantization::dequantize(Qin1H.at({n, c, h, w}), |
10418 | {in1Scale, in1Offset}); |
10419 | int8_t add = quantization::quantize((a + b), {outScale, outOffset}); |
10420 | int8_t sub = quantization::quantize((a - b), {outScale, outOffset}); |
10421 | int8_t mul = quantization::quantize((a * b), {outScale, outOffset}); |
10422 | int8_t div = quantization::quantize((a / b), {outScale, outOffset}); |
10423 | int8_t min = |
10424 | quantization::quantize(std::min(a, b), {outScale, outOffset}); |
10425 | int8_t max = |
10426 | quantization::quantize(std::max(a, b), {outScale, outOffset}); |
10427 | |
10428 | EXPECT_NEAR(add, resultAdd.at({n, c, h, w}), 1); |
10429 | EXPECT_NEAR(sub, resultSub.at({n, c, h, w}), 1); |
10430 | EXPECT_NEAR(mul, resultMul.at({n, c, h, w}), 1); |
10431 | EXPECT_NEAR(div, resultDiv.at({n, c, h, w}), 1); |
10432 | EXPECT_NEAR(min, resultMin.at({n, c, h, w}), 1); |
10433 | EXPECT_NEAR(max, resultMax.at({n, c, h, w}), 1); |
10434 | } |
10435 | } |
10436 | } |
10437 | } |
10438 | } |
10439 | |
10440 | /// Test Clip with Int8QTy. |
10441 | TEST_P(OperatorTest, Clip_Int8) { |
10442 | CHECK_IF_ENABLED(); |
10443 | |
10444 | std::vector<float> inputVals = {-3, -2, -1, 0, 1, 2, 3, 4}; |
10445 | float clipMin = -2.0; |
10446 | float clipMax = 3.0; |
10447 | dim_t size = inputVals.size(); |
10448 | const float inputScale = 1.0; |
10449 | const int32_t inputOffset = 5; |
10450 | const float outputScale = 0.5; |
10451 | const int32_t outputOffset = -3; |
10452 | |
10453 | auto *inputTy = |
10454 | mod_.uniqueType(ElemKind::Int8QTy, {size}, inputScale, inputOffset); |
10455 | auto *outputTy = |
10456 | mod_.uniqueType(ElemKind::Int8QTy, {size}, outputScale, outputOffset); |
10457 | auto *input = mod_.createPlaceholder(inputTy, "input" , false); |
  auto *clip = F_->createClip("clip", input, outputTy, clipMin, clipMax);
  auto *dequantize =
      F_->createDequantize("dequantize", clip, ElemKind::FloatTy);
10461 | auto *save = F_->createSave("save" , dequantize); |
10462 | bindings_.allocate(mod_.getPlaceholders()); |
10463 | |
10464 | auto inputH = bindings_.get(input)->getHandle<int8_t>(); |
10465 | for (dim_t idx = 0; idx < size; idx++) { |
10466 | inputH.raw(idx) = |
10467 | quantization::quantize(inputVals[idx], {inputScale, inputOffset}); |
10468 | } |
10469 | |
10470 | EE_.compile(CompilationMode::Infer); |
10471 | EE_.run(bindings_); |
10472 | |
10473 | auto outputH = bindings_.get(save->getPlaceholder())->getHandle(); |
10474 | for (dim_t idx = 0; idx < size; idx++) { |
10475 | float expectedValue = std::min(clipMax, std::max(clipMin, inputVals[idx])); |
10476 | EXPECT_EQ(expectedValue, outputH.raw(idx)); |
10477 | } |
10478 | } |
10479 | |
10480 | /// Verify quantized splats work correctly (add 0 to it to ensure constant |
10481 | /// folding doesn't make this test meaningless). |
10482 | TEST_P(OperatorTest, IntSplat) { |
10483 | CHECK_IF_ENABLED(); |
10484 | |
10485 | const float splatValue = 10; |
10486 | const float scale = 1.0; |
10487 | const int32_t offset = 5; |
10488 | const dim_t size = 3; |
10489 | |
10490 | auto *in = mod_.createPlaceholder(ElemKind::Int8QTy, {size}, scale, offset, |
10491 | "in" , false); |
10492 | auto splatTy = mod_.uniqueType(ElemKind::Int8QTy, {size}, scale, offset); |
10493 | auto *splat = F_->createSplat("splat" , splatTy, splatValue); |
10494 | auto *add = F_->createAdd("add" , in, splat); |
10495 | auto *dequantize = F_->createDequantize("dequantize" , add, ElemKind::FloatTy); |
10496 | auto *save = F_->createSave("save" , dequantize); |
10497 | |
10498 | bindings_.allocate(in)->zero(); |
10499 | auto resultH = bindings_.allocate(save->getPlaceholder())->getHandle(); |
10500 | |
10501 | EE_.compile(CompilationMode::Infer); |
10502 | EE_.run(bindings_); |
10503 | |
10504 | for (dim_t i = 0; i < resultH.size(); i++) { |
10505 | EXPECT_EQ(splatValue, resultH.raw(i)); |
10506 | } |
10507 | } |
10508 | |
10509 | /// Verify fp16 splats work correctly (add 0 to it to ensure constant |
10510 | /// folding doesn't make this test meaningless). |
10511 | TEST_P(OperatorTest, Fp16Splat) { |
10512 | CHECK_IF_ENABLED(); |
10513 | |
10514 | const float splatValue = 10; |
10515 | const dim_t size = 3; |
10516 | |
10517 | auto *in = mod_.createPlaceholder(ElemKind::Float16Ty, {size}, "in" , false); |
10518 | auto splatTy = mod_.uniqueType(ElemKind::Float16Ty, {size}); |
10519 | auto *splat = F_->createSplat("splat" , splatTy, splatValue); |
10520 | auto *add = F_->createAdd("add" , in, splat); |
10521 | auto *save = F_->createSave("save" , add); |
10522 | |
10523 | bindings_.allocate(in)->zero(); |
10524 | auto resultH = |
10525 | bindings_.allocate(save->getPlaceholder())->getHandle<float16_t>(); |
10526 | |
10527 | EE_.compile(CompilationMode::Infer); |
10528 | EE_.run(bindings_); |
10529 | |
10530 | for (dim_t i = 0; i < resultH.size(); i++) { |
10531 | EXPECT_EQ(float16_t(splatValue), resultH.raw(i)); |
10532 | } |
10533 | } |
10534 | |
10535 | /// Verify bfloat16 splats work correctly (add 0 to it to ensure constant |
10536 | /// folding doesn't make this test meaningless). |
10537 | TEST_P(OperatorTest, BFloat16Splat) { |
10538 | CHECK_IF_ENABLED(); |
10539 | |
10540 | const float splatValue = 10; |
10541 | const dim_t size = 3; |
10542 | |
10543 | auto *in = mod_.createPlaceholder(ElemKind::BFloat16Ty, {size}, "in" , false); |
10544 | auto splatTy = mod_.uniqueType(ElemKind::BFloat16Ty, {size}); |
10545 | auto *splat = F_->createSplat("splat" , splatTy, splatValue); |
10546 | auto *add = F_->createAdd("add" , in, splat); |
10547 | auto *save = F_->createSave("save" , add); |
10548 | |
10549 | bindings_.allocate(in)->zero(); |
10550 | auto resultH = |
10551 | bindings_.allocate(save->getPlaceholder())->getHandle<bfloat16_t>(); |
10552 | |
10553 | EE_.compile(CompilationMode::Infer); |
10554 | EE_.run(bindings_); |
10555 | |
10556 | for (dim_t i = 0; i < resultH.size(); i++) { |
10557 | EXPECT_EQ(bfloat16_t(splatValue), resultH.raw(i)); |
10558 | } |
10559 | } |
10560 | |
// Simple ConvTranspose: symmetric kernel, no padding, unit strides, and a
// single input channel.
10562 | TEST_P(OperatorTest, sanityConvTranspose) { |
10563 | CHECK_IF_ENABLED(); |
10564 | |
10565 | float biasVal[2] = {1.1, 2.2}; |
10566 | auto *input = |
10567 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 2, 1}, "input" , false); |
10568 | bindings_.allocate(input)->getHandle() = {2., 3., 4., 5.}; |
10569 | |
10570 | auto *filter = |
10571 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 3, 3, 1}, "filter" , false); |
10572 | bindings_.allocate(filter)->getHandle() = {2., 3., 4., 5., 6., 7., |
10573 | 8., 9., 10., 3., 4., 5., |
10574 | 6., 7., 8., 9., 10., 11.}; |
10575 | |
10576 | auto *bias = mod_.createPlaceholder(ElemKind::FloatTy, {2}, "bias" , false); |
10577 | bindings_.allocate(bias)->getHandle() = biasVal; |
10578 | |
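  // With a 2x2 input, 3x3 kernel, unit strides, and no padding, the transposed
  // convolution output is (in + kernel - 1) = 4x4.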
10579 | std::pair<dim_t, dim_t> outWH = |
10580 | calculateConvTransposeOutputDims(2, 2, {3, 3}, {1, 1}, {0, 0, 0, 0}); |
10581 | auto outTy = |
10582 | mod_.uniqueType(ElemKind::FloatTy, {1, outWH.first, outWH.second, 2}); |
10583 | |
10584 | ConvTransposeNode *CN = |
10585 | F_->createConvTranspose("ConvTranspose" , input, filter, bias, outTy, |
10586 | {3, 3}, {1, 1}, {0, 0, 0, 0}, 1); |
10587 | |
10588 | SaveNode *S = F_->createSave("save" , CN); |
10589 | bindings_.allocate(S->getPlaceholder()); |
10590 | |
10591 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
10592 | {input, S->getPlaceholder()}); |
10593 | EE_.compile(CompilationMode::Infer); |
10594 | EE_.run(bindings_); |
10595 | |
10596 | auto result = bindings_.get(S->getPlaceholder())->getHandle(); |
10597 | std::vector<dim_t> expectedDims = {1, 4, 4, 2}; |
10598 | ASSERT_TRUE(result.dims().vec() == expectedDims); |
10599 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0}), 4 + biasVal[0]); |
10600 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 0}), 12 + biasVal[0]); |
10601 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 0}), 17 + biasVal[0]); |
10602 | EXPECT_FLOAT_EQ(result.at({0, 0, 3, 0}), 12 + biasVal[0]); |
10603 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 0}), 18 + biasVal[0]); |
10604 | EXPECT_FLOAT_EQ(result.at({0, 1, 1, 0}), 49 + biasVal[0]); |
10605 | EXPECT_FLOAT_EQ(result.at({0, 1, 2, 0}), 63 + biasVal[0]); |
10606 | EXPECT_FLOAT_EQ(result.at({0, 1, 3, 0}), 41 + biasVal[0]); |
10607 | EXPECT_FLOAT_EQ(result.at({0, 2, 0, 0}), 36 + biasVal[0]); |
10608 | EXPECT_FLOAT_EQ(result.at({0, 2, 1, 0}), 91 + biasVal[0]); |
10609 | EXPECT_FLOAT_EQ(result.at({0, 2, 2, 0}), 105 + biasVal[0]); |
10610 | EXPECT_FLOAT_EQ(result.at({0, 2, 3, 0}), 65 + biasVal[0]); |
10611 | EXPECT_FLOAT_EQ(result.at({0, 3, 0, 0}), 32 + biasVal[0]); |
10612 | EXPECT_FLOAT_EQ(result.at({0, 3, 1, 0}), 76 + biasVal[0]); |
10613 | EXPECT_FLOAT_EQ(result.at({0, 3, 2, 0}), 85 + biasVal[0]); |
10614 | EXPECT_FLOAT_EQ(result.at({0, 3, 3, 0}), 50 + biasVal[0]); |
10615 | |
10616 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1}), 6 + biasVal[1]); |
10617 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 1}), 17 + biasVal[1]); |
10618 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 1}), 22 + biasVal[1]); |
10619 | EXPECT_FLOAT_EQ(result.at({0, 0, 3, 1}), 15 + biasVal[1]); |
10620 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 1}), 24 + biasVal[1]); |
10621 | EXPECT_FLOAT_EQ(result.at({0, 1, 1, 1}), 63 + biasVal[1]); |
10622 | EXPECT_FLOAT_EQ(result.at({0, 1, 2, 1}), 77 + biasVal[1]); |
10623 | EXPECT_FLOAT_EQ(result.at({0, 1, 3, 1}), 49 + biasVal[1]); |
10624 | EXPECT_FLOAT_EQ(result.at({0, 2, 0, 1}), 42 + biasVal[1]); |
10625 | EXPECT_FLOAT_EQ(result.at({0, 2, 1, 1}), 105 + biasVal[1]); |
10626 | EXPECT_FLOAT_EQ(result.at({0, 2, 2, 1}), 119 + biasVal[1]); |
10627 | EXPECT_FLOAT_EQ(result.at({0, 2, 3, 1}), 73 + biasVal[1]); |
10628 | EXPECT_FLOAT_EQ(result.at({0, 3, 0, 1}), 36 + biasVal[1]); |
10629 | EXPECT_FLOAT_EQ(result.at({0, 3, 1, 1}), 85 + biasVal[1]); |
10630 | EXPECT_FLOAT_EQ(result.at({0, 3, 2, 1}), 94 + biasVal[1]); |
10631 | EXPECT_FLOAT_EQ(result.at({0, 3, 3, 1}), 55 + biasVal[1]); |
10632 | } |
10633 | |
10634 | // ConvTranspose with non-square dilation. |
10635 | TEST_P(OperatorTest, NonSquareDilationConvTranspose) { |
10636 | CHECK_IF_ENABLED(); |
10637 | |
10638 | std::vector<unsigned_t> dilation = {1, 2}; |
10639 | auto *input = |
10640 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 2, 1}, "input" , false); |
10641 | bindings_.allocate(input)->getHandle() = {2., 3., 4., 5.}; |
10642 | |
10643 | auto *filter = |
10644 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 2, 1}, "filter" , false); |
10645 | bindings_.allocate(filter)->getHandle() = {2., 3., 4., 5., 6., 7., 8., 9.}; |
10646 | |
10647 | auto *bias = mod_.createPlaceholder(ElemKind::FloatTy, {2}, "bias" , false); |
10648 | bindings_.allocate(bias)->getHandle() = {0., 0.}; |
10649 | |
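  // Output dims follow (in - 1) * stride + (kernel - 1) * dilation + 1, giving
  // a height of 3 and a width of 4 for dilation {1, 2}.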
10650 | std::pair<dim_t, dim_t> outWH = calculateConvTransposeOutputDims( |
10651 | 2, 2, {2, 2}, {1, 1}, {0, 0, 0, 0}, dilation); |
10652 | auto outTy = |
10653 | mod_.uniqueType(ElemKind::FloatTy, {1, outWH.first, outWH.second, 2}); |
10654 | |
10655 | ConvTransposeNode *CN = |
10656 | F_->createConvTranspose("ConvTranspose" , input, filter, bias, outTy, |
10657 | {2, 2}, {1, 1}, {0, 0, 0, 0}, 1, dilation); |
10658 | |
10659 | SaveNode *S = F_->createSave("save" , CN); |
10660 | bindings_.allocate(S->getPlaceholder()); |
10661 | |
10662 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
10663 | {input, S->getPlaceholder()}); |
10664 | EE_.compile(CompilationMode::Infer); |
10665 | EE_.run(bindings_); |
10666 | |
10667 | auto result = bindings_.get(S->getPlaceholder())->getHandle(); |
10668 | std::vector<dim_t> expectedDims = {1, 3, 4, 2}; |
10669 | ASSERT_TRUE(result.dims().vec() == expectedDims); |
10670 | std::vector<float> expected = {4., 12., 6., 18., 6., 14., 9., 21., |
10671 | 16., 40., 22., 54., 22., 46., 30., 62., |
10672 | 16., 32., 20., 40., 20., 36., 25., 45.}; |
10673 | for (dim_t i = 0; i < result.size(); i++) { |
10674 | EXPECT_FLOAT_EQ(result.raw(i), expected[i]); |
10675 | } |
10676 | } |
10677 | |
10678 | /// ConvTranspose with multi-channel input/output and asymmetric kernel, |
10679 | /// strides, pads. |
10680 | TEST_P(OperatorTest, ConvTransposedAsymmetric) { |
10681 | |
10682 | CHECK_IF_ENABLED(); |
10683 | |
10684 | float biasVal[2] = {1.1, 2.2}; |
10685 | auto bias = mod_.createPlaceholder(ElemKind::FloatTy, {2}, "bias" , false); |
10686 | bindings_.allocate(bias)->getHandle() = biasVal; |
10687 | |
10688 | auto *input = |
10689 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 3}, "input" , false); |
10690 | auto IH = bindings_.allocate(input)->getHandle(); |
10691 | for (dim_t i = 0; i < IH.size(); i++) { |
10692 | IH.raw(i) = i; |
10693 | } |
10694 | |
10695 | auto filter = |
10696 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 3, 2, 3}, "filter" , false); |
10697 | auto FH = bindings_.allocate(filter)->getHandle(); |
10698 | for (dim_t i = 0; i < FH.size(); i++) { |
10699 | FH.raw(i) = i * 2; |
10700 | } |
10701 | |
10702 | std::pair<dim_t, dim_t> outWH = |
10703 | calculateConvTransposeOutputDims(4, 4, {3, 2}, {1, 2}, {0, 3, 1, 2}); |
10704 | auto outTy = |
10705 | mod_.uniqueType(ElemKind::FloatTy, {1, outWH.first, outWH.second, 2}); |
10706 | |
10707 | ConvTransposeNode *CN = |
10708 | F_->createConvTranspose("ConvTranspose" , input, filter, bias, outTy, |
10709 | {3, 2}, {1, 2}, {0, 3, 1, 2}, 1); |
10710 | |
10711 | SaveNode *S = F_->createSave("save" , CN); |
10712 | bindings_.allocate(S->getPlaceholder()); |
10713 | |
10714 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
10715 | {input, S->getPlaceholder()}); |
10716 | EE_.compile(CompilationMode::Infer); |
10717 | EE_.run(bindings_); |
10718 | auto result = bindings_.get(S->getPlaceholder())->getHandle(); |
10719 | std::vector<dim_t> expectedDims = {1, 5, 3, 2}; |
10720 | ASSERT_TRUE(result.dims().vec() == expectedDims); |
  // Reference values computed with onnxruntime without bias; the bias is
  // added during the comparison below.
10722 | std::vector<float> expected = { |
10723 | 100, 532, 46, 802, 172, 928, 632, 2792, 416, 3224, |
10724 | 884, 3692, 2028, 7212, 1542, 7698, 2568, 8724, 4188, 13260, |
10725 | 3054, 13098, 4728, 14772, 5096, 12440, 4232, 12224, 5564, 13556}; |
10726 | for (dim_t i = 0; i < result.size(); i++) { |
10727 | float exp = expected[i] + biasVal[i % 2]; |
10728 | EXPECT_FLOAT_EQ(result.raw(i), exp); |
10729 | } |
10730 | } |
10731 | |
/// ConvTranspose test with group > 1.
10733 | TEST_P(OperatorTest, ConvTransposedGroup) { |
10734 | |
10735 | CHECK_IF_ENABLED(); |
10736 | |
10737 | float biasVal[2] = {0, 0}; |
10738 | auto bias = mod_.createPlaceholder(ElemKind::FloatTy, {2}, "bias" , false); |
10739 | bindings_.allocate(bias)->getHandle() = biasVal; |
10740 | |
10741 | auto *input = |
10742 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 2}, "input" , false); |
10743 | bindings_.allocate(input)->getHandle() = {0., 9., 1., 10., 2., 11., |
10744 | 3., 12., 4., 13., 5., 14., |
10745 | 6., 15., 7., 16., 8., 17.}; |
10746 | |
10747 | auto filter = |
10748 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 2, 2}, "filter" , false); |
10749 | bindings_.allocate(filter)->getHandle() = { |
10750 | 0., 8., 2., 10., 4., 12., 6., 14, |
10751 | }; |
10752 | |
10753 | std::pair<dim_t, dim_t> outWH = |
10754 | calculateConvTransposeOutputDims(3, 3, {2, 2}, {2, 2}, {0, 0, 0, 0}); |
10755 | auto outTy = |
10756 | mod_.uniqueType(ElemKind::FloatTy, {1, outWH.first, outWH.second, 2}); |
10757 | |
10758 | ConvTransposeNode *CN = |
10759 | F_->createConvTranspose("ConvTranspose" , input, filter, bias, outTy, |
10760 | {2, 2}, {2, 2}, {0, 0, 0, 0}, /* group */ 2); |
10761 | |
10762 | SaveNode *S = F_->createSave("save" , CN); |
10763 | bindings_.allocate(S->getPlaceholder()); |
10764 | |
10765 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
10766 | {input, S->getPlaceholder()}); |
10767 | EE_.compile(CompilationMode::Infer); |
10768 | EE_.run(bindings_); |
10769 | auto result = bindings_.get(S->getPlaceholder())->getHandle(); |
10770 | std::vector<dim_t> expectedDims = {1, 6, 6, 2}; |
10771 | ASSERT_TRUE(result.dims().vec() == expectedDims); |
10772 | std::vector<float> expected = { |
10773 | 0, 72, 0, 90, 0, 80, 2, 100, 0, 88, 4, 110, 0, 108, 0, |
10774 | 126, 4, 120, 6, 140, 8, 132, 12, 154, 0, 96, 6, 120, 0, 104, |
10775 | 8, 130, 0, 112, 10, 140, 12, 144, 18, 168, 16, 156, 24, 182, 20, |
10776 | 168, 30, 196, 0, 120, 12, 150, 0, 128, 14, 160, 0, 136, 16, 170, |
10777 | 24, 180, 36, 210, 28, 192, 42, 224, 32, 204, 48, 238}; |
10778 | |
10779 | for (dim_t i = 0; i < result.size(); i++) { |
10780 | EXPECT_FLOAT_EQ(result.raw(i), expected[i]); |
10781 | } |
10782 | } |
10783 | |
/// Compare ConvTranspose against the equivalent Convolution (stride 1 only).
/// TODO: add a version with strides > 1 (requires dilating the input).
10786 | template <unsigned_t kernel, unsigned_t stride, unsigned_t pad, unsigned_t idim> |
10787 | static void convTransposeConvCompare(glow::PlaceholderBindings &bindings, |
10788 | glow::Module &mod, glow::Function *F, |
10789 | glow::ExecutionEngine &EE) { |
10790 | unsigned_t Cpad = kernel - pad - 1; |
10791 | llvm::SmallVector<unsigned_t, 4> pads = {pad, pad, pad, pad}; |
10792 | llvm::SmallVector<unsigned_t, 4> Cpads = {Cpad, Cpad, Cpad, Cpad}; |
10793 | llvm::SmallVector<unsigned_t, 2> kernels = {kernel, kernel}; |
10794 | llvm::SmallVector<unsigned_t, 2> strides = {stride, stride}; |
10795 | |
10796 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {1, idim, idim, 1}, |
10797 | "input" , false); |
10798 | bindings.allocate(input)->getHandle().randomize(-10.0, 10.0, mod.getPRNG()); |
10799 | |
10800 | auto *filterConv = mod.createPlaceholder( |
10801 | ElemKind::FloatTy, {1, kernel, kernel, 1}, "filterC" , false); |
10802 | bindings.allocate(filterConv) |
10803 | ->getHandle() |
10804 | .randomize(-10.0, 10.0, mod.getPRNG()); |
10805 | auto FCH = bindings.get(filterConv)->getHandle(); |
10806 | |
10807 | auto *filterConvTr = mod.createPlaceholder( |
10808 | ElemKind::FloatTy, {1, kernel, kernel, 1}, "filterD" , false); |
10809 | auto FDH = bindings.allocate(filterConvTr)->getHandle(); |
10810 | for (dim_t i = 0; i < kernel * kernel; i++) { |
10811 | FDH.raw(i) = FCH.raw(kernel * kernel - i - 1); |
10812 | } |
10813 | |
10814 | auto *bias = mod.createPlaceholder(ElemKind::FloatTy, {1}, "bias" , false); |
10815 | bindings.allocate(bias)->zero(); |
10816 | |
10817 | std::pair<dim_t, dim_t> outHW = |
10818 | calculateConvTransposeOutputDims(idim, idim, kernels, strides, pads); |
10819 | |
10820 | auto outTy = |
10821 | mod.uniqueType(ElemKind::FloatTy, {1, outHW.first, outHW.second, 1}); |
10822 | |
10823 | ConvolutionNode *CN = F->createConv("conv" , input, filterConv, bias, outTy, |
10824 | kernels, strides, Cpads, /* group */ 1); |
10825 | ConvTransposeNode *DN = |
10826 | F->createConvTranspose("ConvTranspose" , input, filterConvTr, bias, outTy, |
10827 | kernels, strides, pads, /* group */ 1); |
10828 | |
10829 | SaveNode *SC = F->createSave("saveC" , CN); |
10830 | bindings.allocate(SC->getPlaceholder()); |
10831 | |
10832 | SaveNode *SD = F->createSave("saveD" , DN); |
10833 | bindings.allocate(SD->getPlaceholder()); |
10834 | |
10835 | ::glow::convertPlaceholdersToConstants( |
10836 | F, bindings, {input, SC->getPlaceholder(), SD->getPlaceholder()}); |
10837 | EE.compile(CompilationMode::Infer); |
10838 | EE.run(bindings); |
10839 | |
10840 | outHW = calculateConvPoolOutputDims(idim, idim, kernels, strides, Cpads); |
10841 | |
10842 | auto resultConv = bindings.get(SC->getPlaceholder())->getHandle(); |
10843 | auto resultConvTranspose = bindings.get(SD->getPlaceholder())->getHandle(); |
10844 | |
10845 | std::vector<dim_t> expectedDims = {1, outHW.first, outHW.second, 1}; |
10846 | ASSERT_TRUE(resultConv.dims().vec() == expectedDims); |
10847 | ASSERT_TRUE(resultConvTranspose.dims().vec() == expectedDims); |
10848 | |
10849 | for (dim_t i = 0; i < outHW.first; i++) { |
10850 | for (dim_t j = 0; j < outHW.second; j++) { |
10851 | EXPECT_FLOAT_EQ(static_cast<float>(resultConv.at({0, i, j, 0})), |
10852 | static_cast<float>(resultConvTranspose.at({0, i, j, 0}))); |
10853 | } |
10854 | } |
10855 | } |
10856 | |
TEST_P(OperatorTest, ConvTransposeConvolutionCompareSimpleK8S1P0I3) {
10858 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
10859 | convTransposeConvCompare<8, 1, 0, 3>(bindings_, mod_, F_, EE_); |
10860 | } |
10861 | |
10862 | TEST_P(OperatorTest, ConvTransposeConvolutionCompareSimpleK6S1P1I4) { |
10863 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
10864 | convTransposeConvCompare<6, 1, 1, 4>(bindings_, mod_, F_, EE_); |
10865 | } |
10866 | |
10867 | TEST_P(OperatorTest, ConvTransposeConvolutionCompareSimpleK5S1P2I3) { |
10868 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
10869 | convTransposeConvCompare<5, 1, 2, 3>(bindings_, mod_, F_, EE_); |
10870 | } |
10871 | |
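/// Test grouped convolution: with group = 2, the 8 input channels are split
/// into two groups of 4 and the 6 filters into two groups of 3, and filters
/// in group g only see the input channels of group g. Hence the expected
/// outputs below: output channels 0-2 sum input values 1..4 (times powers of
/// 10 per filter) while output channels 3-5 sum input values 5..8.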
10872 | TEST_P(OperatorTest, GroupConvolution) { |
10873 | CHECK_IF_ENABLED(); |
10874 | |
10875 | auto *input = |
10876 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 8}, "input" , false); |
10877 | auto IH = bindings_.allocate(input)->getHandle(); |
10878 | for (dim_t i = 0; i < 2 * 8; i++) { |
10879 | IH.raw(i) = i + 1; |
10880 | } |
10881 | |
10882 | auto filter = |
10883 | mod_.createPlaceholder(ElemKind::FloatTy, {6, 1, 1, 4}, "filter" , false); |
10884 | auto FH = bindings_.allocate(filter)->getHandle(); |
10885 | for (dim_t i = 0; i < 6; i++) |
10886 | for (dim_t j = 0; j < 4; j++) { |
10887 | FH.at({i, 0, 0, j}) = pow(10.0, i); |
10888 | } |
10889 | |
10890 | auto *zeroBias = |
10891 | mod_.createPlaceholder(ElemKind::FloatTy, {6}, "bias" , false); |
10892 | bindings_.allocate(zeroBias)->zero(); |
10893 | |
10894 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 2, 1, 6}); |
10895 | |
10896 | ConvolutionNode *CN = |
10897 | F_->createConv("Conv" , input, filter, zeroBias, outTy, 1, 1, 0, 2); |
10898 | SaveNode *S = F_->createSave("save" , CN); |
10899 | bindings_.allocate(S->getPlaceholder()); |
10900 | |
10901 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
10902 | {input, S->getPlaceholder()}); |
10903 | EE_.compile(CompilationMode::Infer); |
10904 | EE_.run(bindings_); |
10905 | |
10906 | auto result = bindings_.get(S->getPlaceholder())->getHandle(); |
10907 | |
10908 | std::vector<dim_t> expectedDims = {1, 2, 1, 6}; |
10909 | ASSERT_TRUE(result.dims().vec() == expectedDims); |
10910 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0}), 1 + 2 + 3 + 4); |
10911 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1}), (1 + 2 + 3 + 4) * 10); |
10912 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 2}), (1 + 2 + 3 + 4) * 100); |
10913 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 3}), (5 + 6 + 7 + 8) * 1000); |
10914 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 4}), (5 + 6 + 7 + 8) * 10000); |
10915 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 5}), (5 + 6 + 7 + 8) * 100000); |
10916 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 0}), 9 + 10 + 11 + 12); |
10917 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 1}), (9 + 10 + 11 + 12) * 10); |
10918 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 2}), (9 + 10 + 11 + 12) * 100); |
10919 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 3}), (13 + 14 + 15 + 16) * 1000); |
10920 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 4}), (13 + 14 + 15 + 16) * 10000); |
10921 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 5}), (13 + 14 + 15 + 16) * 100000); |
10922 | } |
10923 | |
/// Utility function to numerically test ChannelwiseQuantizedConvolution2D
/// against a floating-point Convolution for different parameters.
10926 | static void testChannelwiseQuantizedConv2D( |
10927 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
10928 | glow::ExecutionEngine &EE, quantization::Schema schema, ElemKind elemQKind, |
10929 | ElemKind biasElemQKind, bool filterFloat, bool biasFloat, |
10930 | bool biasScalesExplicit) { |
10931 | |
10932 | std::vector<dim_t> inputDims = {5, 10, 10, 4}; |
10933 | std::vector<dim_t> filterDims = {8, 3, 3, 2}; |
10934 | std::vector<dim_t> biasDims = {8}; |
10935 | std::vector<dim_t> outputDims = {5, 6, 6, 8}; |
10936 | std::vector<unsigned_t> kernels = {3, 3}; |
10937 | std::vector<unsigned_t> strides = {1, 1}; |
10938 | std::vector<unsigned_t> pads = {0, 0, 0, 0}; |
10939 | dim_t group = 2; |
10940 | std::vector<unsigned_t> dilation = {2, 2}; |
10941 | dim_t qDim = 0; |
10942 | dim_t qStep = 1; |
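  // Note: qDim = 0 with qStep = 1 selects channelwise quantization along
  // dimension 0 of the filter (the output channel dimension), i.e. one
  // (scale, offset) pair per output channel.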
10943 | |
10944 | // Create input placeholder. |
10945 | auto *inputF = |
10946 | mod.createPlaceholder(ElemKind::FloatTy, inputDims, "inputF" , false); |
10947 | bindings.allocate(inputF)->getHandle<float>().randomize(-1.0, 1.0, |
10948 | mod.getPRNG()); |
10949 | |
10950 | // Quantize input. |
10951 | auto inputTQP = |
10952 | quantization::chooseQuantizationParams({-1.0, 1.0}, schema, elemQKind); |
10953 | auto *inputQTy = |
10954 | mod.uniqueType(elemQKind, inputDims, inputTQP.scale, inputTQP.offset); |
10955 | auto *inputQ = F->createQuantize("inputQ" , inputF, inputQTy); |
10956 | |
10957 | // Create float filter constant. |
10958 | auto *filterF = mod.createConstant(ElemKind::FloatTy, filterDims, "filterF" ); |
10959 | filterF->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0, |
10960 | mod.getPRNG()); |
10961 | |
10962 | // Create float bias constant. |
10963 | auto *biasF = mod.createConstant(ElemKind::FloatTy, biasDims, "biasF" ); |
10964 | biasF->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0, |
10965 | mod.getPRNG()); |
10966 | |
10967 | // Create quantized filter and filterScales/filterOffsets constants for |
10968 | // ChannelwiseQuantizedConvolution. |
10969 | dim_t numChannels = outputDims[3]; |
10970 | Constant *filterQ = |
10971 | mod.createConstant(elemQKind, filterDims, 1.0, 0, "filterQ" ); |
10972 | Constant *filterScales = |
10973 | mod.createConstant(ElemKind::FloatTy, {numChannels}, "filterScales" ); |
10974 | Constant *filterOffsets = |
10975 | mod.createConstant(ElemKind::Int32ITy, {numChannels}, "filterOffsets" ); |
10976 | quantization::getTensorQuantizationParams( |
10977 | filterF->getPayload(), filterScales->getPayloadMutable(), |
10978 | filterOffsets->getPayloadMutable(), schema, elemQKind, qDim, qStep); |
10979 | filterQ->getPayloadMutable() = quantization::quantizeTensor( |
10980 | filterF->getPayload(), filterScales->getPayload(), |
10981 | filterOffsets->getPayload(), elemQKind, qDim, qStep); |
10982 | |
10983 | // Create quantized bias and biasScales/biasOffsets constants for |
10984 | // ChannelwiseQuantizedConvolution. |
10985 | Constant *biasQ = |
10986 | mod.createConstant(biasElemQKind, {numChannels}, 1.0, 0, "biasQ" ); |
10987 | Constant *biasScales = |
10988 | mod.createConstant(ElemKind::FloatTy, {numChannels}, "biasScales" ); |
10989 | Constant *biasOffsets = |
10990 | mod.createConstant(ElemKind::Int32ITy, {numChannels}, "biasOffsets" ); |
10991 | auto biasScalesH = biasScales->getPayload().getHandle<float>(); |
10992 | auto biasOffsetsH = biasOffsets->getPayload().getHandle<int32_t>(); |
10993 | auto filterScalesH = filterScales->getPayload().getHandle<float>(); |
10994 | auto filterOffsetsH = filterOffsets->getPayload().getHandle<int32_t>(); |
10995 | auto inputScale = inputQ->getResult().getType()->getScale(); |
10996 | auto inputOffset = inputQ->getResult().getType()->getOffset(); |
10997 | if (biasScalesExplicit) { |
10998 | quantization::getTensorQuantizationParams( |
10999 | biasF->getPayload(), biasScales->getPayloadMutable(), |
11000 | biasOffsets->getPayloadMutable(), schema, biasElemQKind, qDim, qStep); |
11001 | for (dim_t idx = 0; idx < numChannels; idx++) { |
11002 | auto biasTQPNew = specializeBiasQuantizationParams( |
11003 | {biasScalesH.raw(idx), biasOffsetsH.raw(idx)}, |
11004 | {inputScale, inputOffset}, |
11005 | {filterScalesH.raw(idx), filterOffsetsH.raw(idx)}, schema, |
11006 | biasElemQKind); |
11007 | biasScalesH.raw(idx) = biasTQPNew.scale; |
11008 | biasOffsetsH.raw(idx) = biasTQPNew.offset; |
11009 | } |
11010 | } else { |
11011 | for (dim_t idx = 0; idx < numChannels; idx++) { |
11012 | float filterScale = filterScalesH.raw(idx); |
11013 | biasScalesH.raw(idx) = inputScale * filterScale; |
11014 | biasOffsetsH.raw(idx) = 0; |
11015 | } |
11016 | } |
11017 | biasQ->getPayloadMutable() = quantization::quantizeTensor( |
11018 | biasF->getPayload(), biasScales->getPayload(), biasOffsets->getPayload(), |
11019 | biasElemQKind, qDim, qStep); |
11020 | |
  // Get the optimal output TQP by inspecting the output range for these
  // particular convolution parameters. If the convolution sizes are changed
  // then these parameters must be adjusted.
11024 | auto outputTQP = |
11025 | quantization::chooseQuantizationParams({-6.0, 6.0}, schema, elemQKind); |
11026 | auto *outQTy = |
11027 | mod.uniqueType(elemQKind, outputDims, outputTQP.scale, outputTQP.offset); |
11028 | |
11029 | // Prepare parameters for ChannelwiseQuantizedConvolutionNode. |
11030 | Constant *filterCWQ = nullptr; |
11031 | Constant *filterScalesCWQ = nullptr; |
11032 | Constant *filterOffsetsCWQ = nullptr; |
11033 | if (filterFloat) { |
11034 | filterCWQ = filterF; |
11035 | } else { |
11036 | filterCWQ = filterQ; |
11037 | filterScalesCWQ = filterScales; |
11038 | filterOffsetsCWQ = filterOffsets; |
11039 | } |
11040 | Constant *biasCWQ = nullptr; |
11041 | Constant *biasScalesCWQ = nullptr; |
11042 | Constant *biasOffsetsCWQ = nullptr; |
11043 | if (biasFloat) { |
11044 | biasCWQ = biasF; |
11045 | } else { |
11046 | biasCWQ = biasQ; |
11047 | } |
11048 | if (biasScalesExplicit) { |
11049 | biasScalesCWQ = biasScales; |
11050 | biasOffsetsCWQ = biasOffsets; |
11051 | } |
11052 | |
11053 | // Create ChannelwiseQuantizedConvolution and Dequantize. |
11054 | ChannelwiseQuantizedConvolutionNode *outQ = F->createChannelwiseQuantizedConv( |
11055 | "CWQConv" , inputQ, filterCWQ, biasCWQ, filterScalesCWQ, filterOffsetsCWQ, |
11056 | biasScalesCWQ, biasOffsetsCWQ, outQTy, kernels, strides, pads, group, |
11057 | dilation, /* quantizeFilter */ true, |
11058 | /* quantizeBias */ true, schema, elemQKind, biasElemQKind); |
11059 | DequantizeNode *out = |
11060 | F->createDequantize("dequantize" , outQ, ElemKind::FloatTy); |
11061 | SaveNode *saveOut = F->createSave("saveOut" , out); |
11062 | bindings.allocate(saveOut->getPlaceholder()); |
11063 | |
11064 | // Create reference floating-point Convolution. |
11065 | auto *refTy = mod.uniqueType(ElemKind::FloatTy, outputDims); |
11066 | ConvolutionNode *ref = F->createConv("Conv" , inputF, filterF, biasF, refTy, |
11067 | kernels, strides, pads, group, dilation); |
11068 | SaveNode *saveRef = F->createSave("saveRef" , ref); |
11069 | bindings.allocate(saveRef->getPlaceholder()); |
11070 | |
11071 | // Compile and run. |
11072 | EE.compile(CompilationMode::Infer); |
11073 | EE.run(bindings); |
11074 | |
11075 | // Extra validations. |
11076 | EXPECT_EQ(F->getNodes().size(), 6); |
11077 | EXPECT_EQ(outQ->getFilter().getElementType(), elemQKind); |
11078 | EXPECT_EQ(outQ->getBias().getElementType(), biasElemQKind); |
11079 | |
11080 | // Check error. If bias is carefully quantized then the bias precision does |
11081 | // not matter and so the error tolerance is the same. |
11082 | auto outH = bindings.get(saveOut->getPlaceholder())->getHandle(); |
11083 | auto refH = bindings.get(saveRef->getPlaceholder())->getHandle(); |
11084 | for (dim_t idx = 0; idx < refH.size(); idx++) { |
11085 | float errVal = std::abs(refH.raw(idx) - outH.raw(idx)); |
11086 | EXPECT_LT(errVal, 0.05f); |
11087 | } |
11088 | } |
11089 | |
11090 | #define TEST_CWQCONV(testName, ...) \ |
11091 | TEST_P(OperatorTest, testName) { \ |
11092 | CHECK_IF_ENABLED(); \ |
11093 | testChannelwiseQuantizedConv2D(bindings_, mod_, F_, EE_, \ |
11094 | quantization::Schema::Asymmetric, \ |
11095 | __VA_ARGS__); \ |
11096 | } |
11097 | |
/// These unit tests show that bias quantization for low precision (Int8)
/// requires special handling: providing a quantized bias with the implicit
/// quantization parameters biasScales[i] = inputScale * filterScales[i] and
/// biasOffsets[i] = 0 does not work numerically due to bias data saturation.
/// Therefore the variants whose names end in FF (quantized bias with
/// implicit bias scales) are not instantiated below.
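/// A rough numerical sketch of the saturation (values are illustrative, not
/// taken from the tests): with inputScale = 2/255 and filterScale = 2/255,
/// the implicit biasScale = inputScale * filterScale is about 6.2e-5, so a
/// float bias of 1.0 would quantize to roughly 16256, far outside the Int8
/// range [-128, 127]; the stored bias saturates to 127 and the effective
/// bias becomes about 0.008 instead of 1.0.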
11104 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt8_FFT, ElemKind::Int8QTy, |
11105 | ElemKind::Int8QTy, false, false, true) |
11106 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt8_FTF, ElemKind::Int8QTy, |
11107 | ElemKind::Int8QTy, false, true, false) |
11108 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt8_FTT, ElemKind::Int8QTy, |
11109 | ElemKind::Int8QTy, false, true, true) |
11110 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt8_TFT, ElemKind::Int8QTy, |
11111 | ElemKind::Int8QTy, true, false, true) |
11112 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt8_TTF, ElemKind::Int8QTy, |
11113 | ElemKind::Int8QTy, true, true, false) |
11114 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt8_TTT, ElemKind::Int8QTy, |
11115 | ElemKind::Int8QTy, true, true, true) |
11116 | |
/// These unit tests show that bias quantization for high precision (Int32)
/// works without special handling (i.e. with the implicit quantization
/// parameters).
11120 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt32_FFF, ElemKind::Int8QTy, |
11121 | ElemKind::Int32QTy, false, false, false) |
11122 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt32_FFT, ElemKind::Int8QTy, |
11123 | ElemKind::Int32QTy, false, false, true) |
11124 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt32_FTF, ElemKind::Int8QTy, |
11125 | ElemKind::Int32QTy, false, true, false) |
11126 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt32_FTT, ElemKind::Int8QTy, |
11127 | ElemKind::Int32QTy, false, true, true) |
11128 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt32_TFF, ElemKind::Int8QTy, |
11129 | ElemKind::Int32QTy, true, false, false) |
11130 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt32_TFT, ElemKind::Int8QTy, |
11131 | ElemKind::Int32QTy, true, false, true) |
11132 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt32_TTF, ElemKind::Int8QTy, |
11133 | ElemKind::Int32QTy, true, true, false) |
11134 | TEST_CWQCONV(ChannelwiseQuantizedConv2D_Int8_BiasInt32_TTT, ElemKind::Int8QTy, |
11135 | ElemKind::Int32QTy, true, true, true) |
11136 | #undef TEST_CWQCONV |
11137 | |
/// Test the ChannelwiseQuantizedConv2D corner case of an Int32 bias combined
/// with very small filter data, which would cause a bias up-shift and
/// saturation if not handled properly. This corner case is commonly found in
/// numerically ill-conditioned depthwise convolutions such as in MobileNet.
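/// Illustrative arithmetic (approximate, not the exact values used below):
/// filter data in [-1e-5, 1e-5] yields per-channel filter scales on the
/// order of 2e-5 / 255 ~= 8e-8, so the implicit biasScale = inputScale *
/// filterScale is around 6e-10 and a float bias near 1.0 quantizes to
/// roughly 1.6e9, uncomfortably close to the Int32 limit of about 2.1e9;
/// slightly smaller scales would overflow unless the parameters are
/// adjusted.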
11142 | TEST_P(OperatorTest, ChannelwiseQuantizedConv2D_Int32Bias_SmallFilterData) { |
11143 | CHECK_IF_ENABLED(); |
11144 | |
11145 | std::vector<dim_t> inputDims = {1, 5, 5, 8}; |
11146 | std::vector<dim_t> filterDims = {8, 3, 3, 1}; |
11147 | std::vector<dim_t> biasDims = {8}; |
11148 | std::vector<dim_t> outputDims = {1, 5, 5, 8}; |
11149 | std::vector<unsigned_t> kernels = {3, 3}; |
11150 | std::vector<unsigned_t> strides = {1, 1}; |
11151 | std::vector<unsigned_t> pads = {1, 1, 1, 1}; |
11152 | dim_t group = 8; |
11153 | std::vector<unsigned_t> dilation = {1, 1}; |
11154 | ElemKind elemQKind = ElemKind::Int8QTy; |
11155 | ElemKind biasElemQKind = ElemKind::Int32QTy; |
11156 | quantization::Schema schema = quantization::Schema::Asymmetric; |
11157 | |
11158 | // Create input placeholder. |
11159 | auto *inputF = |
11160 | mod_.createPlaceholder(ElemKind::FloatTy, inputDims, "inputF" , false); |
11161 | bindings_.allocate(inputF)->getHandle<float>().randomize(-1.0, 1.0, |
11162 | mod_.getPRNG()); |
11163 | |
11164 | // Quantize input. |
11165 | auto inputTQP = |
11166 | quantization::chooseQuantizationParams({-1.0, 1.0}, schema, elemQKind); |
11167 | auto *inputQTy = |
11168 | mod_.uniqueType(elemQKind, inputDims, inputTQP.scale, inputTQP.offset); |
11169 | auto *inputQ = F_->createQuantize("inputQ" , inputF, inputQTy); |
11170 | |
11171 | // Create float filter constant with small values. |
11172 | auto *filterF = mod_.createConstant(ElemKind::FloatTy, filterDims, "filterF" ); |
11173 | filterF->getPayloadMutable().getHandle<float>().randomize(-1e-5, 1e-5, |
11174 | mod_.getPRNG()); |
11175 | |
11176 | // Create float bias constant. |
11177 | auto *biasF = mod_.createConstant(ElemKind::FloatTy, biasDims, "biasF" ); |
11178 | biasF->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0, |
11179 | mod_.getPRNG()); |
11180 | |
11181 | // Create ChannelwiseQuantizedConvolution and Dequantize. |
11182 | auto outTQP = |
11183 | quantization::chooseQuantizationParams({-1.0, 1.0}, schema, elemQKind); |
11184 | auto *outQTy = |
11185 | mod_.uniqueType(elemQKind, outputDims, outTQP.scale, outTQP.offset); |
11186 | ChannelwiseQuantizedConvolutionNode *outQ = |
11187 | F_->createChannelwiseQuantizedConv( |
11188 | "CWQConv" , inputQ, filterF, biasF, nullptr, nullptr, nullptr, nullptr, |
11189 | outQTy, kernels, strides, pads, group, dilation, |
11190 | /* quantizeFilter */ true, |
11191 | /* quantizeBias */ true, schema, elemQKind, biasElemQKind); |
11192 | DequantizeNode *out = |
11193 | F_->createDequantize("dequantize" , outQ, ElemKind::FloatTy); |
11194 | SaveNode *saveOut = F_->createSave("saveOut" , out); |
11195 | bindings_.allocate(saveOut->getPlaceholder()); |
11196 | |
11197 | // Create reference floating-point Convolution. |
11198 | auto *refTy = mod_.uniqueType(ElemKind::FloatTy, outputDims); |
11199 | ConvolutionNode *refF = |
11200 | F_->createConv("Conv" , inputF, filterF, biasF, refTy, kernels, strides, |
11201 | pads, group, dilation); |
11202 | SaveNode *saveRef = F_->createSave("saveRef" , refF); |
11203 | bindings_.allocate(saveRef->getPlaceholder()); |
11204 | |
11205 | // Check bias/filter quantization parameters. |
11206 | float inputScale = inputTQP.scale; |
11207 | auto *biasScalesC = llvm::dyn_cast<Constant>(outQ->getBiasScales().getNode()); |
11208 | EXPECT_TRUE(biasScalesC); |
11209 | auto biasScalesH = biasScalesC->getPayload().getHandle<float>(); |
11210 | auto *filterScalesC = |
11211 | llvm::dyn_cast<Constant>(outQ->getFilterScales().getNode()); |
11212 | EXPECT_TRUE(filterScalesC); |
11213 | auto filterScalesH = filterScalesC->getPayload().getHandle<float>(); |
11214 | auto *biasOffsetsC = |
11215 | llvm::dyn_cast<Constant>(outQ->getBiasOffsets().getNode()); |
11216 | EXPECT_TRUE(biasOffsetsC); |
11217 | auto biasOffsetsH = biasOffsetsC->getPayload().getHandle<int32_t>(); |
11218 | for (dim_t idx = 0; idx < biasScalesH.size(); idx++) { |
11219 | EXPECT_EQ(biasOffsetsH.raw(idx), 0); |
11220 | EXPECT_EQ(biasScalesH.raw(idx), inputScale * filterScalesH.raw(idx)); |
11221 | } |
11222 | |
11223 | // Compile and run. |
11224 | EE_.compile(CompilationMode::Infer); |
11225 | EE_.run(bindings_); |
11226 | |
11227 | // Check error. |
11228 | auto outH = bindings_.get(saveOut->getPlaceholder())->getHandle(); |
11229 | auto refH = bindings_.get(saveRef->getPlaceholder())->getHandle(); |
11230 | for (dim_t idx = 0; idx < refH.size(); idx++) { |
11231 | float errVal = std::abs(refH.raw(idx) - outH.raw(idx)); |
11232 | EXPECT_LT(errVal, 0.005f); |
11233 | } |
11234 | } |
11235 | |
/// Test the ChannelwiseQuantizedConv2D corner case of an Int32 bias combined
/// with all-zero bias data, which would cause filter data underflow during
/// quantization if not handled properly. This happens for convolutions that
/// do not use a bias.
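/// A hedged sketch of the failure mode: with an all-zero bias, quantization
/// parameters naively derived from the bias range degenerate, and forcing
/// biasScale = inputScale * filterScale to track such a degenerate scale
/// would push the filter scales toward zero, making the quantized filter
/// data underflow to 0. The parameter checks below verify that biasScales
/// stay exactly inputScale * filterScales with zero offsets.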
11240 | TEST_P(OperatorTest, ChannelwiseQuantizedConv2D_Int32Bias_ZeroBiasData) { |
11241 | CHECK_IF_ENABLED(); |
11242 | |
11243 | std::vector<dim_t> inputDims = {1, 5, 5, 8}; |
11244 | std::vector<dim_t> filterDims = {8, 3, 3, 1}; |
11245 | std::vector<dim_t> biasDims = {8}; |
11246 | std::vector<dim_t> outputDims = {1, 5, 5, 8}; |
11247 | std::vector<unsigned_t> kernels = {3, 3}; |
11248 | std::vector<unsigned_t> strides = {1, 1}; |
11249 | std::vector<unsigned_t> pads = {1, 1, 1, 1}; |
11250 | dim_t group = 8; |
11251 | std::vector<unsigned_t> dilation = {1, 1}; |
11252 | ElemKind elemQKind = ElemKind::Int8QTy; |
11253 | ElemKind biasElemQKind = ElemKind::Int32QTy; |
11254 | quantization::Schema schema = quantization::Schema::Asymmetric; |
11255 | |
11256 | // Create input placeholder. |
11257 | auto *inputF = |
11258 | mod_.createPlaceholder(ElemKind::FloatTy, inputDims, "inputF" , false); |
11259 | bindings_.allocate(inputF)->getHandle<float>().randomize(-1.0, 1.0, |
11260 | mod_.getPRNG()); |
11261 | |
11262 | // Quantize input. |
11263 | auto inputTQP = |
11264 | quantization::chooseQuantizationParams({-1.0, 1.0}, schema, elemQKind); |
11265 | auto *inputQTy = |
11266 | mod_.uniqueType(elemQKind, inputDims, inputTQP.scale, inputTQP.offset); |
11267 | auto *inputQ = F_->createQuantize("inputQ" , inputF, inputQTy); |
11268 | |
11269 | // Create float filter constant. |
11270 | auto *filterF = mod_.createConstant(ElemKind::FloatTy, filterDims, "filterF" ); |
11271 | filterF->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0, |
11272 | mod_.getPRNG()); |
11273 | |
11274 | // Create float bias constant with zero data. |
11275 | auto *biasF = mod_.createConstant(ElemKind::FloatTy, biasDims, "biasF" ); |
11276 | biasF->getPayloadMutable().zero(); |
11277 | |
11278 | // Create ChannelwiseQuantizedConvolution and Dequantize. |
11279 | auto outTQP = |
11280 | quantization::chooseQuantizationParams({-3.0, 3.0}, schema, elemQKind); |
11281 | auto *outQTy = |
11282 | mod_.uniqueType(elemQKind, outputDims, outTQP.scale, outTQP.offset); |
11283 | ChannelwiseQuantizedConvolutionNode *outQ = |
11284 | F_->createChannelwiseQuantizedConv( |
11285 | "CWQConv" , inputQ, filterF, biasF, nullptr, nullptr, nullptr, nullptr, |
11286 | outQTy, kernels, strides, pads, group, dilation, |
11287 | /* quantizeFilter */ true, |
11288 | /* quantizeBias */ true, schema, elemQKind, biasElemQKind); |
11289 | DequantizeNode *out = |
11290 | F_->createDequantize("dequantize" , outQ, ElemKind::FloatTy); |
11291 | SaveNode *saveOut = F_->createSave("saveOut" , out); |
11292 | bindings_.allocate(saveOut->getPlaceholder()); |
11293 | |
11294 | // Create reference floating-point Convolution. |
11295 | auto *refTy = mod_.uniqueType(ElemKind::FloatTy, outputDims); |
11296 | ConvolutionNode *refF = |
11297 | F_->createConv("Conv" , inputF, filterF, biasF, refTy, kernels, strides, |
11298 | pads, group, dilation); |
11299 | SaveNode *saveRef = F_->createSave("saveRef" , refF); |
11300 | bindings_.allocate(saveRef->getPlaceholder()); |
11301 | |
11302 | // Check bias/filter quantization parameters. |
11303 | float inputScale = inputTQP.scale; |
11304 | auto *biasScalesC = llvm::dyn_cast<Constant>(outQ->getBiasScales().getNode()); |
11305 | EXPECT_TRUE(biasScalesC); |
11306 | auto biasScalesH = biasScalesC->getPayload().getHandle<float>(); |
11307 | auto *filterScalesC = |
11308 | llvm::dyn_cast<Constant>(outQ->getFilterScales().getNode()); |
11309 | EXPECT_TRUE(filterScalesC); |
11310 | auto filterScalesH = filterScalesC->getPayload().getHandle<float>(); |
11311 | auto *biasOffsetsC = |
11312 | llvm::dyn_cast<Constant>(outQ->getBiasOffsets().getNode()); |
11313 | EXPECT_TRUE(biasOffsetsC); |
11314 | auto biasOffsetsH = biasOffsetsC->getPayload().getHandle<int32_t>(); |
11315 | for (dim_t idx = 0; idx < biasScalesH.size(); idx++) { |
11316 | EXPECT_EQ(biasOffsetsH.raw(idx), 0); |
11317 | EXPECT_EQ(biasScalesH.raw(idx), inputScale * filterScalesH.raw(idx)); |
11318 | } |
11319 | |
11320 | // Compile and run. |
11321 | EE_.compile(CompilationMode::Infer); |
11322 | EE_.run(bindings_); |
11323 | |
11324 | // Check error. |
11325 | auto outH = bindings_.get(saveOut->getPlaceholder())->getHandle(); |
11326 | auto refH = bindings_.get(saveRef->getPlaceholder())->getHandle(); |
11327 | for (dim_t idx = 0; idx < refH.size(); idx++) { |
11328 | float errVal = std::abs(refH.raw(idx) - outH.raw(idx)); |
11329 | EXPECT_LT(errVal, 0.05f); |
11330 | } |
11331 | } |
11332 | |
/// Utility function to numerically test ChannelwiseQuantizedConvolution2D
/// against the Interpreter implementation.
11335 | static FunctionTensorPair |
11336 | createAndInitBasicChannelwiseConv2DTest(glow::PlaceholderBindings &bindings, |
11337 | glow::ExecutionEngine &EE) { |
11338 | |
11339 | auto &mod = EE.getModule(); |
11340 | Function *F = mod.createFunction("main" ); |
11341 | |
11342 | std::vector<dim_t> inputDims = {5, 10, 10, 4}; |
11343 | std::vector<dim_t> filterDims = {8, 3, 3, 2}; |
11344 | std::vector<dim_t> biasDims = {8}; |
11345 | std::vector<dim_t> outputDims = {5, 6, 6, 8}; |
11346 | std::vector<unsigned_t> kernels = {3, 3}; |
11347 | std::vector<unsigned_t> strides = {1, 1}; |
11348 | std::vector<unsigned_t> pads = {0, 0, 0, 0}; |
11349 | dim_t group = 2; |
11350 | std::vector<unsigned_t> dilation = {2, 2}; |
11351 | |
11352 | // Create input placeholder. |
11353 | auto *input = |
11354 | mod.createPlaceholder(ElemKind::FloatTy, inputDims, "input" , false); |
11355 | bindings.allocate(input)->getHandle<float>().randomize(-1.0, 1.0, |
11356 | mod.getPRNG()); |
11357 | |
11358 | // Create filter constant. |
11359 | auto *filter = mod.createConstant(ElemKind::FloatTy, filterDims, "filter" ); |
11360 | filter->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0, |
11361 | mod.getPRNG()); |
11362 | |
11363 | // Create bias constant. |
11364 | auto *bias = mod.createConstant(ElemKind::FloatTy, biasDims, "bias" ); |
11365 | bias->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0, |
11366 | mod.getPRNG()); |
11367 | |
11368 | // Create Convolution. |
11369 | auto *outTy = mod.uniqueType(ElemKind::FloatTy, outputDims); |
11370 | ConvolutionNode *conv = |
11371 | F->createConv("Conv" , input, filter, bias, outTy, kernels, strides, pads, |
11372 | group, dilation); |
11373 | SaveNode *save = F->createSave("save" , conv); |
11374 | auto *outputTensor = bindings.allocate(save->getPlaceholder()); |
11375 | return std::make_pair(F, outputTensor); |
11376 | } |
11377 | |
11378 | /// Test Int8 ChannelwiseQuantizedConvolution2D with Int8 bias. |
11379 | TEST_P(OperatorStatelessTest, ChannelwiseQuantizedConv2D_Int8_BiasInt8) { |
11380 | CHECK_IF_ENABLED(); |
11381 | compareAgainstInterpreter( |
11382 | getBackendName(), createAndInitBasicChannelwiseConv2DTest, |
11383 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.05f, parCloneCountOpt, |
11384 | /* convertToRowwiseQuantization */ false, |
11385 | quantization::Schema::Asymmetric, ElemKind::Int8QTy, |
11386 | /* forceFP16AccumSLS */ false, |
11387 | PrecisionConfiguration::Float16Format::None, |
11388 | /* convertToChannelwiseQuantization */ true); |
11389 | } |
11390 | |
11391 | /// Test Int8 ChannelwiseQuantizedConvolution2D with Int32 bias. |
11392 | TEST_P(OperatorStatelessTest, ChannelwiseQuantizedConv2D_Int8_BiasInt32) { |
11393 | CHECK_IF_ENABLED(); |
11394 | compareAgainstInterpreter( |
11395 | getBackendName(), createAndInitBasicChannelwiseConv2DTest, |
11396 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.05f, parCloneCountOpt, |
11397 | /* convertToRowwiseQuantization */ false, |
11398 | quantization::Schema::Asymmetric, ElemKind::Int32QTy, |
11399 | /* forceFP16AccumSLS */ false, |
11400 | PrecisionConfiguration::Float16Format::None, |
11401 | /* convertToChannelwiseQuantization */ true); |
11402 | } |
11403 | |
/// Test the functionality of channelwise quantized group convolution using
/// ChannelwiseQuantizedConvolutionNode.
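/// With unit scales and zero offsets throughout, the quantized math is
/// exact. For example, output (0, 0, 0, 0) below is the filter taps (1, 2)
/// applied down the 2x1 kernel window of input channel 0:
/// 1 * in(0, 0, 0) + 2 * in(1, 0, 0) = 1 * 1 + 2 * 7 = 15.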
11406 | TEST_P(OperatorTest, ChannelwiseQuantizedConv2D) { |
11407 | CHECK_IF_ENABLED(); |
11408 | |
11409 | constexpr size_t groups = 2; |
11410 | constexpr dim_t output_channel = 4; |
11411 | |
11412 | auto *input = |
11413 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 3, 2}, "input" , false); |
11414 | auto IH = bindings_.allocate(input)->getHandle<float>(); |
11415 | for (size_t i = 0; i < 2 * 3 * 2; i++) { |
11416 | IH.raw(i) = i + 1; |
11417 | } |
11418 | |
11419 | auto *qInTy = mod_.uniqueType(ElemKind::Int8QTy, {1, 2, 3, 2}, 1.0, 0); |
11420 | auto *qInput = F_->createQuantize("qInput" , input, qInTy); |
11421 | |
11422 | auto filterT = Tensor(ElemKind::Int8QTy, {4, 2, 1, 1}, 1.0, 0); |
11423 | for (dim_t i = 0; i < 4; i++) { |
11424 | for (dim_t j = 0; j < 2; j++) { |
11425 | for (dim_t k = 0; k < 1; k++) { |
11426 | for (dim_t l = 0; l < 1; l++) { |
11427 | filterT.getHandle<int8_t>().at({i, j, k, l}) = j + 1; |
11428 | } |
11429 | } |
11430 | } |
11431 | } |
11432 | auto *filter = mod_.createConstant("filter" , std::move(filterT)); |
11433 | |
11434 | auto biasT = Tensor(ElemKind::FloatTy, {4}); |
11435 | biasT.zero(); |
11436 | auto *bias = mod_.createConstant("bias" , std::move(biasT)); |
11437 | |
11438 | auto filterScalesT = Tensor(ElemKind::FloatTy, {output_channel}); |
11439 | for (size_t i = 0; i < filterScalesT.size(); i++) { |
11440 | filterScalesT.getHandle<float>().raw(i) = 1; |
11441 | } |
11442 | auto *filterScales = |
11443 | mod_.createConstant("filterScales" , std::move(filterScalesT)); |
11444 | |
11445 | auto filterOffsetsT = Tensor(ElemKind::Int32ITy, {output_channel}); |
11446 | filterOffsetsT.zero(); |
11447 | auto *filterOffsets = |
11448 | mod_.createConstant("filterOffsets" , std::move(filterOffsetsT)); |
11449 | |
11450 | auto *outTy = mod_.uniqueType(ElemKind::Int8QTy, {1, 1, 3, 4}, 1.0, 0); |
11451 | ChannelwiseQuantizedConvolutionNode *CQC = F_->createChannelwiseQuantizedConv( |
11452 | "channelwiseQuantizedConv" , qInput, filter, bias, filterScales, |
11453 | filterOffsets, /* biasScales */ nullptr, /* biasOffsets */ nullptr, outTy, |
11454 | {2, 1}, {1, 1}, {0, 0, 0, 0}, groups); |
11455 | |
11456 | DequantizeNode *dq = |
11457 | F_->createDequantize("dequantize" , CQC, ElemKind::FloatTy); |
11458 | SaveNode *S = F_->createSave("save" , dq); |
11459 | bindings_.allocate(S->getPlaceholder()); |
11460 | |
11461 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
11462 | {input, S->getPlaceholder()}); |
11463 | |
11464 | EE_.compile(CompilationMode::Infer); |
11465 | EE_.run(bindings_); |
11466 | |
11467 | auto result = bindings_.get(S->getPlaceholder())->getHandle(); |
11468 | |
11469 | std::vector<dim_t> expectedDims = {1, 1, 3, 4}; |
11470 | ASSERT_TRUE(result.dims().vec() == expectedDims); |
11471 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0}), 15); |
11472 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1}), 15); |
11473 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 2}), 18); |
11474 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 3}), 18); |
11475 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 0}), 21); |
11476 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 1}), 21); |
11477 | |
11478 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 2}), 24); |
11479 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 3}), 24); |
11480 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 0}), 27); |
11481 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 1}), 27); |
11482 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 2}), 30); |
11483 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 3}), 30); |
11484 | } |
11485 | |
/// Test the functionality of channelwise quantized group 3D convolution
/// using ChannelwiseQuantizedConvolutionNode.
11488 | TEST_P(OperatorTest, ChannelwiseQuantizedConv3D) { |
11489 | CHECK_IF_ENABLED(); |
11490 | |
11491 | constexpr size_t groups = 2; |
11492 | constexpr dim_t output_channel = 4; |
11493 | constexpr dim_t input_channel = 2; |
11494 | |
11495 | auto *input = mod_.createPlaceholder( |
11496 | ElemKind::FloatTy, {1, input_channel, 2, 3, 2}, "input" , false); |
11497 | auto IH = bindings_.allocate(input)->getHandle<float>(); |
11498 | for (size_t i = 0; i < input_channel * 2 * 3 * 2; i++) { |
11499 | IH.raw(i) = i + 1; |
11500 | } |
11501 | |
11502 | auto *qInTy = |
11503 | mod_.uniqueType(ElemKind::Int8QTy, {1, input_channel, 2, 3, 2}, 1.0, 0); |
11504 | auto *qInput = F_->createQuantize("qInput" , input, qInTy); |
11505 | |
11506 | auto filterT = Tensor( |
11507 | ElemKind::Int8QTy, |
11508 | {output_channel / groups, input_channel / groups, 1, 1, 1}, 1.0, 0); |
11509 | for (dim_t i = 0; i < output_channel / groups; i++) { |
11510 | for (dim_t j = 0; j < input_channel / groups; j++) { |
11511 | for (dim_t t = 0; t < 1; t++) { |
11512 | for (dim_t k = 0; k < 1; k++) { |
11513 | for (dim_t l = 0; l < 1; l++) { |
11514 | filterT.getHandle<int8_t>().at({i, j, t, k, l}) = j + 1; |
11515 | } |
11516 | } |
11517 | } |
11518 | } |
11519 | } |
11520 | auto *filter = mod_.createConstant("filter" , std::move(filterT)); |
11521 | |
11522 | auto biasT = Tensor(ElemKind::FloatTy, {output_channel / groups}); |
11523 | biasT.zero(); |
11524 | auto *bias = mod_.createConstant("bias" , std::move(biasT)); |
11525 | |
11526 | auto scalesT = Tensor(ElemKind::FloatTy, {output_channel / groups}); |
11527 | for (size_t i = 0; i < scalesT.size(); i++) { |
11528 | scalesT.getHandle<float>().raw(i) = 1; |
11529 | } |
11530 | auto *scales = mod_.createConstant("scales" , std::move(scalesT)); |
11531 | |
11532 | auto offsetsT = Tensor(ElemKind::Int32ITy, {output_channel / groups}); |
11533 | offsetsT.zero(); |
11534 | auto *offsets = mod_.createConstant("offsets" , std::move(offsetsT)); |
11535 | |
11536 | auto *outTy = mod_.uniqueType(ElemKind::Int8QTy, |
11537 | {1, output_channel / groups, 2, 3, 2}, 1.0, 0); |
11538 | ChannelwiseQuantizedConvolutionNode *CQC = F_->createChannelwiseQuantizedConv( |
11539 | "channelwiseQuantizedConv" , qInput, filter, bias, scales, offsets, |
11540 | /* biasScales */ nullptr, /* biasOffsets */ nullptr, outTy, {1, 1, 1}, |
11541 | {1, 1, 1}, {0, 0, 0, 0, 0, 0}, groups); |
11542 | |
11543 | DequantizeNode *dq = |
11544 | F_->createDequantize("dequantize" , CQC, ElemKind::FloatTy); |
11545 | SaveNode *S = F_->createSave("save" , dq); |
11546 | bindings_.allocate(S->getPlaceholder()); |
11547 | |
11548 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
11549 | {input, S->getPlaceholder()}); |
11550 | |
11551 | EE_.compile(CompilationMode::Infer); |
11552 | EE_.run(bindings_); |
11553 | |
11554 | auto result = bindings_.get(S->getPlaceholder())->getHandle(); |
11555 | |
11556 | std::vector<dim_t> expectedDims = {1, output_channel / groups, 2, 3, 2}; |
11557 | ASSERT_TRUE(result.dims().vec() == expectedDims); |
11558 | |
11559 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0}), 1); |
11560 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1}), 3); |
11561 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 2}), 5); |
11562 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 3}), 7); |
11563 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 0}), 7); |
11564 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 1}), 9); |
11565 | |
11566 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 2}), 11); |
11567 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 3}), 13); |
11568 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 0}), 13); |
11569 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 1}), 15); |
11570 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 2}), 17); |
11571 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 3}), 19); |
11572 | } |
11573 | |
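/// Test convolution with dilation = 2: the effective kernel extent is
/// K + (K - 1) * (d - 1) = 3 + 2 * 1 = 5, so with H = 4, pad = 2 and
/// stride = 1 the output height is (4 + 2 * 2 - 5) / 1 + 1 = 4.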
11574 | TEST_P(OperatorTest, DilatedConvolution) { |
11575 | CHECK_IF_ENABLED(); |
11576 | |
11577 | auto *input = |
11578 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 1, 1}, "input" , false); |
11579 | auto IH = bindings_.allocate(input)->getHandle(); |
11580 | for (size_t i = 0; i < 4; i++) { |
11581 | IH.raw(i) = i + 1; |
11582 | } |
11583 | |
11584 | auto filter = |
11585 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 1}, "filter" , false); |
11586 | auto FH = bindings_.allocate(filter)->getHandle(); |
11587 | for (dim_t i = 0; i < 3; i++) |
11588 | for (dim_t j = 0; j < 3; j++) { |
11589 | FH.at({0, i, j, 0}) = 1; |
11590 | } |
11591 | FH.at({0, 1, 1, 0}) = 0; |
11592 | |
11593 | auto *zeroBias = |
11594 | mod_.createPlaceholder(ElemKind::FloatTy, {1}, "bias" , false); |
11595 | bindings_.allocate(zeroBias)->zero(); |
11596 | |
11597 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 4, 1, 1}); |
11598 | |
11599 | ConvolutionNode *CN = F_->createConv("Conv" , input, filter, zeroBias, outTy, |
11600 | 3, 1, 2, 1, {2, 2}); |
11601 | SaveNode *S = F_->createSave("save" , CN); |
11602 | bindings_.allocate(S->getPlaceholder()); |
11603 | |
11604 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
11605 | {input, S->getPlaceholder()}); |
11606 | EE_.compile(CompilationMode::Infer); |
11607 | EE_.run(bindings_); |
11608 | |
11609 | auto result = bindings_.get(S->getPlaceholder())->getHandle(); |
11610 | |
11611 | std::vector<dim_t> expectedDims = {1, 4, 1, 1}; |
11612 | EXPECT_TRUE(result.dims().vec() == expectedDims); |
11613 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0}), 3); |
11614 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 0}), 4); |
11615 | EXPECT_FLOAT_EQ(result.at({0, 2, 0, 0}), 1); |
11616 | EXPECT_FLOAT_EQ(result.at({0, 3, 0, 0}), 2); |
11617 | } |
11618 | |
/// Test the functionality of channelwise quantized group convolution using
/// ChannelwiseQuantizedConvolutionNode with non-zero offsets and biases.
11621 | void testChannelwiseQuantizedConv2DNonZero(glow::PlaceholderBindings &bindings, |
11622 | glow::Module &mod, glow::Function *F, |
11623 | glow::ExecutionEngine &EE, |
11624 | bool quantizeBias) { |
11625 | constexpr size_t groups = 2; |
11626 | constexpr dim_t output_channel = 4; |
11627 | |
11628 | auto *input = |
11629 | mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 3, 2}, "input" , false); |
11630 | auto IH = bindings.allocate(input)->getHandle<float>(); |
11631 | for (size_t i = 0; i < 2 * 3 * 2; i++) { |
11632 | IH.raw(i) = i + 1; |
11633 | } |
11634 | |
11635 | auto *qInTy = mod.uniqueType(ElemKind::Int8QTy, {1, 2, 3, 2}, 2.5, 3); |
11636 | auto *qInput = F->createQuantize("qInput" , input, qInTy); |
11637 | |
11638 | auto filterT = Tensor(ElemKind::Int8QTy, {4, 2, 1, 1}, 1.0, 0); |
11639 | for (dim_t i = 0; i < 4; i++) { |
11640 | for (dim_t j = 0; j < 2; j++) { |
11641 | for (dim_t k = 0; k < 1; k++) { |
11642 | for (dim_t l = 0; l < 1; l++) { |
11643 | filterT.getHandle<int8_t>().at({i, j, k, l}) = j + 1; |
11644 | } |
11645 | } |
11646 | } |
11647 | } |
11648 | auto *filter = mod.createConstant("filter" , std::move(filterT)); |
11649 | |
11650 | auto biasT = Tensor(ElemKind::FloatTy, {4}); |
11651 | for (dim_t i = 0; i < 4; i++) { |
11652 | biasT.getHandle<float>().raw(i) = i + 1; |
11653 | } |
11654 | auto *bias = mod.createConstant("bias" , std::move(biasT)); |
11655 | |
11656 | auto filterScalesT = Tensor(ElemKind::FloatTy, {output_channel}); |
11657 | for (size_t i = 0; i < filterScalesT.size(); i++) { |
11658 | filterScalesT.getHandle<float>().raw(i) = 1; |
11659 | } |
11660 | auto *filterScales = |
11661 | mod.createConstant("filterScales" , std::move(filterScalesT)); |
11662 | |
11663 | auto filterOffsetsT = Tensor(ElemKind::Int32ITy, {output_channel}); |
11664 | filterOffsetsT.zero(); |
11665 | |
11666 | auto *filterOffsets = |
11667 | mod.createConstant("filterOffsets" , std::move(filterOffsetsT)); |
11668 | |
11669 | auto *outTy = mod.uniqueType(ElemKind::Int8QTy, {1, 1, 3, 4}, 2, 2); |
11670 | ChannelwiseQuantizedConvolutionNode *CQC = F->createChannelwiseQuantizedConv( |
11671 | "channelwiseQuantizedConv" , qInput, filter, bias, filterScales, |
11672 | filterOffsets, /* biasScales */ nullptr, /* biasOffsets */ nullptr, outTy, |
11673 | {2, 1}, {1, 1}, {0, 0, 0, 0}, groups, /* dilation */ {1, 1}, |
11674 | /* quantizeFilter */ false, quantizeBias); |
11675 | |
11676 | DequantizeNode *dq = |
11677 | F->createDequantize("dequantize" , CQC, ElemKind::FloatTy); |
11678 | SaveNode *S = F->createSave("save" , dq); |
11679 | bindings.allocate(S->getPlaceholder()); |
11680 | |
11681 | ::glow::convertPlaceholdersToConstants(F, bindings, |
11682 | {input, S->getPlaceholder()}); |
11683 | |
11684 | EE.compile(CompilationMode::Infer); |
11685 | EE.run(bindings); |
11686 | |
11687 | auto result = bindings.get(S->getPlaceholder())->getHandle(); |
11688 | |
11689 | std::vector<dim_t> expectedDims = {1, 1, 3, 4}; |
11690 | ASSERT_TRUE(result.dims().vec() == expectedDims); |
11691 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0}), 16); |
11692 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1}), 18); |
11693 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 2}), 20); |
11694 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 3}), 22); |
11695 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 0}), 22); |
11696 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 1}), 26); |
11697 | |
11698 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 2}), 28); |
11699 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 3}), 30); |
11700 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 0}), 26); |
11701 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 1}), 28); |
11702 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 2}), 32); |
11703 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 3}), 36); |
11704 | } |
11705 | |
11706 | TEST_P(OperatorTest, ChannelwiseQuantizedConv2D_NonZero_FloatBias) { |
11707 | CHECK_IF_ENABLED(); |
11708 | testChannelwiseQuantizedConv2DNonZero(bindings_, mod_, F_, EE_, |
11709 | /* quantizeBias */ false); |
11710 | } |
11711 | |
11712 | TEST_P(OperatorTest, ChannelwiseQuantizedConv2D_NonZero_QuantizedBias) { |
11713 | CHECK_IF_ENABLED(); |
11714 | testChannelwiseQuantizedConv2DNonZero(bindings_, mod_, F_, EE_, |
11715 | /* quantizeBias */ true); |
11716 | } |
11717 | |
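/// Test grouped convolution with dilation: each group convolves one input
/// channel with a 2x2 all-ones filter whose taps are spread dilation = 2
/// apart, so the effective extent per side is 2 + (2 - 1) * (2 - 1) = 3 and
/// each output samples only the four corners of a 3x3 window.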
11718 | TEST_P(OperatorTest, GroupDilatedConvolution) { |
11719 | CHECK_IF_ENABLED(); |
11720 | |
11721 | auto *input = |
11722 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 2}, "input" , false); |
11723 | auto IH = bindings_.allocate(input)->getHandle(); |
11724 | for (dim_t i = 0; i < 4 * 4 * 2; i++) { |
11725 | IH.raw(i) = i; |
11726 | } |
11727 | |
11728 | auto filter = |
11729 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 2, 1}, "filter" , false); |
11730 | auto FH = bindings_.allocate(filter)->getHandle(); |
11731 | for (dim_t i = 0; i < 2; i++) |
11732 | for (dim_t j = 0; j < 2; j++) { |
11733 | for (dim_t k = 0; k < 2; k++) { |
11734 | FH.at({i, j, k, 0}) = 1; |
11735 | } |
11736 | } |
11737 | |
11738 | auto *zeroBias = |
11739 | mod_.createPlaceholder(ElemKind::FloatTy, {2}, "bias" , false); |
11740 | bindings_.allocate(zeroBias)->zero(); |
11741 | |
11742 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 4, 4, 2}); |
11743 | |
11744 | ConvolutionNode *CN = F_->createConv("Conv" , input, filter, zeroBias, outTy, |
11745 | 2, 1, 1, 2, {2, 2}); |
11746 | SaveNode *S = F_->createSave("save" , CN); |
11747 | bindings_.allocate(S->getPlaceholder()); |
11748 | |
11749 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
11750 | {input, S->getPlaceholder()}); |
11751 | EE_.compile(CompilationMode::Infer); |
11752 | EE_.run(bindings_); |
11753 | |
11754 | auto result = bindings_.get(S->getPlaceholder())->getHandle(); |
11755 | |
11756 | std::vector<dim_t> expectedDims = {1, 4, 4, 2}; |
11757 | ASSERT_TRUE(result.dims().vec() == expectedDims); |
11758 | |
11759 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0}), 10); |
11760 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1}), 11); |
11761 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 0}), 20); |
11762 | EXPECT_FLOAT_EQ(result.at({0, 0, 1, 1}), 22); |
11763 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 0}), 24); |
11764 | EXPECT_FLOAT_EQ(result.at({0, 0, 2, 1}), 26); |
11765 | EXPECT_FLOAT_EQ(result.at({0, 0, 3, 0}), 12); |
11766 | EXPECT_FLOAT_EQ(result.at({0, 0, 3, 1}), 13); |
11767 | |
11768 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 0}), 20); |
11769 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 1}), 22); |
11770 | EXPECT_FLOAT_EQ(result.at({0, 1, 1, 0}), 40); |
11771 | EXPECT_FLOAT_EQ(result.at({0, 1, 1, 1}), 44); |
11772 | EXPECT_FLOAT_EQ(result.at({0, 1, 2, 0}), 48); |
11773 | EXPECT_FLOAT_EQ(result.at({0, 1, 2, 1}), 52); |
11774 | EXPECT_FLOAT_EQ(result.at({0, 1, 3, 0}), 24); |
11775 | EXPECT_FLOAT_EQ(result.at({0, 1, 3, 1}), 26); |
11776 | |
11777 | EXPECT_FLOAT_EQ(result.at({0, 2, 0, 0}), 36); |
11778 | EXPECT_FLOAT_EQ(result.at({0, 2, 0, 1}), 38); |
11779 | EXPECT_FLOAT_EQ(result.at({0, 2, 1, 0}), 72); |
11780 | EXPECT_FLOAT_EQ(result.at({0, 2, 1, 1}), 76); |
11781 | EXPECT_FLOAT_EQ(result.at({0, 2, 2, 0}), 80); |
11782 | EXPECT_FLOAT_EQ(result.at({0, 2, 2, 1}), 84); |
11783 | EXPECT_FLOAT_EQ(result.at({0, 2, 3, 0}), 40); |
11784 | EXPECT_FLOAT_EQ(result.at({0, 2, 3, 1}), 42); |
11785 | |
11786 | EXPECT_FLOAT_EQ(result.at({0, 3, 0, 0}), 18); |
11787 | EXPECT_FLOAT_EQ(result.at({0, 3, 0, 1}), 19); |
11788 | EXPECT_FLOAT_EQ(result.at({0, 3, 1, 0}), 36); |
11789 | EXPECT_FLOAT_EQ(result.at({0, 3, 1, 1}), 38); |
11790 | EXPECT_FLOAT_EQ(result.at({0, 3, 2, 0}), 40); |
11791 | EXPECT_FLOAT_EQ(result.at({0, 3, 2, 1}), 42); |
11792 | EXPECT_FLOAT_EQ(result.at({0, 3, 3, 0}), 20); |
11793 | EXPECT_FLOAT_EQ(result.at({0, 3, 3, 1}), 21); |
11794 | } |
11795 | |
11796 | /// Test Conv3D with group size of 2 to make sure that group 3d convolution |
11797 | /// works as expected. |
11798 | TEST_P(OperatorTest, GroupConv3D) { |
11799 | CHECK_IF_ENABLED(); |
11800 | |
11801 | auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 2, 8}, |
11802 | "input" , false); |
11803 | auto IH = bindings_.allocate(input)->getHandle(); |
11804 | for (size_t i = 0; i < input->getType()->size(); i++) { |
11805 | IH.raw(i) = i + 1; |
11806 | } |
11807 | |
11808 | auto *filter = mod_.createPlaceholder(ElemKind::FloatTy, {6, 1, 1, 1, 4}, |
11809 | "filter" , false); |
11810 | auto FH = bindings_.allocate(filter)->getHandle(); |
11811 | for (dim_t i = 0; i < 6; i++) |
11812 | for (dim_t j = 0; j < 4; j++) { |
11813 | FH.at({i, 0, 0, 0, j}) = pow(10.0, i); |
11814 | } |
11815 | |
11816 | auto *zeroBias = |
11817 | mod_.createPlaceholder(ElemKind::FloatTy, {6}, "bias" , false); |
11818 | bindings_.allocate(zeroBias)->zero(); |
11819 | |
11820 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 2, 1, 2, 6}); |
11821 | |
11822 | Convolution3DNode *CN = |
11823 | F_->createConv3D("Conv3D" , input, filter, zeroBias, outTy, 1, 1, 0, 2); |
11824 | SaveNode *S = F_->createSave("save" , CN); |
11825 | bindings_.allocate(S->getPlaceholder()); |
11826 | |
11827 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
11828 | {input, S->getPlaceholder()}); |
11829 | EE_.compile(CompilationMode::Infer); |
11830 | EE_.run(bindings_); |
11831 | |
11832 | auto result = bindings_.get(S->getPlaceholder())->getHandle(); |
11833 | |
11834 | std::vector<dim_t> expectedDims = {1, 2, 1, 2, 6}; |
11835 | ASSERT_TRUE(result.dims().vec() == expectedDims); |
11836 | |
11837 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0, 0}), 1 + 2 + 3 + 4); |
11838 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0, 1}), (1 + 2 + 3 + 4) * 10); |
11839 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0, 2}), (1 + 2 + 3 + 4) * 100); |
11840 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0, 3}), (5 + 6 + 7 + 8) * 1000); |
11841 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0, 4}), (5 + 6 + 7 + 8) * 10000); |
11842 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 0, 5}), (5 + 6 + 7 + 8) * 100000); |
11843 | |
11844 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1, 0}), 9 + 10 + 11 + 12); |
11845 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1, 1}), (9 + 10 + 11 + 12) * 10); |
11846 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1, 2}), (9 + 10 + 11 + 12) * 100); |
11847 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1, 3}), (13 + 14 + 15 + 16) * 1000); |
11848 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1, 4}), (13 + 14 + 15 + 16) * 10000); |
11849 | EXPECT_FLOAT_EQ(result.at({0, 0, 0, 1, 5}), (13 + 14 + 15 + 16) * 100000); |
11850 | |
11851 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 0, 0}), 17 + 18 + 19 + 20); |
11852 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 0, 1}), (17 + 18 + 19 + 20) * 10); |
11853 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 0, 2}), (17 + 18 + 19 + 20) * 100); |
11854 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 0, 3}), (21 + 22 + 23 + 24) * 1000); |
11855 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 0, 4}), (21 + 22 + 23 + 24) * 10000); |
11856 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 0, 5}), (21 + 22 + 23 + 24) * 100000); |
11857 | |
11858 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 1, 0}), 25 + 26 + 27 + 28); |
11859 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 1, 1}), (25 + 26 + 27 + 28) * 10); |
11860 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 1, 2}), (25 + 26 + 27 + 28) * 100); |
11861 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 1, 3}), (29 + 30 + 31 + 32) * 1000); |
11862 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 1, 4}), (29 + 30 + 31 + 32) * 10000); |
11863 | EXPECT_FLOAT_EQ(result.at({0, 1, 0, 1, 5}), (29 + 30 + 31 + 32) * 100000); |
11864 | } |
11865 | |
11866 | /// Check non-square padding for convolution. The first conv has non-square |
11867 | /// padding, while the second one has zero padding. The second conv's input is |
11868 | /// the same as the first one's after-padding input. All other parameters of |
11869 | /// the two convs are the same. |
11870 | TEST_P(OperatorTest, NonSquarePaddingConvolution) { |
11871 | CHECK_IF_ENABLED(); |
11872 | |
11873 | auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 1}, "input" , |
11874 | false, "NHWC" ); |
11875 | auto IH = bindings_.allocate(input)->getHandle(); |
11876 | for (dim_t i = 0; i < 4 * 4; i++) { |
11877 | IH.raw(i) = i + 1; |
11878 | } |
11879 | |
11880 | auto filter = mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 2, 1}, |
11881 | "filter" , false, "NHWC" ); |
11882 | auto FH = bindings_.allocate(filter)->getHandle(); |
11883 | for (dim_t i = 0; i < 2 * 2 * 2; i++) { |
11884 | FH.raw(i) = pow(2.0, i); |
11885 | } |
11886 | auto *zeroBias = |
11887 | mod_.createPlaceholder(ElemKind::FloatTy, {2}, "bias" , false); |
11888 | bindings_.allocate(zeroBias)->zero(); |
11889 | |
11890 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 4, 8, 2}); |
11891 | |
11892 | ConvolutionNode *CN = F_->createConv("Conv" , input, filter, zeroBias, outTy, |
11893 | {2, 2}, {1, 1}, {0, 2, 1, 3}, 1); |
11894 | SaveNode *S = F_->createSave("save" , CN); |
11895 | bindings_.allocate(S->getPlaceholder()); |
11896 | |
11897 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
11898 | {input, S->getPlaceholder()}); |
11899 | |
11900 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
11901 | |
11902 | // Create the reference conv operator whose input is the same as the |
11903 | // after-padding-input above. |
11904 | auto *input1 = mod_.createPlaceholder(ElemKind::FloatTy, {1, 5, 9, 1}, |
11905 | "input1" , false, "NHWC" ); |
11906 | bindings_.allocate(input1)->zero(); |
11907 | auto IH1 = bindings_.get(input1)->getHandle(); |
11908 | for (dim_t i = 0; i < 4; i++) |
11909 | for (dim_t j = 2; j < 6; j++) { |
11910 | IH1.at({0, i, j, 0}) = i * 4 + j - 2 + 1; |
11911 | } |
11912 | |
11913 | Function *refF = mod_.createFunction("mainRef" ); |
11914 | CN = refF->createConv("Conv1" , input1, filter, zeroBias, outTy, {2, 2}, |
11915 | {1, 1}, {0, 0, 0, 0}, 1); |
11916 | S = refF->createSave("save1" , CN); |
11917 | bindings_.allocate(S->getPlaceholder()); |
11918 | |
11919 | ::glow::convertPlaceholdersToConstants(refF, bindings_, |
11920 | {input, input1, S->getPlaceholder()}); |
11921 | EE_.compile(CompilationMode::Infer); |
11922 | EE_.run(bindings_, "main" ); |
11923 | EE_.run(bindings_, "mainRef" ); |
11924 | Tensor &result1 = *bindings_.get(S->getPlaceholder()); |
11925 | |
11926 | EXPECT_TRUE(result.isEqual(result1)); |
11927 | } |
11928 | |
11929 | /// Check non-cubic padding for conv3D. The first conv3D has non-cubic |
11930 | /// padding, while the second one has zero padding. The second conv3D's input |
11931 | /// is the same as the first one's after-padding input. All other parameters |
11932 | /// of the two conv3Ds are the same. |
11933 | TEST_P(OperatorTest, NonCubicPaddingConv3D) { |
11934 | CHECK_IF_ENABLED(); |
11935 | |
11936 | auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 4, 1}, |
11937 | "input" , false); |
11938 | auto IH = bindings_.allocate(input)->getHandle(); |
11939 | int nextVal = 1; |
11940 | for (dim_t i = 0; i < 4; i++) { |
11941 | for (dim_t j = 0; j < 4; j++) { |
11942 | for (dim_t k = 0; k < 4; k++) { |
11943 | IH.at({0, i, j, k, 0}) = static_cast<float>(nextVal++); |
11944 | } // W |
11945 | } // H |
11946 | } // T |
11947 | |
11948 | auto *filter = mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 2, 2, 1}, |
11949 | "filter" , false); |
11950 | auto FH = bindings_.allocate(filter)->getHandle(); |
11951 | for (size_t i = 0; i < filter->getType()->size(); i++) { |
11952 | FH.raw(i) = pow(2.0, i); |
11953 | } |
11954 | auto *zeroBias = |
11955 | mod_.createPlaceholder(ElemKind::FloatTy, {2}, "bias" , false); |
11956 | bindings_.allocate(zeroBias)->zero(); |
11957 | |
11958 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 12, 4, 8, 2}); |
11959 | |
11960 | Convolution3DNode *CN = |
11961 | F_->createConv3D("Conv3D" , input, filter, zeroBias, outTy, {2, 2, 2}, |
11962 | {1, 1, 1}, // {0, 2, 5, 1, 3, 4}, |
11963 | {5, 4, 0, 1, 2, 3}, 1); |
11964 | SaveNode *S = F_->createSave("save" , CN); |
11965 | bindings_.allocate(S->getPlaceholder()); |
11966 | |
11967 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
11968 | {input, S->getPlaceholder()}); |
11969 | |
11970 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
11971 | |
11972 | // Create the reference conv3D operator whose input is the same as the |
11973 | // after-padding-input above. |
11974 | auto *input1 = mod_.createPlaceholder(ElemKind::FloatTy, {1, 13, 5, 9, 1}, |
11975 | "input1" , false); |
11976 | bindings_.allocate(input1)->zero(); |
11977 | auto IH1 = bindings_.get(input1)->getHandle(); |
11978 | nextVal = 1; |
11979 | for (dim_t i = 5; i < 9; i++) { |
11980 | for (dim_t j = 0; j < 4; j++) { |
11981 | for (dim_t k = 2; k < 6; k++) { |
11982 | IH1.at({0, i, j, k, 0}) = static_cast<float>(nextVal++); |
11983 | } // W |
11984 | } // H |
11985 | } // T |
11986 | |
11987 | Function *refF = mod_.createFunction("mainRef" ); |
11988 | CN = refF->createConv3D("Conv3D_1" , input1, filter, zeroBias, outTy, |
11989 | {2, 2, 2}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, 1); |
11990 | S = refF->createSave("save1" , CN); |
11991 | bindings_.allocate(S->getPlaceholder()); |
11992 | |
11993 | ::glow::convertPlaceholdersToConstants(refF, bindings_, |
11994 | {input, input1, S->getPlaceholder()}); |
11995 | EE_.compile(CompilationMode::Infer); |
11996 | EE_.run(bindings_, "main" ); |
11997 | EE_.run(bindings_, "mainRef" ); |
11998 | Tensor &result1 = *bindings_.get(S->getPlaceholder()); |
11999 | |
12000 | EXPECT_TRUE(result.isEqual(result1)); |
12001 | } |
12002 | |
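/// Helper that builds a BatchNormalization node from the given \p bias,
/// \p weights (scale), \p mean and \p variance constants, runs it on
/// \p input, and compares the result against \p expectedOutput within the
/// given tolerances. Per channel c along \p channelIndex this computes
/// y = (x - mean[c]) / sqrt(variance[c] + epsilon) * weights[c] + bias[c];
/// \p momentum only affects running-stats updates during training and has
/// no effect on the inference result checked here.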
12003 | template <typename DataType> |
12004 | void testBatchnorm(Module &module, Function *function, |
12005 | PlaceholderBindings &bindings, ExecutionEngine &engine, |
12006 | std::string &testName, unsigned channelIndex, Tensor &&bias, |
12007 | Tensor &&weights, Tensor &&mean, Tensor &&variance, |
12008 | float epsilon, float momentum, Tensor &&input, |
12009 | Tensor &&expectedOutput, float maxAbsoluteDifference = 0.05, |
12010 | float maxRMSE = 0.01) { |
12011 | auto *inputPlaceholder = |
12012 | createPlaceholder(module, bindings, &input, testName + "_input" ); |
12013 | TypeRef outputType = module.uniqueType(expectedOutput.getType()); |
12014 | auto *batchnorm = function->createBatchNormalization( |
12015 | testName, outputType, inputPlaceholder, |
12016 | module.createConstant("bias" , bias), |
12017 | module.createConstant("weights" , weights), |
12018 | module.createConstant("mean" , mean), |
12019 | module.createConstant("var" , variance), channelIndex, epsilon, momentum); |
12020 | auto *save = function->createSave(testName + "output" , batchnorm); |
12021 | bindings.allocate(save->getPlaceholder()); |
12022 | engine.compile(CompilationMode::Infer); |
12023 | engine.run(bindings); |
12024 | Tensor *result = bindings.get(save->getPlaceholder()); |
12025 | compare<DataType>(expectedOutput, *result, maxAbsoluteDifference, maxRMSE); |
12026 | } |
12027 | |
12028 | TEST_P(OperatorTest, BatchNorm0D_FP16_NC) { |
12029 | CHECK_IF_ENABLED(); |
12030 | std::string name = "BatchNorm0D_FP16_NC" ; |
12031 | testBatchnorm<float16_t>( |
12032 | mod_, F_, bindings_, EE_, name, 1, |
12033 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {0.7451, 0.7946}), |
12034 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {0.6815, 0.0039}), |
12035 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {1.0730, -7.3854}), |
12036 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {1.8200, 4.6300}), |
12037 | 1e-5, 0.1, |
12038 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2, 2}, |
12039 | {-0.0892, 0.6268, 1.3740, 2.4480}), |
12040 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2, 2}, |
12041 | {0.1580, 0.8091, 0.8972, 0.8124})); |
12042 | } |
12043 | |
12044 | TEST_P(OperatorTest, BatchNorm1D_FP16_NCW) { |
12045 | CHECK_IF_ENABLED(); |
12046 | std::string name = "BatchNorm1D_FP16_NCW" ; |
12047 | testBatchnorm<float16_t>( |
12048 | mod_, F_, bindings_, EE_, name, 1, |
12049 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {0.7451, 0.7946}), |
12050 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {0.6815, 0.0039}), |
12051 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {1.0730, -7.3854}), |
12052 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {1.8200, 4.6300}), |
12053 | 1e-5, 0.1, |
12054 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2, 2, 3}, |
12055 | {-0.0892, 0.6268, 1.3740, 2.4480, -1.4285, |
12056 | 0.0565, -0.0266, 0.4494, -0.3858, 1.0044, |
12057 | 0.8844, 0.5071}), |
12058 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2, 2, 3}, |
12059 | {0.1580, 0.5197, 0.8972, 0.8124, 0.8054, |
12060 | 0.8081, 0.1896, 0.4301, 0.0082, 0.8098, |
12061 | 0.8096, 0.8089})); |
12062 | } |
12063 | |
12064 | TEST_P(OperatorTest, BatchNorm2D_FP16_NCHW) { |
12065 | CHECK_IF_ENABLED(); |
12066 | std::string name = "BatchNorm2D_FP16_NCHW" ; |
12067 | testBatchnorm<float16_t>( |
12068 | mod_, F_, bindings_, EE_, name, 1, |
12069 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {0.7451, 0.7946}), |
12070 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {0.6815, 0.0039}), |
12071 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {1.0730, -7.3854}), |
12072 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {1.8200, 4.6300}), |
12073 | 9.999999747378752e-06, 0.8999999761581421, |
12074 | Tensor::fromData<float16_t>( |
12075 | ElemKind::Float16Ty, {2, 2, 3, 3}, |
12076 | {-0.0892, 1.3740, -1.4285, -0.0266, -0.3858, 0.8844, |
12077 | -1.3639, -1.8868, -1.3744, 0.6268, 2.4480, 0.0565, |
12078 | 0.4494, 1.0044, 0.5071, -0.8796, 0.1380, 1.9176, |
12079 | 1.4044, 0.1614, 0.3824, 0.5881, -0.5724, -2.1089, |
12080 | 0.5069, 0.8189, -0.3894, -1.0725, 0.7809, -0.3220, |
12081 | 0.4939, -0.3471, -0.2114, -0.7874, 0.2189, 1.8009}), |
12082 | Tensor::fromData<float16_t>( |
12083 | ElemKind::Float16Ty, {2, 2, 3, 3}, |
12084 | {0.1580, 0.8972, -0.5186, 0.1896, 0.0082, 0.6498, -0.4859, -0.7501, |
12085 | -0.4913, 0.8093, 0.8126, 0.8082, 0.8089, 0.8100, 0.8091, 0.8065, |
12086 | 0.8084, 0.8116, 0.9125, 0.2846, 0.3962, 0.5001, -0.0861, -0.8623, |
12087 | 0.4591, 0.6167, 0.0064, 0.8062, 0.8096, 0.8075, 0.8090, 0.8075, |
12088 | 0.8077, 0.8067, 0.8085, 0.8114})); |
12089 | } |
12090 | |
12091 | TEST_P(OperatorTest, BatchNorm2D_FP16_NHWC) { |
12092 | CHECK_IF_ENABLED(); |
12093 | std::string name = "BatchNorm2D_FP16_NHWC" ; |
12094 | testBatchnorm<float16_t>( |
12095 | mod_, F_, bindings_, EE_, name, 3, |
12096 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {0.7451, 0.7946}), |
12097 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {0.6815, 0.0039}), |
12098 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {1.0730, -7.3854}), |
12099 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {1.8200, 4.6300}), |
12100 | 0.0008742152713239193, 0.030418938025832176, |
12101 | Tensor::fromData<float16_t>( |
12102 | ElemKind::Float16Ty, {2, 3, 3, 2}, |
12103 | {-0.0892, 0.6268, 1.3740, 2.4480, -1.4285, 0.0565, |
12104 | -0.0266, 0.4494, -0.3858, 1.0044, 0.8844, 0.5071, |
12105 | -1.3639, -0.8796, -1.8868, 0.1380, -1.3744, 1.9176, |
12106 | 1.4044, -1.0725, 0.1614, 0.7809, 0.3824, -0.3220, |
12107 | 0.5881, 0.4939, -0.5724, -0.3471, -2.1089, -0.2114, |
12108 | 0.5069, -0.7874, 0.8189, 0.2189, -0.3894, 1.8009}), |
12109 | Tensor::fromData<float16_t>( |
12110 | ElemKind::Float16Ty, {2, 3, 3, 2}, |
12111 | {0.1580, 0.8093, 0.8972, 0.8126, -0.5186, 0.8082, 0.1896, 0.8089, |
12112 | 0.0082, 0.8100, 0.6498, 0.8091, -0.4859, 0.8065, -0.7501, 0.8084, |
12113 | -0.4913, 0.8116, 0.9125, 0.8062, 0.2846, 0.8096, 0.3962, 0.8075, |
12114 | 0.5001, 0.8090, -0.0861, 0.8075, -0.8623, 0.8077, 0.4591, 0.8067, |
12115 | 0.6167, 0.8085, 0.0064, 0.8114})); |
12116 | } |
12117 | |
12118 | TEST_P(OperatorTest, BatchNorm2D_INT8_NCHW) { |
12119 | CHECK_IF_ENABLED(); |
12120 | std::string name = "BatchNorm2D_INT8_NCHW" ; |
12121 | testBatchnorm<int8_t>( |
12122 | mod_, F_, bindings_, EE_, name, 1, |
12123 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {0.0393, 0.7099}), |
12124 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {3.2329, 0.7463}), |
12125 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {0.2697, 0.2668}), |
12126 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {0.0011, 0.5580}), |
12127 | 0.00035455477237701416, 0.1, |
12128 | Tensor::fromData<int8_t>( |
12129 | ElemKind::Int8QTy, 0.039, 44, {2, 2, 2, 2}, |
12130 | {16, 70, 55, 53, 49, 94, 26, 62, 19, 78, 50, 31, 28, 69, 32, 59}), |
12131 | Tensor::fromData<int8_t>(ElemKind::Int8QTy, 1.42822, -57, {2, 2, 2, 2}, |
12132 | {-128, -13, -48, -52, -57, -55, -57, -56, -128, |
12133 | 6, -59, -103, -57, -56, -57, -56}), |
12134 | 1, 0.1); |
12135 | } |
12136 | |
12137 | /// 3D Batch Normalization in Float16 |
12138 | TEST_P(OperatorTest, BatchNorm3D_FP16_NCTHW) { |
12139 | CHECK_IF_ENABLED(); |
12140 | std::string name = "BatchNorm3D_FP16_NCTHW" ; |
12141 | testBatchnorm<float16_t>( |
12142 | mod_, F_, bindings_, EE_, name, 1, |
12143 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {2.9304, 2.9185}), |
12144 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {5.0408, 7.0274}), |
12145 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {2.5450, 0.0290}), |
12146 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {3.1328, 5.8897}), |
12147 | 0.0008742152713239193, 0.030418938025832176, |
12148 | Tensor::fromData<float16_t>( |
12149 | ElemKind::Float16Ty, {2, 2, 2, 2, 2}, |
12150 | {1.5744, -0.2288, 0.1272, -0.6514, -0.4279, 0.3006, -0.9304, |
12151 | 1.0815, 0.1611, 0.6101, 1.5194, -0.5881, -0.4864, -0.3633, |
12152 | -1.5608, -0.4172, -1.2346, -0.6261, -1.5411, 0.0569, 1.2166, |
12153 | -1.1330, -0.0745, 0.6357, 1.0074, 1.0130, -0.9973, -0.7630, |
12154 | 0.7943, 0.3071, 0.3458, 0.6359}), |
12155 | Tensor::fromData<float16_t>( |
12156 | ElemKind::Float16Ty, {2, 2, 2, 2, 2}, |
12157 | {0.1665, -4.9684, -3.9544, -6.1716, -5.5352, -3.4608, -6.9660, |
12158 | -1.2371, 3.3009, 4.6010, 7.2340, 1.1318, 1.4262, 1.7827, |
12159 | -1.6845, 1.6266, -7.8322, -6.0997, -8.7051, -4.1546, -0.8525, |
12160 | -7.5429, -4.5288, -2.5064, 5.7513, 5.7677, -0.0532, 0.6254, |
12161 | 5.1343, 3.7236, 3.8359, 4.6758})); |
12162 | } |
12163 | |
/// Temporary copy of the channels-last (NTHWC) test above, kept until Glow
/// gains dedicated coverage for this particular case.
12166 | TEST_P(OperatorTest, BatchNorm3D_FP16_NTHWC) { |
12167 | CHECK_IF_ENABLED(); |
12168 | std::string name = "BatchNorm3D_FP16_NTHWC" ; |
12169 | testBatchnorm<float16_t>( |
12170 | mod_, F_, bindings_, EE_, name, 4, |
12171 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {2.9304, 2.9185}), |
12172 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {5.0408, 7.0274}), |
12173 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {2.5450, 0.0290}), |
12174 | Tensor::fromData<float16_t>(ElemKind::Float16Ty, {2}, {3.1328, 5.8897}), |
12175 | 0.0008742152713239193, 0.030418938025832176, |
12176 | Tensor::fromData<float16_t>( |
12177 | ElemKind::Float16Ty, {2, 2, 2, 2, 2}, |
12178 | {1.5305, -0.2205, -0.7900, -0.1350, -0.7486, -0.0381, 0.9090, |
12179 | -0.6790, 0.3621, -0.5873, -2.4898, 0.9877, 0.4998, 0.2285, |
12180 | -0.1048, -0.5791, -0.7407, -1.2809, 2.2225, 1.1126, 0.0190, |
12181 | 0.9498, -0.6292, -0.6234, -1.4386, 2.1991, 1.6403, -1.5266, |
12182 | -0.0793, -0.6487, 0.8279, 0.7702}), |
12183 | Tensor::fromData<float16_t>( |
12184 | ElemKind::Float16Ty, {2, 2, 2, 2, 2}, |
12185 | {0.0414, 2.1962, -6.5665, 2.4435, -6.4484, 2.7244, -1.7283, |
12186 | 0.8687, -3.2857, 1.1340, -11.4066, 5.6943, -2.8935, 3.4961, |
12187 | -4.6153, 1.1577, -6.4260, -0.8744, 2.0120, 6.0559, -4.2627, |
12188 | 5.5847, -6.1085, 1.0296, -8.4133, 9.2018, 0.3542, -1.5855, |
12189 | -4.5427, 0.9562, -1.9592, 5.0646})); |
12190 | } |
12191 | |
12192 | TEST_P(OperatorTest, BatchNorm3D_INT8_NTHWC) { |
12193 | CHECK_IF_ENABLED(); |
12194 | std::string name = "BatchNorm3D_INT8_NTHWC" ; |
12195 | testBatchnorm<int8_t>( |
12196 | mod_, F_, bindings_, EE_, name, 4, |
12197 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {2.5167e-05, 6.8856e-05}), |
12198 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {1.0003, 1.0005}), |
12199 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {0.073, 0.043}), |
12200 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {2.5, 1.27}), |
12201 | 9.999999747378752e-06, 0.8999999761581421, |
12202 | Tensor::fromData<int8_t>( |
12203 | ElemKind::Int8QTy, 0.01, -5, {2, 2, 3, 3, 2}, |
12204 | {-8, 36, 120, 40, 27, -29, -49, 102, -74, -105, -84, -35, |
12205 | 49, 18, -122, 33, 16, -128, -72, 83, -128, 74, 58, 1, |
12206 | 15, 75, 127, -26, 67, -110, -128, 102, -128, 127, -58, 127, |
12207 | 55, 127, 117, 84, 20, -24, -55, 45, -64, 54, -71, 10, |
12208 | -14, -128, -74, -61, 18, -57, -128, -64, -77, -84, -4, -115, |
12209 | -4, 24, 12, -18, 127, -128, -1, 127, -128, -47, -128, -56}), |
12210 | Tensor::fromData<int8_t>( |
12211 | ElemKind::Int8QTy, 0.023, 15, {2, 2, 3, 3, 2}, |
12212 | {12, 29, 47, 31, 22, 4, 1, 55, -6, -25, -9, 2, 28, 22, -19, |
12213 | 28, 19, -34, -5, 47, -21, 44, 30, 16, 18, 44, 49, 5, 33, -27, |
12214 | -21, 55, -21, 64, -2, 64, 29, 64, 47, 48, 20, 6, -1, 33, -3, |
12215 | 36, -5, 19, 11, -34, -6, -8, 19, -7, -21, -9, -7, -17, 13, -29, |
12216 | 13, 25, 18, 8, 49, -34, 14, 64, -21, -3, -21, -6}), |
12217 | 1, 0.025); |
12218 | } |
12219 | |
12220 | TEST_P(OperatorTest, BatchNorm3D_INT8_NCTHW) { |
12221 | CHECK_IF_ENABLED(); |
12222 | std::string name = "BatchNorm3D_INT8_NCTHW" ; |
12223 | testBatchnorm<int8_t>( |
12224 | mod_, F_, bindings_, EE_, name, 1, |
12225 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {2.2692, 0.8616}), |
12226 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {0.4543, 6.7262}), |
12227 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {0.6257, 0.9221}), |
12228 | Tensor::fromData<float>(ElemKind::FloatTy, {2}, {0.9841, 0.4234}), |
12229 | 0.0009562143087387085, 1.2378009557724, |
12230 | Tensor::fromData<int8_t>(ElemKind::Int8QTy, 0.022, 22, {2, 2, 2, 2, 2}, |
12231 | {10, 5, 28, 37, 16, 66, 5, 31, 16, -3, 44, |
12232 | 53, 27, -4, 52, 6, 25, 46, 51, -44, 26, 29, |
12233 | -4, 27, -17, 81, 20, 71, 105, 10, 68, -37}), |
12234 | Tensor::fromData<int8_t>(ElemKind::Int8QTy, 1.00106, 88, {2, 2, 2, 2, 2}, |
12235 | {90, 90, 90, 90, 90, 90, 90, 90, 78, 74, 84, |
12236 | 86, 80, 73, 86, 76, 90, 90, 90, 89, 90, 90, |
12237 | 90, 90, 70, 93, 79, 90, 98, 77, 90, 66}), |
12238 | 1, 0.1); |
12239 | } |
12240 | |
12241 | /// Check non-square padding for AveragePool. The first pool op has non-square |
12242 | /// padding, while the second one has zero padding. The second pool op's input |
12243 | /// is the same as the first one's after-padding input. All other parameters |
/// of the two pool ops are the same.
12245 | TEST_P(OperatorTest, NonSquarePaddingAveragePool) { |
12246 | CHECK_IF_ENABLED(); |
12247 | |
12248 | auto *input = |
12249 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 1}, "input" , false); |
12250 | auto IH = bindings_.allocate(input)->getHandle(); |
12251 | for (size_t i = 0; i < 4 * 4; i++) { |
12252 | IH.raw(i) = i + 1; |
12253 | } |
12254 | auto *Pool = F_->createAvgPool("pool" , input, {2, 2}, {1, 1}, {0, 2, 1, 3}); |
12255 | auto *S = F_->createSave("save" , Pool); |
12256 | bindings_.allocate(S->getPlaceholder()); |
12257 | |
12258 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
12259 | |
12260 | auto *input1 = |
12261 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 5, 9, 1}, "input1" , false); |
12262 | bindings_.allocate(input1)->zero(); |
12263 | auto IH1 = bindings_.get(input1)->getHandle(); |
12264 | for (dim_t i = 0; i < 4; i++) |
12265 | for (dim_t j = 2; j < 6; j++) { |
12266 | IH1.at({0, i, j, 0}) = i * 4 + j - 2 + 1; |
12267 | } |
12268 | |
12269 | Function *refF = mod_.createFunction("mainRef" ); |
12270 | Pool = refF->createAvgPool("pool1" , input1, 2, 1, 0); |
12271 | S = refF->createSave("save1" , Pool); |
12272 | bindings_.allocate(S->getPlaceholder()); |
12273 | EE_.compile(CompilationMode::Infer); |
12274 | EE_.run(bindings_, "main" ); |
12275 | EE_.run(bindings_, "mainRef" ); |
12276 | Tensor &result1 = *bindings_.get(S->getPlaceholder()); |
12277 | |
12278 | EXPECT_TRUE(result.isEqual(result1)); |
12279 | } |
12280 | |
12281 | /// Check non-square padding for MaxPool. The first pool op has non-square |
12282 | /// padding, while the second one has zero padding. The second pool-op's input |
12283 | /// is the same as the first one's after-padding input. All other parameters |
/// of the two pool ops are the same.
12285 | TEST_P(OperatorTest, NonSquarePaddingMaxPool) { |
12286 | CHECK_IF_ENABLED(); |
12287 | |
12288 | auto *input = |
12289 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 1}, "input" , false); |
12290 | auto IH = bindings_.allocate(input)->getHandle(); |
12291 | for (size_t i = 0; i < 4 * 4; i++) { |
12292 | IH.raw(i) = i + 1; |
12293 | } |
12294 | auto *Pool = F_->createMaxPool("pool" , input, {2, 2}, {1, 1}, {0, 2, 1, 3}); |
12295 | auto *S = F_->createSave("save" , Pool->getResult()); |
12296 | bindings_.allocate(S->getPlaceholder()); |
12297 | |
12298 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
12299 | |
12300 | auto *input1 = |
12301 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 5, 9, 1}, "input1" , false); |
12302 | bindings_.allocate(input1)->zero(); |
12303 | auto IH1 = bindings_.get(input1)->getHandle(); |
12304 | for (dim_t i = 0; i < 4; i++) |
12305 | for (dim_t j = 2; j < 6; j++) { |
12306 | IH1.at({0, i, j, 0}) = i * 4 + j - 2 + 1; |
12307 | } |
12308 | |
12309 | Function *refF = mod_.createFunction("mainRef" ); |
12310 | Pool = refF->createMaxPool("pool1" , input1, 2, 1, 0); |
12311 | S = refF->createSave("save1" , Pool->getResult()); |
12312 | bindings_.allocate(S->getPlaceholder()); |
12313 | |
12314 | EE_.compile(CompilationMode::Infer); |
12315 | EE_.run(bindings_, "main" ); |
12316 | EE_.run(bindings_, "mainRef" ); |
12317 | |
12318 | Tensor &result1 = *bindings_.get(S->getPlaceholder()); |
12319 | |
12320 | EXPECT_TRUE(result.isEqual(result1)); |
12321 | } |
12322 | |
12323 | TEST_P(OperatorTest, FP16AvgPool) { |
12324 | CHECK_IF_ENABLED(); |
12325 | |
12326 | auto *input = |
12327 | mod_.createPlaceholder(ElemKind::Float16Ty, {1, 3, 3, 1}, "input" , false); |
12328 | bindings_.allocate(input)->getHandle<float16_t>() = {0., 1., 2., 3., 4., |
12329 | 5., 6., 7., 8.}; |
12330 | auto *Pool = F_->createAvgPool("pool" , input, {2, 2}, {1, 1}, {0, 0, 0, 0}); |
12331 | auto *S = F_->createSave("save" , Pool); |
12332 | bindings_.allocate(S->getPlaceholder()); |
12333 | |
12334 | EE_.compile(CompilationMode::Infer); |
12335 | EE_.run(bindings_); |
12336 | |
12337 | auto *result = bindings_.get(S->getPlaceholder()); |
12338 | Tensor out(ElemKind::Float16Ty, {1, 2, 2, 1}); |
12339 | out.getHandle<float16_t>() = {2., 3., 5., 6.}; |
12340 | EXPECT_TRUE(out.isEqual(*result)); |
12341 | } |
12342 | |
12343 | TEST_P(OperatorTest, BFloat16AvgPool) { |
12344 | CHECK_IF_ENABLED(); |
12345 | |
12346 | auto *input = mod_.createPlaceholder(ElemKind::BFloat16Ty, {1, 3, 3, 1}, |
12347 | "input" , false); |
12348 | bindings_.allocate(input)->getHandle<bfloat16_t>() = {0., 1., 2., 3., 4., |
12349 | 5., 6., 7., 8.}; |
12350 | auto *Pool = F_->createAvgPool("pool" , input, {2, 2}, {1, 1}, {0, 0, 0, 0}); |
12351 | auto *S = F_->createSave("save" , Pool); |
12352 | bindings_.allocate(S->getPlaceholder()); |
12353 | |
12354 | EE_.compile(CompilationMode::Infer); |
12355 | EE_.run(bindings_); |
12356 | |
12357 | auto *result = bindings_.get(S->getPlaceholder()); |
12358 | Tensor out(ElemKind::BFloat16Ty, {1, 2, 2, 1}); |
12359 | out.getHandle<bfloat16_t>() = {2., 3., 5., 6.}; |
12360 | EXPECT_TRUE(out.isEqual(*result)); |
12361 | } |
12362 | |
12363 | /// Verify that the AvgPool operator works correctly. |
12364 | TEST_P(OperatorTest, AvgPool) { |
12365 | CHECK_IF_ENABLED(); |
12366 | |
12367 | auto *input = |
12368 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 1}, "input" , false); |
12369 | bindings_.allocate(input)->getHandle() = {0., 1., 2., 3., 4., 5., 6., 7., 8.}; |
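  // Each output is the mean of a 2x2 window of the 3x3 input:
  // (0+1+3+4)/4 = 2, (1+2+4+5)/4 = 3, (3+4+6+7)/4 = 5, (4+5+7+8)/4 = 6.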
12370 | auto *Pool = F_->createAvgPool("pool" , input, {2, 2}, {1, 1}, {0, 0, 0, 0}); |
12371 | auto *S = F_->createSave("save" , Pool); |
12372 | bindings_.allocate(S->getPlaceholder()); |
12373 | |
12374 | EE_.compile(CompilationMode::Infer); |
12375 | EE_.run(bindings_); |
12376 | |
12377 | auto *result = bindings_.get(S->getPlaceholder()); |
12378 | Tensor out(ElemKind::FloatTy, {1, 2, 2, 1}); |
12379 | out.getHandle() = {2., 3., 5., 6.}; |
12380 | EXPECT_TRUE(out.isEqual(*result)); |
12381 | } |
12382 | |
12383 | TEST_P(OperatorTest, Int8AvgPool) { |
12384 | CHECK_IF_ENABLED(); |
12385 | |
12386 | auto *input = mod_.createPlaceholder(ElemKind::Int8QTy, {1, 3, 3, 1}, 1, 0, |
12387 | "input" , false); |
12388 | bindings_.allocate(input)->getHandle<int8_t>() = {0, 1, 2, 3, 4, 5, 6, 7, 8}; |
12389 | auto *Pool = F_->createAvgPool("pool" , input, {2, 2}, {1, 1}, {0, 0, 0, 0}); |
12390 | auto *S = F_->createSave("save" , Pool); |
12391 | bindings_.allocate(S->getPlaceholder()); |
12392 | |
12393 | EE_.compile(CompilationMode::Infer); |
12394 | EE_.run(bindings_); |
12395 | |
12396 | auto result = bindings_.get(S->getPlaceholder())->getHandle<int8_t>(); |
12397 | Tensor out(ElemKind::Int8QTy, {2, 2}, 1, 0); |
12398 | out.getHandle<int8_t>() = {2, 3, 5, 6}; |
12399 | for (size_t i = 0; i < 2 * 2; i++) { |
12400 | EXPECT_EQ(result.raw(i), out.getHandle<int8_t>().raw(i)); |
12401 | } |
12402 | } |
12403 | |
12404 | TEST_P(OperatorTest, Int8AvgPoolCountExcludePads) { |
12405 | CHECK_IF_ENABLED(); |
12406 | |
12407 | auto *input = mod_.createPlaceholder(ElemKind::Int8QTy, {1, 3, 3, 1}, 1, 0, |
12408 | "input" , false); |
12409 | bindings_.allocate(input)->getHandle<int8_t>() = {0, 1, 2, 3, 4, 5, 6, 7, 8}; |
12410 | auto *Pool = F_->createAvgPool("pool" , input, {3, 3}, {2, 2}, {1, 1, 1, 1}, |
12411 | NHWC, /* countIncludePads */ false); |
12412 | auto *S = F_->createSave("save" , Pool); |
12413 | bindings_.allocate(S->getPlaceholder()); |
12414 | |
12415 | EE_.compile(CompilationMode::Infer); |
12416 | EE_.run(bindings_); |
12417 | |
12418 | auto result = bindings_.get(S->getPlaceholder())->getHandle<int8_t>(); |
12419 | Tensor out(ElemKind::Int8QTy, {2, 2}, 1, 0); |
12420 | out.getHandle<int8_t>() = {2, 3, 5, 6}; |
12421 | for (size_t i = 0; i < 2 * 2; i++) { |
12422 | EXPECT_EQ(result.raw(i), out.getHandle<int8_t>().raw(i)); |
12423 | } |
12424 | } |
12425 | |
12426 | TEST_P(OperatorTest, FP16AvgPool3D) { |
12427 | CHECK_IF_ENABLED(); |
12428 | |
12429 | auto *input = |
      mod_.createPlaceholder(ElemKind::Float16Ty, {1, 3, 3, 3, 1}, // NTHWC
12431 | "input" , false); |
12432 | bindings_.allocate(input)->getHandle<float16_t>() = { |
12433 | 0., 1., 2., 3., 4., 5., 6., 7., 8., 0., 1., 2., 3., 4., |
12434 | 5., 6., 7., 8., 0., 1., 2., 3., 4., 5., 6., 7., 8.}; |
12435 | auto *Pool = F_->createAvgPool("pool" , input, {2, 2, 2}, // kernel |
12436 | {1, 1, 1}, // stride |
12437 | {0, 0, 0, 0, 0, 0}, // padding |
12438 | NTHWC); |
12439 | auto *outputNCTHW = |
12440 | F_->createTranspose("avgpool3d_output_NTHWC2NCTHW" , Pool, NTHWC2NCTHW); |
12441 | auto *S = F_->createSave("save" , outputNCTHW); |
12442 | bindings_.allocate(S->getPlaceholder()); |
12443 | |
12444 | EE_.compile(CompilationMode::Infer); |
12445 | EE_.run(bindings_); |
12446 | |
12447 | auto *result = bindings_.get(S->getPlaceholder()); |
12448 | Tensor out(ElemKind::Float16Ty, {1, 1, 2, 2, 2}); |
12449 | out.getHandle<float16_t>() = {2., 3., 5., 6., 2., 3., 5., 6.}; |
12450 | EXPECT_TRUE(out.isEqual(*result)); |
12451 | } |
12452 | |
12453 | TEST_P(OperatorTest, BFloat16AvgPool3D) { |
12454 | CHECK_IF_ENABLED(); |
12455 | |
12456 | auto *input = |
12457 | mod_.createPlaceholder(ElemKind::BFloat16Ty, {1, 1, 3, 3, 3}, // NCTHW |
12458 | "input" , false); |
12459 | bindings_.allocate(input)->getHandle<bfloat16_t>() = { |
12460 | 0., 1., 2., 3., 4., 5., 6., 7., 8., 0., 1., 2., 3., 4., |
12461 | 5., 6., 7., 8., 0., 1., 2., 3., 4., 5., 6., 7., 8.}; |
12462 | auto *inputNTHWC = |
12463 | F_->createTranspose("avgpool3d_input_NCTHW2NTHWC" , input, NCTHW2NTHWC); |
12464 | auto *Pool = F_->createAvgPool("pool" , inputNTHWC, {2, 2, 2}, // kernel |
12465 | {1, 1, 1}, // stride |
12466 | {0, 0, 0, 0, 0, 0}, // padding |
12467 | NTHWC); |
12468 | auto *outputNCTHW = |
12469 | F_->createTranspose("avgpool3d_output_NTHWC2NCTHW" , Pool, NTHWC2NCTHW); |
12470 | auto *S = F_->createSave("save" , outputNCTHW); |
12471 | bindings_.allocate(S->getPlaceholder()); |
12472 | |
12473 | EE_.compile(CompilationMode::Infer); |
12474 | EE_.run(bindings_); |
12475 | |
12476 | auto *result = bindings_.get(S->getPlaceholder()); |
12477 | Tensor out(ElemKind::BFloat16Ty, {1, 1, 2, 2, 2}); |
12478 | out.getHandle<bfloat16_t>() = {2., 3., 5., 6., 2., 3., 5., 6.}; |
12479 | EXPECT_TRUE(out.isEqual(*result)); |
12480 | } |
12481 | |
12482 | TEST_P(OperatorTest, Int8AvgPool3D) { |
12483 | CHECK_IF_ENABLED(); |
12484 | |
12485 | auto *input = |
12486 | mod_.createPlaceholder(ElemKind::Int8QTy, {1, 1, 3, 3, 3}, // NCTHW |
12487 | 1, 0, // scale, offset |
12488 | "input" , false); |
12489 | bindings_.allocate(input)->getHandle<int8_t>() = {0, 1, 2, 3, 4, 5, 6, 7, 8, |
12490 | 0, 1, 2, 3, 4, 5, 6, 7, 8, |
12491 | 0, 1, 2, 3, 4, 5, 6, 7, 8}; |
12492 | auto *inputNTHWC = |
12493 | F_->createTranspose("avgpool3d_input_NCTHW2NTHWC" , input, NCTHW2NTHWC); |
12494 | auto *Pool = F_->createAvgPool("avgpool3d" , inputNTHWC, {2, 2, 2}, // kernel |
12495 | {1, 1, 1}, // stride |
12496 | {0, 0, 0, 0, 0, 0}, // padding |
12497 | NTHWC); |
12498 | auto *outputNCTHW = |
12499 | F_->createTranspose("avgpool3d_output_NTHWC2NCTHW" , Pool, NTHWC2NCTHW); |
12500 | auto *S = F_->createSave("save" , outputNCTHW); |
12501 | bindings_.allocate(S->getPlaceholder()); |
12502 | |
12503 | EE_.compile(CompilationMode::Infer); |
12504 | EE_.run(bindings_); |
12505 | |
12506 | auto result = bindings_.get(S->getPlaceholder())->getHandle<int8_t>(); |
12507 | Tensor out(ElemKind::Int8QTy, {1, 1, 2, 2, 2}, 1, 0); |
12508 | out.getHandle<int8_t>() = { |
12509 | 2, 3, 5, 6, 2, 3, 5, 6, |
12510 | }; |
12511 | for (size_t i = 0; i < 2 * 2 * 2; i++) { |
12512 | EXPECT_EQ(result.raw(i), out.getHandle<int8_t>().raw(i)); |
12513 | } |
12514 | } |
12515 | |
12516 | TEST_P(OperatorTest, AvgPoolCountExcludePads) { |
12517 | CHECK_IF_ENABLED(); |
12518 | |
12519 | auto *input = |
12520 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 1}, "input" , false); |
12521 | bindings_.allocate(input)->getHandle() = {0., 1., 2., 3., 4., 5., 6., 7., 8.}; |
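  // With countIncludePads = false only the valid (non-padded) elements feed
  // the divisor: each 3x3 window here covers exactly four valid inputs, e.g.
  // the top-left output is (0+1+3+4)/4 = 2 rather than 8/9.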
12522 | auto *Pool = F_->createAvgPool("pool" , input, {3, 3}, {2, 2}, {1, 1, 1, 1}, |
12523 | NHWC, /* countIncludePads */ false); |
12524 | auto *S = F_->createSave("save" , Pool); |
12525 | bindings_.allocate(S->getPlaceholder()); |
12526 | |
12527 | EE_.compile(CompilationMode::Infer); |
12528 | EE_.run(bindings_); |
12529 | |
12530 | auto *result = bindings_.get(S->getPlaceholder()); |
12531 | Tensor out(ElemKind::FloatTy, {1, 2, 2, 1}); |
12532 | out.getHandle() = {2., 3., 5., 6.}; |
12533 | EXPECT_TRUE(out.isEqual(*result)); |
12534 | } |
12535 | |
12536 | /// Create a simple AvgPool network with large pads. |
12537 | template <bool countIncludePads> |
12538 | static FunctionTensorPair |
12539 | createAndInitAvgPool2DLargePads(glow::PlaceholderBindings &bindings, |
12540 | glow::ExecutionEngine &EE) { |
12541 | auto &mod = EE.getModule(); |
12542 | Function *F = mod.createFunction("main" ); |
12543 | std::vector<dim_t> inputDims = {3, 4, 5, 6}; |
12544 | std::vector<unsigned_t> kernels = {2, 3}; |
12545 | std::vector<unsigned_t> strides = {1, 2}; |
12546 | std::vector<unsigned_t> pads = {4, 5, 6, 7}; |
12547 | auto *input = |
12548 | mod.createPlaceholder(ElemKind::FloatTy, inputDims, "input" , false); |
12549 | bindings.allocate(input)->getHandle<float>().randomize(-1.0, 1.0, |
12550 | mod.getPRNG()); |
12551 | AvgPoolNode *pool = |
12552 | F->createAvgPool("pool" , input, kernels, strides, pads, |
12553 | ConvolutionLayout::NHWC, countIncludePads); |
12554 | SaveNode *save = F->createSave("save" , pool); |
12555 | auto *resultTensor = bindings.allocate(save->getPlaceholder()); |
12556 | return std::make_pair(F, resultTensor); |
12557 | } |
12558 | |
12559 | /// AvgPool2D tests with large pads. |
12560 | /// Compare with the Interpreter float implementation. |
12561 | #define TEST_AVG_POOL2D_LARGE_PADS(NAME, TYPE, COUNT_INCLUDE_PADS, TOL) \ |
12562 | TEST_P(OperatorStatelessTest, AvgPool2DLargePads_##NAME) { \ |
12563 | CHECK_IF_ENABLED(); \ |
12564 | compareAgainstInterpreter( \ |
12565 | getBackendName(), createAndInitAvgPool2DLargePads<COUNT_INCLUDE_PADS>, \ |
12566 | ElemKind::FloatTy, ElemKind::TYPE, TOL); \ |
12567 | } |
12568 | TEST_AVG_POOL2D_LARGE_PADS(FloatTy_CountIncludePads, FloatTy, true, 1e-5) |
12569 | TEST_AVG_POOL2D_LARGE_PADS(FloatTy_CountExcludePads, FloatTy, false, 1e-5) |
12570 | TEST_AVG_POOL2D_LARGE_PADS(Int8QTy_CountIncludePads, Int8QTy, true, 0.005) |
12571 | TEST_AVG_POOL2D_LARGE_PADS(Int8QTy_CountExcludePads, Int8QTy, false, 0.01) |
12572 | #undef TEST_AVG_POOL2D_LARGE_PADS |
12573 | |
12574 | /// Create a simple MaxPool network with large pads. |
12575 | static FunctionTensorPair |
12576 | createAndInitMaxPool2DLargePads(glow::PlaceholderBindings &bindings, |
12577 | glow::ExecutionEngine &EE) { |
12578 | auto &mod = EE.getModule(); |
12579 | Function *F = mod.createFunction("main" ); |
12580 | std::vector<dim_t> inputDims = {3, 4, 5, 6}; |
12581 | std::vector<unsigned_t> kernels = {2, 3}; |
12582 | std::vector<unsigned_t> strides = {1, 2}; |
12583 | std::vector<unsigned_t> pads = {4, 5, 6, 7}; |
12584 | auto *input = |
12585 | mod.createPlaceholder(ElemKind::FloatTy, inputDims, "input" , false); |
12586 | bindings.allocate(input)->getHandle<float>().randomize(-1.0, 1.0, |
12587 | mod.getPRNG()); |
12588 | MaxPoolNode *pool = F->createMaxPool("pool" , input, kernels, strides, pads); |
12589 | SaveNode *save = F->createSave("save" , pool->getResult()); |
12590 | auto *resultTensor = bindings.allocate(save->getPlaceholder()); |
12591 | return std::make_pair(F, resultTensor); |
12592 | } |
12593 | |
12594 | /// MaxPool2D tests with large pads. |
12595 | /// Compare with the Interpreter float implementation. |
12596 | #define TEST_MAX_POOL2D_LARGE_PADS(NAME, TYPE, TOL) \ |
12597 | TEST_P(OperatorStatelessTest, MaxPool2DLargePads_##NAME) { \ |
12598 | CHECK_IF_ENABLED(); \ |
12599 | compareAgainstInterpreter(getBackendName(), \ |
12600 | createAndInitMaxPool2DLargePads, \ |
12601 | ElemKind::FloatTy, ElemKind::TYPE, TOL); \ |
12602 | } |
12603 | TEST_MAX_POOL2D_LARGE_PADS(FloatTy, FloatTy, 1e-5) |
12604 | TEST_MAX_POOL2D_LARGE_PADS(Int8QTy, Int8QTy, 0.005) |
12605 | #undef TEST_MAX_POOL2D_LARGE_PADS |
12606 | |
12607 | /// Verify that the AdaptiveAvgPool operator works correctly. |
12608 | TEST_P(OperatorTest, AdaptiveAvgPool) { |
12609 | CHECK_IF_ENABLED(); |
12610 | auto *input = |
12611 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 1}, "input" , false); |
12612 | bindings_.allocate(input)->getHandle() = { |
12613 | 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.}; |
12614 | |
12615 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 3, 3, 1}); |
12616 | auto *pool = F_->createAdaptiveAvgPool("pool" , input, outTy); |
12617 | auto *S = F_->createSave("save" , pool); |
12618 | bindings_.allocate(S->getPlaceholder()); |
12619 | |
12620 | EE_.compile(CompilationMode::Infer); |
12621 | EE_.run(bindings_); |
12622 | |
12623 | auto *result = bindings_.get(S->getPlaceholder()); |
12624 | Tensor out(ElemKind::FloatTy, {1, 3, 3, 1}); |
12625 | out.getHandle() = {2.5, 3.5, 4.5, 6.5, 7.5, 8.5, 10.5, 11.5, 12.5}; |
12626 | EXPECT_TRUE(out.isEqual(*result)); |
12627 | } |
12628 | |
12629 | /// Verify that the AdaptiveAvgPool operator works correctly with fp16. |
12630 | TEST_P(OperatorTest, FP16AdaptiveAvgPool) { |
12631 | CHECK_IF_ENABLED(); |
12632 | auto *input = |
12633 | mod_.createPlaceholder(ElemKind::Float16Ty, {1, 4, 4, 1}, "input" , false); |
12634 | bindings_.allocate(input)->getHandle<float16_t>() = { |
12635 | 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.}; |
12636 | auto outTy = mod_.uniqueType(ElemKind::Float16Ty, {1, 3, 3, 1}); |
12637 | auto *pool = F_->createAdaptiveAvgPool("pool" , input, outTy); |
12638 | auto *S = F_->createSave("save" , pool); |
12639 | bindings_.allocate(S->getPlaceholder()); |
12640 | |
12641 | EE_.compile(CompilationMode::Infer); |
12642 | EE_.run(bindings_); |
12643 | |
12644 | auto *result = bindings_.get(S->getPlaceholder()); |
12645 | Tensor out(ElemKind::Float16Ty, {1, 3, 3, 1}); |
12646 | out.getHandle<float16_t>() = {2.5, 3.5, 4.5, 6.5, 7.5, 8.5, 10.5, 11.5, 12.5}; |
12647 | EXPECT_TRUE(out.isEqual(*result)); |
12648 | } |
12649 | |
12650 | /// Verify that the AdaptiveAvgPool operator works correctly with bfloat16. |
12651 | TEST_P(OperatorTest, BFloat16AdaptiveAvgPool) { |
12652 | CHECK_IF_ENABLED(); |
12653 | auto *input = mod_.createPlaceholder(ElemKind::BFloat16Ty, {1, 4, 4, 1}, |
12654 | "input" , false); |
12655 | bindings_.allocate(input)->getHandle<bfloat16_t>() = { |
12656 | 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.}; |
12657 | auto outTy = mod_.uniqueType(ElemKind::BFloat16Ty, {1, 3, 3, 1}); |
12658 | auto *pool = F_->createAdaptiveAvgPool("pool" , input, outTy); |
12659 | auto *S = F_->createSave("save" , pool); |
12660 | bindings_.allocate(S->getPlaceholder()); |
12661 | |
12662 | EE_.compile(CompilationMode::Infer); |
12663 | EE_.run(bindings_); |
12664 | |
12665 | auto *result = bindings_.get(S->getPlaceholder()); |
12666 | Tensor out(ElemKind::BFloat16Ty, {1, 3, 3, 1}); |
12667 | out.getHandle<bfloat16_t>() = {2.5, 3.5, 4.5, 6.5, 7.5, |
12668 | 8.5, 10.5, 11.5, 12.5}; |
12669 | EXPECT_TRUE(out.isEqual(*result)); |
12670 | } |
12671 | |
12672 | /// Verify that the AdaptiveAvgPool operator works correctly with int8. |
12673 | TEST_P(OperatorTest, Int8AdaptiveAvgPool) { |
12674 | CHECK_IF_ENABLED(); |
12675 | auto *input = mod_.createPlaceholder(ElemKind::Int8QTy, {1, 4, 4, 1}, 1, 0, |
12676 | "input" , false); |
12677 | bindings_.allocate(input)->getHandle<int8_t>() = { |
12678 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; |
12679 | auto outTy = mod_.uniqueType(ElemKind::Int8QTy, {1, 3, 3, 1}, 1, 0); |
12680 | auto *pool = F_->createAdaptiveAvgPool("pool" , input, outTy); |
12681 | auto *S = F_->createSave("save" , pool); |
12682 | bindings_.allocate(S->getPlaceholder()); |
12683 | |
12684 | EE_.compile(CompilationMode::Infer); |
12685 | EE_.run(bindings_); |
12686 | |
12687 | auto *result = bindings_.get(S->getPlaceholder()); |
12688 | Tensor out(ElemKind::Int8QTy, {1, 3, 3, 1}, 1, 0); |
12689 | out.getHandle<int8_t>() = {3, 4, 5, 7, 8, 9, 11, 12, 13}; |
12690 | EXPECT_TRUE(out.isEqual(*result)); |
12691 | } |
12692 | |
12693 | /// Verify that the AdaptiveAvgPool operator works correctly with non-square |
12694 | /// inputs and outputs. |
12695 | TEST_P(OperatorTest, AdaptiveAvgPoolNonSquare) { |
12696 | CHECK_IF_ENABLED(); |
12697 | auto *input = |
12698 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 5, 3, 1}, "input" , false); |
12699 | bindings_.allocate(input)->getHandle() = {0., 1., 2., 3., 4., 5., 6., 7., |
12700 | 8., 9., 10., 11., 12., 13., 14.}; |
12701 | |
12702 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 3, 2, 1}); |
12703 | auto *pool = F_->createAdaptiveAvgPool("pool" , input, outTy); |
12704 | auto *S = F_->createSave("save" , pool); |
12705 | bindings_.allocate(S->getPlaceholder()); |
12706 | |
12707 | EE_.compile(CompilationMode::Infer); |
12708 | EE_.run(bindings_); |
12709 | |
12710 | auto *result = bindings_.get(S->getPlaceholder()); |
12711 | Tensor out(ElemKind::FloatTy, {1, 3, 2, 1}); |
12712 | out.getHandle() = {2, 3, 6.5, 7.5, 11, 12}; |
12713 | EXPECT_TRUE(out.isEqual(*result)); |
12714 | } |
12715 | |
12716 | TEST_P(OperatorTest, MaxPool) { |
12717 | CHECK_IF_ENABLED(); |
12718 | |
12719 | auto *input = |
12720 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 1}, "input" , false); |
12721 | bindings_.allocate(input)->getHandle() = {0., 1., 2., 3., 4., 5., 6., 7., 8.}; |
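  // Each output is the max of a 2x2 window: max{0,1,3,4} = 4,
  // max{1,2,4,5} = 5, max{3,4,6,7} = 7, max{4,5,7,8} = 8.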
12722 | auto *pool = F_->createMaxPool("pool" , input, {2, 2}, {1, 1}, {0, 0, 0, 0}); |
12723 | auto *S = F_->createSave("save" , pool->getResult()); |
12724 | bindings_.allocate(S->getPlaceholder()); |
12725 | |
12726 | EE_.compile(CompilationMode::Infer); |
12727 | EE_.run(bindings_); |
12728 | |
12729 | auto result = bindings_.get(S->getPlaceholder()); |
12730 | Tensor out(ElemKind::FloatTy, {1, 2, 2, 1}); |
12731 | out.getHandle() = {4., 5., 7., 8.}; |
12732 | EXPECT_TRUE(out.isEqual(*result)); |
12733 | } |
12734 | |
12735 | TEST_P(OperatorTest, FP16MaxPool) { |
12736 | CHECK_IF_ENABLED(); |
12737 | |
12738 | auto *input = |
12739 | mod_.createPlaceholder(ElemKind::Float16Ty, {1, 3, 3, 1}, "input" , false); |
12740 | bindings_.allocate(input)->getHandle<float16_t>() = {0., 1., 2., 3., 4., |
12741 | 5., 6., 7., 8.}; |
12742 | auto *pool = F_->createMaxPool("pool" , input, {2, 2}, {1, 1}, {0, 0, 0, 0}); |
12743 | auto *S = F_->createSave("save" , pool->getResult()); |
12744 | bindings_.allocate(S->getPlaceholder()); |
12745 | |
12746 | EE_.compile(CompilationMode::Infer); |
12747 | EE_.run(bindings_); |
12748 | |
12749 | auto result = bindings_.get(S->getPlaceholder()); |
12750 | Tensor out(ElemKind::Float16Ty, {1, 2, 2, 1}); |
12751 | out.getHandle<float16_t>() = {4., 5., 7., 8.}; |
12752 | EXPECT_TRUE(out.isEqual(*result)); |
12753 | } |
12754 | |
12755 | TEST_P(OperatorTest, BFloat16MaxPool) { |
12756 | CHECK_IF_ENABLED(); |
12757 | |
12758 | auto *input = mod_.createPlaceholder(ElemKind::BFloat16Ty, {1, 3, 3, 1}, |
12759 | "input" , false); |
12760 | bindings_.allocate(input)->getHandle<bfloat16_t>() = {0., 1., 2., 3., 4., |
12761 | 5., 6., 7., 8.}; |
12762 | auto *pool = F_->createMaxPool("pool" , input, {2, 2}, {1, 1}, {0, 0, 0, 0}); |
12763 | auto *S = F_->createSave("save" , pool->getResult()); |
12764 | bindings_.allocate(S->getPlaceholder()); |
12765 | |
12766 | EE_.compile(CompilationMode::Infer); |
12767 | EE_.run(bindings_); |
12768 | |
12769 | auto result = bindings_.get(S->getPlaceholder()); |
12770 | Tensor out(ElemKind::BFloat16Ty, {1, 2, 2, 1}); |
12771 | out.getHandle<bfloat16_t>() = {4., 5., 7., 8.}; |
12772 | EXPECT_TRUE(out.isEqual(*result)); |
12773 | } |
12774 | |
12775 | TEST_P(OperatorTest, Int8MaxPool) { |
12776 | CHECK_IF_ENABLED(); |
12777 | |
12778 | auto *input = mod_.createPlaceholder(ElemKind::Int8QTy, {1, 3, 3, 1}, 1, 0, |
12779 | "input" , false); |
12780 | bindings_.allocate(input)->getHandle<int8_t>() = {0, 1, 2, 3, 4, 5, 6, 7, 8}; |
12781 | auto *Pool = F_->createMaxPool("pool" , input, {2, 2}, {1, 1}, {0, 0, 0, 0}); |
12782 | auto *S = F_->createSave("save" , Pool->getResult()); |
12783 | bindings_.allocate(S->getPlaceholder()); |
12784 | |
12785 | EE_.compile(CompilationMode::Infer); |
12786 | EE_.run(bindings_); |
12787 | |
12788 | auto result = bindings_.get(S->getPlaceholder())->getHandle<int8_t>(); |
12789 | Tensor out(ElemKind::Int8QTy, {2, 2}, 1, 0); |
12790 | out.getHandle<int8_t>() = {4, 5, 7, 8}; |
12791 | for (size_t i = 0; i < 2 * 2; i++) { |
12792 | EXPECT_EQ(result.raw(i), out.getHandle<int8_t>().raw(i)); |
12793 | } |
12794 | } |
12795 | |
12796 | #define COMPARE_UNARY_OP_FUN(_OP_NAME_, LEN, LOW, HIGH) \ |
12797 | static FunctionTensorPair createAndInitBasic##_OP_NAME_##Test( \ |
12798 | glow::PlaceholderBindings &bindings, glow::ExecutionEngine &EE) { \ |
12799 | auto &mod = EE.getModule(); \ |
12800 | Function *F = mod.createFunction("main"); \ |
12801 | \ |
12802 | auto *input = \ |
12803 | mod.createPlaceholder(ElemKind::FloatTy, {LEN}, "input", false); \ |
12804 | bindings.allocate(input)->getHandle().randomize(LOW, HIGH, mod.getPRNG()); \ |
    auto *op = F->create##_OP_NAME_(#_OP_NAME_, input);                        \
    auto *save = F->createSave("Save", op);                                    \
12807 | auto *resultTensor = bindings.allocate(save->getPlaceholder()); \ |
12808 | return std::make_pair(F, resultTensor); \ |
12809 | } |
12810 | COMPARE_UNARY_OP_FUN(Exp, 10, -1.0F, 1.0F) |
12811 | COMPARE_UNARY_OP_FUN(Tanh, 10, -10.0F, 10.0F) |
12812 | COMPARE_UNARY_OP_FUN(Log, 1000, 1.0F, 100.0F) |
12813 | COMPARE_UNARY_OP_FUN(Sigmoid, 10, -10.0F, 10.0F) |
12814 | #undef COMPARE_UNARY_OP_FUN |
12815 | |
/// Test to verify that the sigmoid implementation matches the mirrored LUT
/// reference implementation.
/// Sweeps the range [-20, 20], prints the backend output next to the ideal
/// fp16 sigmoid, and fails when any point exceeds both the absolute and the
/// relative error thresholds.
12821 | static void testSigmoidFp16Sweep(glow::PlaceholderBindings &bindings, |
12822 | glow::Module &mod, glow::Function *F, |
12823 | glow::ExecutionEngine &EE) { |
12824 | constexpr dim_t N = 100; |
12825 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {N}, "input" , false); |
12826 | auto inputH = bindings.allocate(input)->getHandle(); |
12827 | |
12828 | constexpr float rangeStart = -20; |
12829 | constexpr float rangeEnd = 20; |
12830 | constexpr float delta = (rangeEnd - rangeStart) / N; |
12831 | |
12832 | for (dim_t i = 0; i < N; i++) { |
12833 | inputH.raw(i) = rangeStart + i * delta; |
12834 | } |
12835 | |
12836 | auto *sigmoid = F->createSigmoid("Sigmoid" , input); |
12837 | auto *save = F->createSave("Save" , sigmoid); |
12838 | auto *resultTensor = bindings.allocate(save->getPlaceholder()); |
12839 | |
12840 | CompilationContext cctx; |
12841 | cctx.precisionConfig.convertToFP16 = true; |
12842 | cctx.precisionConfig.convertFusedToFP16 = true; |
12843 | cctx.precisionConfig.float16Format = |
12844 | PrecisionConfiguration::Float16Format::FP16; |
12845 | |
12846 | EE.compile(cctx); |
12847 | EE.run(bindings); |
12848 | |
12849 | auto resultH = resultTensor->getHandle(); |
12850 | int numDiffs = 0; |
12851 | |
12852 | for (dim_t i = 0; i < N; i++) { |
12853 | float inputV = inputH.at({i}); |
12854 | float refIdeal = refSigmoidFp16(inputV); |
12855 | float output = resultH.at({i}); |
12856 | float absDiff = fabs(output - refIdeal); |
12857 | float relDiff = fabs(absDiff / (refIdeal + 1e-8)); |
12858 | |
12859 | bool failed = false; |
    // The relative error bound would ideally be 2^-11, but it is relaxed
    // here due to linear interpolation. The absolute error bound stays at
    // 1e-5 for now.
12863 | if (absDiff > 1e-5 && relDiff > 2e-3) { |
12864 | numDiffs++; |
12865 | failed = true; |
12866 | } |
12867 | |
12868 | llvm::outs() << "Sigmoid " << i << " " << inputV << " Backend:" << output |
12869 | << " ref_ideal:" << refIdeal << " relDiff:" << relDiff |
12870 | << " absDiff:" << absDiff << " failed:" << failed << "\n" ; |
12871 | } |
12872 | llvm::outs() << "Number of diffs: " << numDiffs << "\n" ; |
12873 | llvm::outs().flush(); |
12874 | |
12875 | EXPECT_EQ(numDiffs, 0); |
12876 | } |
12877 | |
/// Test to verify that the sigmoid implementation matches the mirrored LUT
/// reference implementation.
/// Sweeps the range [-20, 20], prints the backend output next to the ideal
/// bfloat16 sigmoid, and fails when any point exceeds both the absolute and
/// the relative error thresholds.
12883 | static void testSigmoidBFloat16Sweep(glow::PlaceholderBindings &bindings, |
12884 | glow::Module &mod, glow::Function *F, |
12885 | glow::ExecutionEngine &EE) { |
12886 | constexpr dim_t N = 100; |
12887 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {N}, "input" , false); |
12888 | auto inputH = bindings.allocate(input)->getHandle(); |
12889 | |
12890 | constexpr float rangeStart = -20; |
12891 | constexpr float rangeEnd = 20; |
12892 | constexpr float delta = (rangeEnd - rangeStart) / N; |
12893 | |
12894 | for (dim_t i = 0; i < N; i++) { |
12895 | inputH.raw(i) = rangeStart + i * delta; |
12896 | } |
12897 | |
12898 | auto *sigmoid = F->createSigmoid("Sigmoid" , input); |
12899 | auto *save = F->createSave("Save" , sigmoid); |
12900 | auto *resultTensor = bindings.allocate(save->getPlaceholder()); |
12901 | |
12902 | CompilationContext cctx; |
12903 | cctx.precisionConfig.convertToFP16 = true; |
12904 | cctx.precisionConfig.convertFusedToFP16 = true; |
12905 | cctx.precisionConfig.float16Format = |
12906 | PrecisionConfiguration::Float16Format::BFloat16; |
12907 | |
12908 | EE.compile(cctx); |
12909 | EE.run(bindings); |
12910 | |
12911 | auto resultH = resultTensor->getHandle(); |
12912 | int numDiffs = 0; |
12913 | |
12914 | for (dim_t i = 0; i < N; i++) { |
12915 | float inputV = inputH.at({i}); |
12916 | float refIdeal = refSigmoidBFloat16(inputV); |
12917 | float output = resultH.at({i}); |
12918 | float absDiff = fabs(output - refIdeal); |
12919 | float relDiff = fabs(absDiff / (refIdeal + 1e-8)); |
12920 | |
12921 | bool failed = false; |
    // The relative error bound would ideally be 2^-11, but it is relaxed
    // due to linear interpolation, and both bounds are loosened further for
    // bfloat16's reduced precision (1e-3 absolute, 2e-2 relative).
12925 | if (absDiff > 1e-3 && relDiff > 2e-2) { |
12926 | numDiffs++; |
12927 | failed = true; |
12928 | } |
12929 | |
12930 | llvm::outs() << "Sigmoid " << i << " " << inputV << " Backend:" << output |
12931 | << " ref_ideal:" << refIdeal << " relDiff:" << relDiff |
12932 | << " absDiff:" << absDiff << " failed:" << failed << "\n" ; |
12933 | } |
12934 | llvm::outs() << "Number of diffs: " << numDiffs << "\n" ; |
12935 | llvm::outs().flush(); |
12936 | |
12937 | EXPECT_EQ(numDiffs, 0); |
12938 | } |
12939 | |
12940 | TEST_P(OperatorTest, SigmoidSweep_Float16) { |
12941 | CHECK_IF_ENABLED(); |
12942 | |
12943 | testSigmoidFp16Sweep(bindings_, mod_, F_, EE_); |
12944 | } |
12945 | |
12946 | TEST_P(OperatorTest, SigmoidSweep_BFloat16) { |
12947 | CHECK_IF_ENABLED(); |
12948 | |
12949 | testSigmoidBFloat16Sweep(bindings_, mod_, F_, EE_); |
12950 | } |
12951 | |
/// Reference ideal tanh implementation. Computes tanh in fp32 and casts the
/// result to fp16, flushing denormal results to zero.
12954 | static float16_t refTanHFp16(float x) { |
12955 | float res = (exp(2 * x) - 1) / (exp(2 * x) + 1); |
12956 | if (fabs(res) < 6e-5) { |
12957 | res = 0.0; |
12958 | } |
12959 | return (float16_t)res; |
12960 | } |
12961 | |
/// Reference ideal tanh implementation. Computes tanh in fp32 and casts the
/// result to bfloat16, flushing denormal results to zero.
12964 | static bfloat16_t refTanHBFloat16(float x) { |
12965 | float res = (exp(2 * x) - 1) / (exp(2 * x) + 1); |
12966 | if (fabs(res) < 6e-5) { |
12967 | res = 0.0; |
12968 | } |
12969 | return (bfloat16_t)res; |
12970 | } |
12971 | |
/// Test to verify that the tanh implementation is close to the ideal one.
/// Sweeps the range [-15, 15] and prints the backend output next to the
/// ideal tanh computed in fp16.
12975 | static void testTanHFp16Sweep(glow::PlaceholderBindings &bindings, |
12976 | glow::Module &mod, glow::Function *F, |
12977 | glow::ExecutionEngine &EE) { |
12978 | constexpr dim_t N = 100; |
12979 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {N}, "input" , false); |
12980 | auto inputH = bindings.allocate(input)->getHandle(); |
12981 | |
12982 | constexpr float rangeStart = -15; |
12983 | constexpr float rangeEnd = 15; |
12984 | constexpr float delta = (rangeEnd - rangeStart) / N; |
12985 | |
12986 | for (dim_t i = 0; i < N; i++) { |
12987 | inputH.raw(i) = rangeStart + i * delta; |
12988 | } |
12989 | |
12990 | auto *sigmoid = F->createTanh("TanH" , input); |
12991 | auto *save = F->createSave("Save" , sigmoid); |
12992 | auto *resultTensor = bindings.allocate(save->getPlaceholder()); |
12993 | |
12994 | CompilationContext cctx; |
12995 | cctx.precisionConfig.convertToFP16 = true; |
12996 | cctx.precisionConfig.convertFusedToFP16 = true; |
12997 | cctx.precisionConfig.float16Format = |
12998 | PrecisionConfiguration::Float16Format::FP16; |
12999 | |
13000 | EE.compile(cctx); |
13001 | EE.run(bindings); |
13002 | |
13003 | auto resultH = resultTensor->getHandle(); |
13004 | int count = 0; |
13005 | |
13006 | for (dim_t i = 0; i < N; i++) { |
13007 | float inputV = inputH.at({i}); |
13008 | float refIdeal = refTanHFp16(inputV); |
13009 | float output = resultH.at({i}); |
13010 | float diff = fabs(output - refIdeal); |
13011 | |
13012 | if (diff > 1e-6) { |
13013 | count++; |
13014 | } |
13015 | |
13016 | llvm::outs() << "TanH " << i << " " << inputV << " Backend:" << output |
13017 | << " ref_ideal:" << refIdeal << " diff:" << diff << "\n" ; |
13018 | } |
13019 | llvm::outs().flush(); |
13020 | |
13021 | EXPECT_EQ(count, 0); |
13022 | } |
13023 | |
/// Test to verify that the tanh implementation is close to the ideal one.
/// Sweeps the range [-15, 15] and prints the backend output next to the
/// ideal tanh computed in bfloat16.
13027 | static void testTanHBFloat16Sweep(glow::PlaceholderBindings &bindings, |
13028 | glow::Module &mod, glow::Function *F, |
13029 | glow::ExecutionEngine &EE) { |
13030 | constexpr dim_t N = 100; |
13031 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {N}, "input" , false); |
13032 | auto inputH = bindings.allocate(input)->getHandle(); |
13033 | |
13034 | constexpr float rangeStart = -15; |
13035 | constexpr float rangeEnd = 15; |
13036 | constexpr float delta = (rangeEnd - rangeStart) / N; |
13037 | |
13038 | for (dim_t i = 0; i < N; i++) { |
13039 | inputH.raw(i) = rangeStart + i * delta; |
13040 | } |
13041 | |
13042 | auto *sigmoid = F->createTanh("TanH" , input); |
13043 | auto *save = F->createSave("Save" , sigmoid); |
13044 | auto *resultTensor = bindings.allocate(save->getPlaceholder()); |
13045 | |
13046 | CompilationContext cctx; |
13047 | cctx.precisionConfig.convertToFP16 = true; |
13048 | cctx.precisionConfig.convertFusedToFP16 = true; |
13049 | cctx.precisionConfig.float16Format = |
13050 | PrecisionConfiguration::Float16Format::BFloat16; |
13051 | |
13052 | EE.compile(cctx); |
13053 | EE.run(bindings); |
13054 | |
13055 | auto resultH = resultTensor->getHandle(); |
13056 | int count = 0; |
13057 | |
13058 | for (dim_t i = 0; i < N; i++) { |
13059 | float inputV = inputH.at({i}); |
13060 | float refIdeal = refTanHBFloat16(inputV); |
13061 | float output = resultH.at({i}); |
13062 | float diff = fabs(output - refIdeal); |
13063 | |
13064 | if (diff > 1e-2) { |
13065 | count++; |
13066 | } |
13067 | |
13068 | llvm::outs() << "TanH " << i << " " << inputV << " Backend:" << output |
13069 | << " ref_ideal:" << refIdeal << " diff:" << diff << "\n" ; |
13070 | } |
13071 | llvm::outs().flush(); |
13072 | |
13073 | EXPECT_EQ(count, 0); |
13074 | } |
13075 | |
13076 | TEST_P(OperatorTest, TanHSweep_Float16) { |
13077 | CHECK_IF_ENABLED(); |
13078 | |
13079 | testTanHFp16Sweep(bindings_, mod_, F_, EE_); |
13080 | } |
13081 | |
13082 | TEST_P(OperatorTest, TanHSweep_BFloat16) { |
13083 | CHECK_IF_ENABLED(); |
13084 | |
13085 | testTanHBFloat16Sweep(bindings_, mod_, F_, EE_); |
13086 | } |
13087 | |
13088 | template <typename DataType> |
13089 | static void testMaxPoolWithArgmax(glow::PlaceholderBindings &bindings, |
13090 | glow::Module &mod, glow::Function *F, |
13091 | glow::ExecutionEngine &EE, ElemKind DTy) { |
13092 | auto *input = createPlaceholderConditionallyQuantized(mod, DTy, {1, 3, 3, 1}, |
13093 | "input" , false, "NHWC" ); |
13094 | bindings.allocate(input)->getHandle<DataType>() = {0, 3, 7, 6, 5, 1, 2, 8, 4}; |
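  // Viewed as a 3x3 grid the input is {0 3 7 / 6 5 1 / 2 8 4}; the four 2x2
  // windows peak at 6, 7, 8, 8, whose row-major flat indices are 3, 2, 7, 7.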
13095 | auto *pool = F->createMaxPool("pool" , input, {2, 2}, {1, 1}, {0, 0, 0, 0}); |
13096 | auto *SResult = F->createSave("save_result" , pool->getResult()); |
13097 | auto *SArgmax = F->createSave("save_argmax" , pool->getArgmax()); |
13098 | bindings.allocate(SResult->getPlaceholder()); |
13099 | bindings.allocate(SArgmax->getPlaceholder()); |
13100 | |
13101 | EE.compile(CompilationMode::Infer); |
13102 | EE.run(bindings); |
13103 | |
13104 | auto result = bindings.get(SResult->getPlaceholder()); |
13105 | auto argmax = bindings.get(SArgmax->getPlaceholder()); |
13106 | Tensor out1 = createTensorConditionallyQuantized(DTy, {1, 2, 2, 1}); |
13107 | out1.getHandle<DataType>() = {6, 7, 8, 8}; |
13108 | EXPECT_TRUE(out1.isEqual(*result)); |
13109 | |
13110 | Tensor out2(ElemKind::Int64ITy, {1, 2, 2, 1}); |
13111 | out2.getHandle<int64_t>() = {3, 2, 7, 7}; |
13112 | EXPECT_TRUE(out2.isEqual(*argmax)); |
13113 | } |
13114 | |
13115 | TEST_P(OperatorTest, FloatMaxPoolWithArgmax) { |
13116 | CHECK_IF_ENABLED(); |
13117 | testMaxPoolWithArgmax<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
13118 | } |
13119 | |
13120 | TEST_P(OperatorTest, QuantizedMaxPoolWithArgmax) { |
13121 | CHECK_IF_ENABLED(); |
13122 | testMaxPoolWithArgmax<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
13123 | } |
13124 | |
13125 | template <typename DataType> |
13126 | static void |
13127 | testMaxPoolWithArgmaxTransposed(glow::PlaceholderBindings &bindings, |
13128 | glow::Module &mod, glow::Function *F, |
13129 | glow::ExecutionEngine &EE, ElemKind DTy) { |
13130 | // Show that sequence Tensor(NCHW) -> Transpose(NCHWtoNHWC) -> |
13131 | // MaxPoolWithArgmax -> Transpose(NHWCtoNCHW) produces correct |
13132 | // linearization. |
13133 | auto *inputNCHW = createPlaceholderConditionallyQuantized( |
13134 | mod, DTy, {1, 3, 4, 4}, "input" , false, "NCHW" ); |
13135 | auto inHandle = bindings.allocate(inputNCHW)->getHandle<DataType>(); |
13136 | inHandle.clear(0.); |
13137 | inHandle.at({0, 0, 2, 2}) = 11; |
13138 | inHandle.at({0, 1, 2, 2}) = 22; |
13139 | inHandle.at({0, 2, 2, 2}) = 33; |
13140 | |
13141 | // Input NCHW to NHWC conversion. |
13142 | auto *inputNHWC = |
13143 | F->createTranspose("transposeInput" , inputNCHW, {0, 2, 3, 1}, "NHWC" ); |
13144 | auto *pool = |
13145 | F->createMaxPool("pool" , inputNHWC, {4, 4}, {4, 4}, {0, 0, 0, 0}); |
13146 | |
13147 | // NHWC to NCHW conversion. |
13148 | auto *resultNCHW = F->createTranspose("transposeRes" , pool->getResult(), |
13149 | {0, 3, 1, 2}, "NCHW" ); |
13150 | auto *argmaxNCHW = F->createTranspose("transposeArgmax" , pool->getArgmax(), |
13151 | {0, 3, 1, 2}, "NCHW" ); |
13152 | |
13153 | auto *SResult = F->createSave("save_result" , resultNCHW); |
13154 | auto *SArgmax = F->createSave("save_argmax" , argmaxNCHW); |
13155 | bindings.allocate(SResult->getPlaceholder()); |
13156 | bindings.allocate(SArgmax->getPlaceholder()); |
13157 | |
13158 | EE.compile(CompilationMode::Infer); |
13159 | EE.run(bindings); |
13160 | |
13161 | auto result = bindings.get(SResult->getPlaceholder()); |
13162 | auto argmax = bindings.get(SArgmax->getPlaceholder()); |
13163 | Tensor out1 = createTensorConditionallyQuantized(DTy, {1, 3, 1, 1}); |
13164 | out1.getHandle<DataType>() = {11, 22, 33}; |
13165 | EXPECT_TRUE(out1.isEqual(*result)); |
13166 | |
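  // Argmax indices are linearized over the NHWC input {1, 4, 4, 3}:
  // flat = c + w * C + h * W * C, so the max at (h = 2, w = 2) of channel c
  // maps to c + 2 * 3 + 2 * 12.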
13167 | Tensor out2(ElemKind::Int64ITy, {1, 3, 1, 1}); |
13168 | out2.getHandle<int64_t>() = {0 + 2 * 3 + 2 * 12, 1 + 2 * 3 + 2 * 12, |
13169 | 2 + 2 * 3 + 2 * 12}; |
13170 | EXPECT_TRUE(out2.isEqual(*argmax)); |
13171 | } |
13172 | |
13173 | TEST_P(OperatorTest, FloatMaxPoolWithArgmaxTransposed) { |
13174 | CHECK_IF_ENABLED(); |
13175 | testMaxPoolWithArgmaxTransposed<float>(bindings_, mod_, F_, EE_, |
13176 | ElemKind::FloatTy); |
13177 | } |
13178 | |
13179 | TEST_P(OperatorTest, QuantizedMaxPoolWithArgmaxTransposed) { |
13180 | CHECK_IF_ENABLED(); |
13181 | testMaxPoolWithArgmaxTransposed<int8_t>(bindings_, mod_, F_, EE_, |
13182 | ElemKind::Int8QTy); |
13183 | } |
13184 | |
13185 | TEST_P(OperatorStatelessTest, Int8Tanh) { |
13186 | CHECK_IF_ENABLED(); |
13187 | compareAgainstInterpreter(getBackendName(), createAndInitBasicTanhTest, |
13188 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.005f, |
13189 | parCloneCountOpt); |
13190 | } |
13191 | |
13192 | TEST_P(OperatorStatelessTest, Tanh_Float16) { |
13193 | CHECK_IF_ENABLED(); |
13194 | compareAgainstInterpreter(getBackendName(), createAndInitBasicTanhTest, |
13195 | ElemKind::FloatTy, ElemKind::Float16Ty, 0.001f, |
13196 | parCloneCountOpt); |
13197 | } |
13198 | |
13199 | TEST_P(OperatorStatelessTest, Tanh_BFloat16) { |
13200 | CHECK_IF_ENABLED(); |
13201 | compareAgainstInterpreter(getBackendName(), createAndInitBasicTanhTest, |
13202 | ElemKind::FloatTy, ElemKind::BFloat16Ty, 0.001f, |
13203 | parCloneCountOpt); |
13204 | } |
13205 | |
13206 | /// Verify that the Tanh operator works correctly. |
13207 | TEST_P(OperatorTest, Tanh) { |
13208 | CHECK_IF_ENABLED(); |
13209 | |
13210 | constexpr dim_t size = 10; |
13211 | auto *input = |
13212 | mod_.createPlaceholder(ElemKind::FloatTy, {size}, "input" , false); |
13213 | bindings_.allocate(input)->getHandle().randomize(-10.0, 10.0, mod_.getPRNG()); |
13214 | |
13215 | auto *tanh = F_->createTanh("Tanh" , input); |
13216 | auto *save = F_->createSave("Save" , tanh); |
13217 | bindings_.allocate(save->getPlaceholder()); |
13218 | |
13219 | EE_.compile(CompilationMode::Infer); |
13220 | EE_.run(bindings_); |
13221 | |
13222 | auto resultH = bindings_.get(save->getPlaceholder())->getHandle(); |
13223 | auto inputH = bindings_.get(input)->getHandle(); |
13224 | |
13225 | for (dim_t i = 0; i < size; i++) { |
13226 | EXPECT_NEAR(resultH.at({i}), std::tanh(inputH.at({i})), 0.001); |
13227 | } |
13228 | } |
13229 | |
13230 | TEST_P(OperatorStatelessTest, Exp_Float16) { |
13231 | CHECK_IF_ENABLED(); |
13232 | compareAgainstInterpreter(getBackendName(), createAndInitBasicExpTest, |
13233 | ElemKind::FloatTy, ElemKind::Float16Ty, 0.005f, |
13234 | parCloneCountOpt); |
13235 | } |
13236 | |
13237 | TEST_P(OperatorStatelessTest, Exp_BFloat16) { |
13238 | CHECK_IF_ENABLED(); |
13239 | compareAgainstInterpreter(getBackendName(), createAndInitBasicExpTest, |
13240 | ElemKind::FloatTy, ElemKind::BFloat16Ty, 0.005f, |
13241 | parCloneCountOpt); |
13242 | } |
13243 | |
13244 | /// Verify that the Exp operator works correctly. |
13245 | TEST_P(OperatorTest, Exp) { |
13246 | CHECK_IF_ENABLED(); |
13247 | constexpr dim_t size = 10; |
13248 | auto *input = |
13249 | mod_.createPlaceholder(ElemKind::FloatTy, {size}, "input" , false); |
13250 | bindings_.allocate(input)->getHandle().randomize(-10.0, 10.0, mod_.getPRNG()); |
13251 | |
13252 | auto *expn = F_->createExp("Exp" , input); |
13253 | auto *save = F_->createSave("Save" , expn); |
13254 | bindings_.allocate(save->getPlaceholder()); |
13255 | |
13256 | EE_.compile(CompilationMode::Infer); |
13257 | EE_.run(bindings_); |
13258 | |
13259 | auto resultH = bindings_.get(save->getPlaceholder())->getHandle(); |
13260 | auto inputH = bindings_.get(input)->getHandle(); |
13261 | |
13262 | for (dim_t i = 0; i < size; i++) { |
13263 | EXPECT_NEAR(resultH.at({i}), std::exp(inputH.at({i})), 0.001); |
13264 | } |
13265 | } |
13266 | |
13267 | /// Verify that a quantized Log works correctly. |
13268 | TEST_P(OperatorStatelessTest, Int8Log) { |
13269 | CHECK_IF_ENABLED(); |
13270 | compareAgainstInterpreter(getBackendName(), createAndInitBasicLogTest, |
13271 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.1f, |
13272 | parCloneCountOpt); |
13273 | } |
13274 | |
13275 | /// Check Non-square kernel for conv. |
13276 | TEST_P(OperatorTest, NonSquareKernelConvolution) { |
13277 | CHECK_IF_ENABLED(); |
13278 | |
13279 | auto *input = |
13280 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 1}, "input" , false); |
13281 | auto IH = bindings_.allocate(input)->getHandle(); |
13282 | for (size_t i = 0; i < 4 * 4; i++) { |
13283 | IH.raw(i) = i + 1; |
13284 | } |
13285 | |
13286 | auto filter = |
13287 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 3, 1}, "filter" , false); |
13288 | auto FH = bindings_.allocate(filter)->getHandle(); |
13289 | for (size_t i = 0; i < 1 * 2 * 3; i++) { |
13290 | FH.raw(i) = i + 1; |
13291 | } |
13292 | |
13293 | auto *zeroBias = |
13294 | mod_.createPlaceholder(ElemKind::FloatTy, {1}, "bias" , false); |
13295 | bindings_.allocate(zeroBias)->zero(); |
13296 | |
13297 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 3, 2, 1}); |
13298 | ConvolutionNode *CN = F_->createConv("Conv" , input, filter, zeroBias, outTy, |
13299 | {2, 3}, {1, 1}, {0, 0, 0, 0}, 1); |
13300 | SaveNode *S = F_->createSave("save" , CN); |
13301 | bindings_.allocate(S->getPlaceholder()); |
13302 | |
13303 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
13304 | {input, S->getPlaceholder()}); |
13305 | EE_.compile(CompilationMode::Infer); |
13306 | EE_.run(bindings_); |
13307 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13308 | |
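  // E.g. the first output element: the 2x3 window over input rows 0-1,
  // cols 0-2 is {1, 2, 3, 5, 6, 7}; its dot product with the filter
  // {1, 2, 3, 4, 5, 6} is 1 + 4 + 9 + 20 + 30 + 42 = 106.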
13309 | static const float ref[] = {106, 127, 190, 211, 274, 295}; |
13310 | for (size_t i = 0; i < 6; i++) |
13311 | EXPECT_EQ(result.getHandle().raw(i), ref[i]); |
13312 | } |
13313 | |
13314 | /// Check Non-cubic kernel for conv3D. |
13315 | TEST_P(OperatorTest, NonCubicKernelConv3D) { |
13316 | CHECK_IF_ENABLED(); |
13317 | |
13318 | auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 4, 1}, |
13319 | "input" , false); |
13320 | auto IH = bindings_.allocate(input)->getHandle(); |
13321 | int nextVal = 1; |
13322 | for (dim_t i = 0; i < 4; i++) { |
13323 | for (dim_t j = 0; j < 4; j++) { |
13324 | for (dim_t k = 0; k < 4; k++) { |
13325 | IH.at({0, i, j, k, 0}) = static_cast<float>(nextVal++); |
13326 | } // D |
13327 | } // W |
13328 | } // H |
13329 | |
13330 | auto *filter = mod_.createPlaceholder(ElemKind::FloatTy, {1, 1, 2, 3, 1}, |
13331 | "filter" , false); |
13332 | auto FH = bindings_.allocate(filter)->getHandle(); |
13333 | nextVal = 1; |
13334 | for (dim_t i = 0; i < 1; i++) { |
13335 | for (dim_t j = 0; j < 2; j++) { |
13336 | for (dim_t k = 0; k < 3; k++) { |
13337 | FH.at({0, i, j, k, 0}) = static_cast<float>(nextVal++); |
13338 | } // D |
13339 | } // W |
13340 | } // H |
13341 | |
13342 | auto *zeroBias = |
13343 | mod_.createPlaceholder(ElemKind::FloatTy, {1}, "bias" , false); |
13344 | bindings_.allocate(zeroBias)->zero(); |
13345 | |
13346 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 4, 3, 2, 1}); |
13347 | |
13348 | Convolution3DNode *CN = |
13349 | F_->createConv3D("Conv3D" , input, filter, zeroBias, outTy, {1, 2, 3}, |
13350 | {1, 1, 1}, {0, 0, 0, 0, 0, 0}, 1); |
13351 | SaveNode *S = F_->createSave("save" , CN); |
13352 | bindings_.allocate(S->getPlaceholder()); |
13353 | |
13354 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
13355 | {input, S->getPlaceholder()}); |
13356 | EE_.compile(CompilationMode::Infer); |
13357 | EE_.run(bindings_); |
13358 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13359 | |
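  // E.g. the first output element: the 1x2x3 kernel over the first 4x4 slice
  // (rows 0-1, cols 0-2, values {1, 2, 3, 5, 6, 7}) dotted with the filter
  // {1, 2, 3, 4, 5, 6} gives 1 + 4 + 9 + 20 + 30 + 42 = 106.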
13360 | static const float ref[] = {106, 127, 190, 211, 274, 295, 442, 463, |
13361 | 526, 547, 610, 631, 778, 799, 862, 883, |
13362 | 946, 967, 1114, 1135, 1198, 1219, 1282, 1303}; |
13363 | for (size_t i = 0; i < 4 * 3 * 2; i++) { |
13364 | EXPECT_EQ(result.getHandle().raw(i), ref[i]); |
13365 | } |
13366 | } |
13367 | |
13368 | /// Check Non-cubic kernel for conv3D with quantized input, filters, and bias. |
13369 | TEST_P(OperatorTest, NonCubicKernelConv3DQuantized) { |
13370 | CHECK_IF_ENABLED(); |
13371 | |
13372 | auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 4, 1}, |
13373 | "input" , false); |
13374 | auto IH = bindings_.allocate(input)->getHandle(); |
13375 | int nextVal = 1; |
13376 | for (dim_t i = 0; i < 4; i++) { |
13377 | for (dim_t j = 0; j < 4; j++) { |
13378 | for (dim_t k = 0; k < 4; k++) { |
13379 | IH.at({0, i, j, k, 0}) = static_cast<float>(nextVal++); |
13380 | } // D |
13381 | } // W |
13382 | } // H |
13383 | |
13384 | auto qInType = mod_.uniqueType(ElemKind::Int16QTy, {1, 4, 4, 4, 1}, 0.1, 0); |
13385 | QuantizeNode *qInput = F_->createQuantize("q_input" , input, qInType); |
13386 | |
13387 | auto *filter = mod_.createPlaceholder(ElemKind::FloatTy, {1, 1, 2, 3, 1}, |
13388 | "filter" , false); |
13389 | auto FH = bindings_.allocate(filter)->getHandle(); |
13390 | nextVal = 1; |
13391 | for (dim_t i = 0; i < 1; i++) { |
13392 | for (dim_t j = 0; j < 2; j++) { |
13393 | for (dim_t k = 0; k < 3; k++) { |
13394 | FH.at({0, i, j, k, 0}) = static_cast<float>(nextVal++); |
13395 | } // D |
13396 | } // W |
13397 | } // H |
13398 | |
13399 | auto qFilterType = |
13400 | mod_.uniqueType(ElemKind::Int16QTy, {1, 1, 2, 3, 1}, 0.1, 0); |
13401 | QuantizeNode *qFilter = F_->createQuantize("q_filter" , filter, qFilterType); |
13402 | |
13403 | auto *bias = mod_.createPlaceholder(ElemKind::FloatTy, {1}, "bias" , false); |
13404 | bindings_.allocate(bias)->zero(); |
13405 | |
13406 | auto qBiasType = mod_.uniqueType(ElemKind::Int32QTy, {1}, 0.1, 0); |
13407 | QuantizeNode *qBias = F_->createQuantize("q_bias" , bias, qBiasType); |
13408 | |
13409 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 4, 3, 2, 1}); |
13410 | |
13411 | Convolution3DNode *CN = |
13412 | F_->createConv3D("Conv3D" , input, filter, bias, outTy, {1, 2, 3}, |
13413 | {1, 1, 1}, {0, 0, 0, 0, 0, 0}, 1); |
13414 | |
13415 | auto qOutTy = mod_.uniqueType(ElemKind::Int16QTy, {1, 4, 3, 2, 1}, 0.1, 0); |
13416 | |
13417 | Convolution3DNode *qCN = |
13418 | F_->createConv3D("q_Conv3D" , qInput, qFilter, qBias, qOutTy, {1, 2, 3}, |
13419 | {1, 1, 1}, {0, 0, 0, 0, 0, 0}, 1); |
13420 | |
13421 | SaveNode *S = F_->createSave("save" , CN); |
13422 | |
13423 | DequantizeNode *deQ = |
13424 | F_->createDequantize("deQ_result" , qCN, ElemKind::FloatTy); |
13425 | SaveNode *qS = F_->createSave("save" , deQ); |
13426 | |
13427 | bindings_.allocate(S->getPlaceholder()); |
13428 | |
13429 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
13430 | {input, S->getPlaceholder()}); |
13431 | bindings_.allocate(mod_.getPlaceholders()); |
13432 | EE_.compile(CompilationMode::Infer); |
13433 | EE_.run(bindings_); |
13434 | |
13435 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13436 | Tensor &qResult = *bindings_.get(qS->getPlaceholder()); |
13437 | |
13438 | for (size_t i = 0; i < 4 * 3 * 2; i++) { |
13439 | EXPECT_NEAR(qResult.getHandle().raw(i), result.getHandle().raw(i), 0.5); |
13440 | } |
13441 | } |
13442 | |
13443 | /// Test for quantized Convolution3D. |
13444 | static void Conv3DQuantizedTest(glow::PlaceholderBindings &bindings, |
13445 | glow::Module &mod, glow::Function *F, |
13446 | glow::ExecutionEngine &EE, ElemKind elemKind, |
13447 | ElemKind biaselemKind) { |
13448 | // Create floating-point network. |
13449 | auto *input = |
13450 | mod.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 4, 1}, "input" , false); |
13451 | auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {1, 1, 2, 3, 1}, |
13452 | "filter" , false); |
13453 | auto *bias = mod.createPlaceholder(ElemKind::FloatTy, {1}, "bias" , false); |
13454 | auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 4, 3, 2, 1}); |
13455 | Convolution3DNode *conv3d = |
13456 | F->createConv3D("Conv3D" , input, filter, bias, outTy, {1, 2, 3}, |
13457 | {1, 1, 1}, {0, 0, 0, 0, 0, 0}, 1); |
13458 | SaveNode *save = F->createSave("save" , conv3d); |
13459 | |
13460 | // Quantized types. |
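  // The output range is wider than the input/filter ranges because each
  // output element is a sum of six products of values in [-1, 1] plus a bias.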
13461 | auto inputTQP = quantization::chooseQuantizationParams( |
13462 | {-1.0, 1.0}, quantization::Schema::Asymmetric, elemKind); |
13463 | auto filterTQP = quantization::chooseQuantizationParams( |
13464 | {-1.0, 1.0}, quantization::Schema::Asymmetric, elemKind); |
13465 | auto outputTQP = quantization::chooseQuantizationParams( |
13466 | {-4.0, 4.0}, quantization::Schema::Asymmetric, elemKind); |
13467 | |
13468 | // Create quantized network. |
13469 | auto inputQTy = mod.uniqueType(elemKind, {1, 4, 4, 4, 1}, inputTQP.scale, |
13470 | inputTQP.offset); |
13471 | auto filterQTy = mod.uniqueType(elemKind, {1, 1, 2, 3, 1}, filterTQP.scale, |
13472 | filterTQP.offset); |
13473 | auto outQTy = mod.uniqueType(elemKind, {1, 4, 3, 2, 1}, outputTQP.scale, |
13474 | outputTQP.offset); |
13475 | QuantizeNode *inputQ = F->createQuantize("inputQ" , input, inputQTy); |
13476 | QuantizeNode *filterQ = F->createQuantize("filterQ" , filter, filterQTy); |
13477 | Convolution3DNode *conv3dQ = nullptr; |
13478 | if (biaselemKind == ElemKind::FloatTy) { |
13479 | conv3dQ = F->createConv3D("Conv3DQ" , inputQ, filterQ, bias, outQTy, |
13480 | {1, 2, 3}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, 1); |
13481 | } else { |
13482 | auto biasTQP = quantization::chooseQuantizationParams( |
13483 | {-1.0, 1.0}, quantization::Schema::Asymmetric, biaselemKind); |
13484 | auto biasQTy = |
13485 | mod.uniqueType(biaselemKind, {1}, biasTQP.scale, biasTQP.offset); |
13486 | QuantizeNode *biasQ = F->createQuantize("biasQ" , bias, biasQTy); |
13487 | conv3dQ = F->createConv3D("Conv3DQ" , inputQ, filterQ, biasQ, outQTy, |
13488 | {1, 2, 3}, {1, 1, 1}, {0, 0, 0, 0, 0, 0}, 1); |
13489 | } |
13490 | DequantizeNode *deQ = F->createDequantize("deQ" , conv3dQ, ElemKind::FloatTy); |
13491 | SaveNode *saveQ = F->createSave("saveQ" , deQ); |
13492 | |
13493 | // Allocate placeholders. |
13494 | bindings.allocate(input)->getHandle().randomize(-1.0, 1.0, mod.getPRNG()); |
13495 | bindings.allocate(filter)->getHandle().randomize(-1.0, 1.0, mod.getPRNG()); |
13496 | bindings.allocate(bias)->getHandle().randomize(-1.0, 1.0, mod.getPRNG()); |
13497 | bindings.allocate(save->getPlaceholder()); |
13498 | bindings.allocate(saveQ->getPlaceholder()); |
13499 | |
13500 | // Run network. |
13501 | ::glow::convertPlaceholdersToConstants( |
13502 | F, bindings, {input, save->getPlaceholder(), saveQ->getPlaceholder()}); |
13503 | EE.compile(CompilationMode::Infer); |
13504 | EE.run(bindings); |
13505 | |
13506 | // Compare. |
13507 | Tensor &res = *bindings.get(save->getPlaceholder()); |
13508 | Tensor &resQ = *bindings.get(saveQ->getPlaceholder()); |
13509 | for (size_t i = 0; i < res.size(); i++) { |
13510 | EXPECT_NEAR(res.getHandle().raw(i), resQ.getHandle().raw(i), 0.03); |
13511 | } |
13512 | } |
13513 | |
13514 | /// Test Int8 Conv3D with Int8 bias. |
13515 | TEST_P(OperatorTest, Conv3DQuantizedTest_Int8_BiasInt8) { |
13516 | ENABLED_BACKENDS("Interpreter" ); |
13517 | Conv3DQuantizedTest(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, |
13518 | ElemKind::Int8QTy); |
13519 | } |
13520 | |
13521 | /// Test Int8 Conv3D with Int32 bias. |
13522 | TEST_P(OperatorTest, Conv3DQuantizedTest_Int8_BiasInt32) { |
13523 | ENABLED_BACKENDS("Interpreter" , "NNPI" ); |
13524 | Conv3DQuantizedTest(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, |
13525 | ElemKind::Int32QTy); |
13526 | } |
13527 | |
13528 | /// Test Int8 Conv3D with Float32 bias. |
13529 | TEST_P(OperatorTest, Conv3DQuantizedTest_Int8_BiasFloat) { |
13530 | ENABLED_BACKENDS("Interpreter" , "NNPI" ); |
13531 | Conv3DQuantizedTest(bindings_, mod_, F_, EE_, ElemKind::Int8QTy, |
13532 | ElemKind::FloatTy); |
13533 | } |
13534 | |
13535 | /// Test Int16 Conv3D with Int16 bias. |
13536 | TEST_P(OperatorTest, Conv3DQuantizedTest_Int16_BiasInt16) { |
13537 | ENABLED_BACKENDS("Interpreter" ); |
13538 | Conv3DQuantizedTest(bindings_, mod_, F_, EE_, ElemKind::Int16QTy, |
13539 | ElemKind::Int16QTy); |
13540 | } |
13541 | |
13542 | /// Test Int16 Conv3D with Int32 bias. |
13543 | TEST_P(OperatorTest, Conv3DQuantizedTest_Int16_BiasInt32) { |
13544 | ENABLED_BACKENDS("Interpreter" ); |
13545 | Conv3DQuantizedTest(bindings_, mod_, F_, EE_, ElemKind::Int16QTy, |
13546 | ElemKind::Int32QTy); |
13547 | } |
13548 | |
13549 | /// Check Non-square kernel for AveragePool. |
13550 | TEST_P(OperatorTest, NonSquareKernelAveragePool) { |
13551 | CHECK_IF_ENABLED(); |
13552 | |
13553 | auto *input = |
13554 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 1}, "input" , false); |
13555 | auto IH = bindings_.allocate(input)->getHandle(); |
13556 | for (size_t i = 0; i < 4 * 4; i++) { |
13557 | IH.raw(i) = i + 1; |
13558 | } |
13559 | auto *Pool = F_->createAvgPool("pool" , input, {2, 3}, {1, 1}, {0, 0, 0, 0}); |
13560 | auto *S = F_->createSave("save" , Pool); |
13561 | bindings_.allocate(S->getPlaceholder()); |
13562 | |
13563 | EE_.compile(CompilationMode::Infer); |
13564 | EE_.run(bindings_); |
13565 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13566 | |
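  // E.g. the first 2x3 window covers {1, 2, 3, 5, 6, 7}; its average is
  // 24 / 6 = 4.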
13567 | static const float ref[] = {4, 5, 8, 9, 12, 13}; |
13568 | for (size_t i = 0; i < 6; i++) |
13569 | EXPECT_EQ(result.getHandle().raw(i), ref[i]); |
13570 | } |
13571 | |
13572 | /// Check Non-square kernel for MaxPool. |
13573 | TEST_P(OperatorTest, NonSquareKernelMaxPool) { |
13574 | CHECK_IF_ENABLED(); |
13575 | |
13576 | auto *input = |
13577 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 1}, "input" , false); |
13578 | auto IH = bindings_.allocate(input)->getHandle(); |
13579 | for (size_t i = 0; i < 4 * 4; i++) { |
13580 | IH.raw(i) = i + 1; |
13581 | } |
13582 | auto *Pool = F_->createMaxPool("pool" , input, {2, 3}, {1, 1}, {0, 0, 0, 0}); |
13583 | auto *S = F_->createSave("save" , Pool->getResult()); |
13584 | bindings_.allocate(S->getPlaceholder()); |
13585 | |
13586 | EE_.compile(CompilationMode::Infer); |
13587 | EE_.run(bindings_); |
13588 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13589 | |
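  // E.g. the first 2x3 window covers {1, 2, 3, 5, 6, 7}; its max is 7.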
13590 | static const float ref[] = {7, 8, 11, 12, 15, 16}; |
13591 | for (size_t i = 0; i < 6; i++) |
13592 | EXPECT_EQ(result.getHandle().raw(i), ref[i]); |
13593 | } |
13594 | |
13595 | /// Check Non-square stride for conv. |
13596 | TEST_P(OperatorTest, NonSquareStrideConvolution) { |
13597 | CHECK_IF_ENABLED(); |
13598 | |
13599 | auto *input = |
13600 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 1}, "input" , false); |
13601 | auto IH = bindings_.allocate(input)->getHandle(); |
13602 | for (size_t i = 0; i < 4 * 4; i++) { |
13603 | IH.raw(i) = i + 1; |
13604 | } |
13605 | |
13606 | auto filter = |
13607 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 2, 1}, "filter" , false); |
13608 | auto FH = bindings_.allocate(filter)->getHandle(); |
13609 | for (size_t i = 0; i < 1 * 2 * 2; i++) { |
13610 | FH.raw(i) = i + 1; |
13611 | } |
13612 | |
13613 | auto *zeroBias = |
13614 | mod_.createPlaceholder(ElemKind::FloatTy, {1}, "bias" , false); |
13615 | bindings_.allocate(zeroBias)->zero(); |
13616 | |
13617 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 2, 2, 1}); |
13618 | ConvolutionNode *CN = F_->createConv("Conv" , input, filter, zeroBias, outTy, |
13619 | {2, 2}, {3, 2}, {0, 0, 1, 1}, 1); |
13620 | SaveNode *S = F_->createSave("save" , CN); |
13621 | bindings_.allocate(S->getPlaceholder()); |
13622 | |
13623 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
13624 | {input, S->getPlaceholder()}); |
13625 | EE_.compile(CompilationMode::Infer); |
13626 | EE_.run(bindings_); |
13627 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13628 | |
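  // E.g. output (0, 0) covers input {1, 2, 5, 6}: 1*1 + 2*2 + 5*3 + 6*4 = 44;
  // output (1, 0) starts at input row 3 (row 4 is zero padding), so only the
  // first filter row contributes: 13*1 + 14*2 = 41.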
13629 | static const float ref[] = {44, 64, 41, 47}; |
13630 | for (size_t i = 0; i < 4; i++) |
13631 | EXPECT_EQ(result.getHandle().raw(i), ref[i]); |
13632 | } |
13633 | |
13634 | /// Create a Conv2D network with an activation. |
13635 | template <FusedActivation ActType> |
13636 | static FunctionTensorPair |
13637 | createAndInitConv2DWithActivation(glow::PlaceholderBindings &bindings, |
13638 | glow::ExecutionEngine &EE) { |
13639 | auto &mod = EE.getModule(); |
13640 | Function *F = mod.createFunction("main" ); |
13641 | |
13642 | // Conv2D parameters. |
13643 | std::vector<dim_t> inputDims = {1, 8, 9, 1}; |
13644 | std::vector<dim_t> filterDims = {1, 2, 3, 1}; |
13645 | std::vector<dim_t> biasDims = {1}; |
13646 | std::vector<dim_t> outputDims = {1, 11, 10, 1}; |
13647 | std::vector<unsigned_t> kernels = {2, 3}; |
13648 | std::vector<unsigned_t> strides = {1, 1}; |
13649 | std::vector<unsigned_t> pads = {2, 1, 3, 4}; |
13650 | unsigned_t group = 1; |
13651 | std::vector<unsigned_t> dilation = {2, 2}; |
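  // With dilation {2, 2} the effective kernel is 3x5, so the output size is
  // (8 + 2 + 3 - 3 + 1) x (9 + 1 + 4 - 5 + 1) = 11 x 10.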
13652 | |
13653 | // Create input placeholder. |
13654 | auto *input = |
13655 | mod.createPlaceholder(ElemKind::FloatTy, inputDims, "input" , false); |
13656 | bindings.allocate(input)->getHandle<float>().randomize(-1.0, 1.0, |
13657 | mod.getPRNG()); |
13658 | // Create filter constant. |
13659 | auto *filter = mod.createConstant(ElemKind::FloatTy, filterDims, "filter" ); |
13660 | filter->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0, |
13661 | mod.getPRNG()); |
13662 | // Create bias constant. |
13663 | auto *bias = mod.createConstant(ElemKind::FloatTy, biasDims, "bias" ); |
13664 | bias->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0, |
13665 | mod.getPRNG()); |
13666 | // Create Conv2D. |
13667 | auto *outTy = mod.uniqueType(ElemKind::FloatTy, outputDims); |
13668 | ConvolutionNode *conv = |
13669 | F->createConv("conv" , input, filter, bias, outTy, kernels, strides, pads, |
13670 | group, dilation); |
13671 | // Create activation. |
13672 | NodeValue act; |
13673 | if (ActType == FusedActivation::RELU) { |
13674 | act = F->createRELU("relu" , conv); |
13675 | } else if (ActType == FusedActivation::CLIP) { |
13676 | act = F->createClip("clip" , conv, 0.0, 1.0); |
13677 | } else if (ActType == FusedActivation::TANH) { |
13678 | act = F->createTanh("tanh" , conv); |
13679 | } else if (ActType == FusedActivation::SIGMOID) { |
13680 | act = F->createSigmoid("sigmoid" , conv); |
13681 | } else if (ActType == FusedActivation::LEAKY_RELU) { |
13682 | act = F->createLeakyRELU("leakyrelu" , conv, 0.1); |
13683 | } |
13684 | |
13685 | SaveNode *save = F->createSave("save" , act); |
13686 | auto *resultTensor = bindings.allocate(save->getPlaceholder()); |
13687 | return std::make_pair(F, resultTensor); |
13688 | } |
13689 | |
13690 | /// Check that Conv2D followed by activation works (whether fused or not). |
13691 | /// For this we compare with the Interpreter reference float implementation. |
13692 | #define TEST_CONV2D_ACTIVATION(ACTIVATION, TYPE, TOL) \ |
13693 | TEST_P(OperatorStatelessTest, Conv2D_##ACTIVATION##_##TYPE) { \ |
13694 | ENABLED_BACKENDS("CPU"); \ |
13695 | compareAgainstInterpreter( \ |
13696 | getBackendName(), \ |
13697 | createAndInitConv2DWithActivation<FusedActivation::ACTIVATION>, \ |
13698 | ElemKind::FloatTy, ElemKind::TYPE, TOL); \ |
13699 | } |
13700 | |
13701 | TEST_CONV2D_ACTIVATION(RELU, FloatTy, 1e-5) |
13702 | TEST_CONV2D_ACTIVATION(CLIP, FloatTy, 1e-5) |
13703 | TEST_CONV2D_ACTIVATION(TANH, FloatTy, 1e-5) |
13704 | TEST_CONV2D_ACTIVATION(SIGMOID, FloatTy, 1e-5) |
13705 | TEST_CONV2D_ACTIVATION(LEAKY_RELU, FloatTy, 1e-5) |
13706 | |
13707 | TEST_CONV2D_ACTIVATION(RELU, Int8QTy, 0.01) |
13708 | TEST_CONV2D_ACTIVATION(CLIP, Int8QTy, 0.01) |
13709 | TEST_CONV2D_ACTIVATION(TANH, Int8QTy, 0.02) |
13710 | TEST_CONV2D_ACTIVATION(SIGMOID, Int8QTy, 0.01) |
13711 | TEST_CONV2D_ACTIVATION(LEAKY_RELU, Int8QTy, 0.01) |
13712 | |
13713 | #undef TEST_CONV2D_ACTIVATION |
13714 | |
13715 | /// Check that CWQ Conv2D followed by activation works (whether fused or not). |
13716 | /// For this we compare with the Interpreter reference float implementation. |
13717 | #define TEST_CWQ_CONV2D_ACTIVATION(ACTIVATION, TYPE, TOL) \ |
13718 | TEST_P(OperatorStatelessTest, CWQConv2D_##ACTIVATION##_##TYPE) { \ |
13719 | ENABLED_BACKENDS("CPU"); \ |
13720 | compareAgainstInterpreter( \ |
13721 | getBackendName(), \ |
13722 | createAndInitConv2DWithActivation<FusedActivation::ACTIVATION>, \ |
13723 | ElemKind::FloatTy, ElemKind::TYPE, TOL, parCloneCountOpt, \ |
13724 | /* convertToRowwiseQuantization */ false, \ |
13725 | quantization::Schema::Asymmetric, /*biasElemKind*/ ElemKind::Int32QTy, \ |
13726 | /*forceFP16AccumSLS*/ false, \ |
13727 | PrecisionConfiguration::Float16Format::None, \ |
13728 | /*convertToChannelwiseQuantization*/ true); \ |
13729 | } |
13730 | |
13731 | TEST_CWQ_CONV2D_ACTIVATION(RELU, Int8QTy, 0.01) |
13732 | TEST_CWQ_CONV2D_ACTIVATION(CLIP, Int8QTy, 0.01) |
13733 | TEST_CWQ_CONV2D_ACTIVATION(TANH, Int8QTy, 0.02) |
13734 | TEST_CWQ_CONV2D_ACTIVATION(SIGMOID, Int8QTy, 0.015) |
13735 | TEST_CWQ_CONV2D_ACTIVATION(LEAKY_RELU, Int8QTy, 0.01) |
13736 | |
13737 | #undef TEST_CWQ_CONV2D_ACTIVATION |
13738 | |
13739 | /// Check Non-cubic stride for conv3D. |
13740 | TEST_P(OperatorTest, NonCubicStrideConv3D) { |
13741 | CHECK_IF_ENABLED(); |
13742 | |
13743 | auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 4, 1}, |
13744 | "input" , false); |
13745 | auto IH = bindings_.allocate(input)->getHandle(); |
13746 | int nextVal = 1; |
13747 | for (dim_t i = 0; i < 4; i++) { |
13748 | for (dim_t j = 0; j < 4; j++) { |
13749 | for (dim_t k = 0; k < 4; k++) { |
13750 | IH.at({0, i, j, k, 0}) = static_cast<float>(nextVal++); |
13751 | } // W |
13752 | } // H |
13753 | } // T |
13754 | |
13755 | auto *filter = mod_.createPlaceholder(ElemKind::FloatTy, {1, 2, 2, 2, 1}, |
13756 | "filter" , false); |
13757 | auto FH = bindings_.allocate(filter)->getHandle(); |
13758 | nextVal = 1; |
13759 | for (dim_t i = 0; i < 2; i++) { |
13760 | for (dim_t j = 0; j < 2; j++) { |
13761 | for (dim_t k = 0; k < 2; k++) { |
13762 | FH.at({0, i, j, k, 0}) = static_cast<float>(nextVal++); |
13763 | } // W |
13764 | } // H |
13765 | } // T |
13766 | |
13767 | auto *zeroBias = |
13768 | mod_.createPlaceholder(ElemKind::FloatTy, {1}, "bias" , false); |
13769 | bindings_.allocate(zeroBias)->zero(); |
13770 | |
13771 | auto outTy = mod_.uniqueType(ElemKind::FloatTy, {1, 2, 2, 2, 1}); |
13772 | |
  Convolution3DNode *CN =
      F_->createConv3D("Conv3D" , input, filter, zeroBias, outTy, {2, 2, 2},
                       {3, 3, 2}, {0, 1, 0, 1, 0, 1}, 1);
13777 | SaveNode *S = F_->createSave("save" , CN); |
13778 | bindings_.allocate(S->getPlaceholder()); |
13779 | |
13780 | ::glow::convertPlaceholdersToConstants(F_, bindings_, |
13781 | {input, S->getPlaceholder()}); |
13782 | EE_.compile(CompilationMode::Infer); |
13783 | EE_.run(bindings_); |
13784 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13785 | |
13786 | static const float ref[] = {560, 632, 366, 394, 524, 544, 185, 191}; |
13787 | for (size_t i = 0; i < 8; i++) { |
13788 | EXPECT_EQ(result.getHandle().raw(i), ref[i]); |
13789 | } |
13790 | } |
13791 | |
13792 | /// Check Non-square stride for AveragePool. |
13793 | TEST_P(OperatorTest, NonSquareStrideAveragePool) { |
13794 | CHECK_IF_ENABLED(); |
13795 | |
13796 | auto *input = |
13797 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 1}, "input" , false); |
13798 | auto IH = bindings_.allocate(input)->getHandle(); |
13799 | for (size_t i = 0; i < 4 * 4; i++) { |
13800 | IH.raw(i) = i + 1; |
13801 | } |
13802 | auto *Pool = F_->createAvgPool("pool" , input, {2, 2}, {3, 2}, {0, 0, 1, 1}); |
13803 | auto *S = F_->createSave("save" , Pool); |
13804 | bindings_.allocate(S->getPlaceholder()); |
13805 | |
13806 | EE_.compile(CompilationMode::Infer); |
13807 | EE_.run(bindings_); |
13808 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13809 | |
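  // The padded zeros count towards the divisor, so the bottom windows average
  // (13 + 14) / 4 = 6.75 and (15 + 16) / 4 = 7.75.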
13810 | static const float ref[] = {3.5, 5.5, 6.75, 7.75}; |
13811 | for (size_t i = 0; i < 4; i++) |
13812 | EXPECT_EQ(result.getHandle().raw(i), ref[i]); |
13813 | } |
13814 | |
13815 | /// Check Non-square stride for MaxPool. |
13816 | TEST_P(OperatorTest, NonSquareStrideMaxPool) { |
13817 | CHECK_IF_ENABLED(); |
13818 | |
13819 | auto *input = |
13820 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 1}, "input" , false); |
13821 | auto IH = bindings_.allocate(input)->getHandle(); |
13822 | for (size_t i = 0; i < 4 * 4; i++) { |
13823 | IH.raw(i) = i + 1; |
13824 | } |
13825 | auto *Pool = F_->createMaxPool("pool" , input, {2, 2}, {3, 2}, {0, 0, 1, 1}); |
13826 | auto *S = F_->createSave("save" , Pool->getResult()); |
13827 | bindings_.allocate(S->getPlaceholder()); |
13828 | |
13829 | EE_.compile(CompilationMode::Infer); |
13830 | EE_.run(bindings_); |
13831 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13832 | |
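  // The first 2x2 window covers {1, 2, 5, 6} -> max 6; stride 3 along the
  // height puts the next row of windows at input rows 3-4, where row 4 is
  // padding.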
13833 | static const float ref[] = {6, 8, 14, 16}; |
13834 | for (size_t i = 0; i < 4; i++) |
13835 | EXPECT_EQ(result.getHandle().raw(i), ref[i]); |
13836 | } |
13837 | |
13838 | TEST_P(OperatorTest, SigmoidOverflow) { |
13839 | CHECK_IF_ENABLED(); |
13840 | |
13841 | auto *input = mod_.createPlaceholder(ElemKind::FloatTy, {2}, "input" , false); |
13842 | auto IH = bindings_.allocate(input)->getHandle(); |
13843 | IH.raw(0) = 1000; |
13844 | IH.raw(1) = -1000; |
13845 | |
13846 | auto *fpSigmoid = F_->createSigmoid("fpSigmoid" , input); |
13847 | auto *S = F_->createSave("fpSave" , fpSigmoid); |
13848 | bindings_.allocate(S->getPlaceholder()); |
13849 | EE_.compile(CompilationMode::Infer); |
13850 | EE_.run(bindings_); |
13851 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13852 | static const float ref[] = {1, 0}; |
13853 | for (size_t i = 0; i < 2; i++) { |
13854 | EXPECT_EQ(result.getHandle().raw(i), ref[i]); |
13855 | } |
13856 | } |
13857 | |
/// This unit test exposes a problem with the CPU Sigmoid implementation when
/// stacking a larger number of operations: for extreme input values the
/// result can be NaN.
13860 | TEST_P(OperatorTest, SigmoidOverflowCPUStacking) { |
13861 | CHECK_IF_ENABLED(); |
13862 | dim_t size = 20; |
13863 | auto *input = |
13864 | mod_.createPlaceholder(ElemKind::FloatTy, {size}, "input" , false); |
13865 | auto IH = bindings_.allocate(input)->getHandle(); |
13866 | IH = { |
13867 | -1588.409912109375, -460.55999755859375, -1176.9149169921875, |
13868 | -1655.9249267578125, -1580.1217041015625, -1680.279541015625, |
13869 | -1750.2677001953125, -1762.1697998046875, -1616.599365234375, |
13870 | -1725.301025390625, +1588.409912109375, +460.55999755859375, |
13871 | +1176.9149169921875, +1655.9249267578125, +1580.1217041015625, |
13872 | +1680.279541015625, +1750.2677001953125, +1762.1697998046875, |
13873 | +1616.599365234375, +1725.301025390625, |
13874 | }; |
13875 | auto *fpSigmoid = F_->createSigmoid("fpSigmoid" , input); |
13876 | auto *S = F_->createSave("fpSave" , fpSigmoid); |
13877 | bindings_.allocate(S->getPlaceholder()); |
13878 | EE_.compile(CompilationMode::Infer); |
13879 | EE_.run(bindings_); |
13880 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13881 | for (size_t i = 0; i < size; i++) { |
13882 | float ref = IH.raw(i) > 0 ? 1 : 0; |
13883 | EXPECT_NEAR(result.getHandle().raw(i), ref, 1E-6); |
13884 | } |
13885 | } |
13886 | |
/// This unit test exposes a problem with the CPU Tanh implementation when
/// stacking a larger number of operations: for extreme input values the
/// result can be NaN.
13889 | TEST_P(OperatorTest, TanhOverflowCPUStacking) { |
13890 | CHECK_IF_ENABLED(); |
13891 | dim_t size = 20; |
13892 | auto *input = |
13893 | mod_.createPlaceholder(ElemKind::FloatTy, {size}, "input" , false); |
13894 | auto IH = bindings_.allocate(input)->getHandle(); |
13895 | IH = { |
13896 | -1588.409912109375, -460.55999755859375, -1176.9149169921875, |
13897 | -1655.9249267578125, -1580.1217041015625, -1680.279541015625, |
13898 | -1750.2677001953125, -1762.1697998046875, -1616.599365234375, |
13899 | -1725.301025390625, +1588.409912109375, +460.55999755859375, |
13900 | +1176.9149169921875, +1655.9249267578125, +1580.1217041015625, |
13901 | +1680.279541015625, +1750.2677001953125, +1762.1697998046875, |
13902 | +1616.599365234375, +1725.301025390625, |
13903 | }; |
13904 | auto *fpTanh = F_->createTanh("fpTanh" , input); |
13905 | auto *S = F_->createSave("fpSave" , fpTanh); |
13906 | bindings_.allocate(S->getPlaceholder()); |
13907 | EE_.compile(CompilationMode::Infer); |
13908 | EE_.run(bindings_); |
13909 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
13910 | for (size_t i = 0; i < size; i++) { |
13911 | float ref = IH.raw(i) > 0 ? 1 : -1; |
13912 | EXPECT_NEAR(result.getHandle().raw(i), ref, 1E-6); |
13913 | } |
13914 | } |
13915 | |
13916 | TEST_P(OperatorStatelessTest, Int8Sigmoid) { |
13917 | CHECK_IF_ENABLED(); |
13918 | compareAgainstInterpreter(getBackendName(), createAndInitBasicSigmoidTest, |
13919 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.005f, |
13920 | parCloneCountOpt); |
13921 | } |
13922 | |
13923 | /// Check that the batch add operator works properly. |
13924 | TEST_P(OperatorTest, BatchAdd) { |
13925 | CHECK_IF_ENABLED(); |
13926 | |
13927 | PseudoRNG PRNG; |
13928 | |
13929 | auto *input = |
13930 | mod_.createPlaceholder(ElemKind::FloatTy, {13, 3, 3}, "A" , false); |
13931 | bindings_.allocate(input)->getHandle<float>().randomize(-3.0, 3.0, PRNG); |
13932 | auto *slice = |
13933 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 3}, "slice" , false); |
13934 | bindings_.allocate(slice)->getHandle<float>().randomize(-3.0, 3.0, PRNG); |
13935 | auto *batchAdd = F_->createBatchedAdd("batchAdd" , input, slice); |
13936 | auto *S = F_->createSave("save" , batchAdd); |
13937 | bindings_.allocate(S->getPlaceholder()); |
13938 | |
13939 | EE_.compile(CompilationMode::Infer); |
13940 | EE_.run(bindings_); |
13941 | |
13942 | auto result = bindings_.get(S->getPlaceholder())->getHandle<float>(); |
13943 | auto handleInput = bindings_.get(input)->getHandle<float>(); |
13944 | auto handleSlice = bindings_.get(slice)->getHandle<float>(); |
13945 | ASSERT_EQ(result.size(), handleInput.size()); |
13946 | for (size_t idx = 0, end = result.size(); idx != end; ++idx) { |
13947 | EXPECT_EQ(result.raw(idx), |
13948 | handleInput.raw(idx) + handleSlice.raw(idx % handleSlice.size())); |
13949 | } |
13950 | } |
13951 | |
13952 | /// Check that the batch add operator works properly for FP16. |
13953 | TEST_P(OperatorTest, FP16BatchAdd) { |
13954 | CHECK_IF_ENABLED(); |
13955 | |
13956 | PseudoRNG PRNG; |
13957 | |
13958 | auto *input = |
13959 | mod_.createPlaceholder(ElemKind::Float16Ty, {13, 3, 3}, "A" , false); |
13960 | bindings_.allocate(input)->getHandle<float16_t>().randomize(-3.0, 3.0, PRNG); |
13961 | auto *slice = |
13962 | mod_.createPlaceholder(ElemKind::Float16Ty, {3, 3}, "slice" , false); |
13963 | bindings_.allocate(slice)->getHandle<float16_t>().randomize(-3.0, 3.0, PRNG); |
13964 | auto *batchAdd = F_->createBatchedAdd("batchAdd" , input, slice); |
13965 | auto *S = F_->createSave("save" , batchAdd); |
13966 | bindings_.allocate(S->getPlaceholder()); |
13967 | |
13968 | EE_.compile(CompilationMode::Infer); |
13969 | EE_.run(bindings_); |
13970 | |
13971 | auto result = bindings_.get(S->getPlaceholder())->getHandle<float16_t>(); |
13972 | auto handleInput = bindings_.get(input)->getHandle<float16_t>(); |
13973 | auto handleSlice = bindings_.get(slice)->getHandle<float16_t>(); |
13974 | ASSERT_EQ(result.size(), handleInput.size()); |
13975 | for (size_t idx = 0, end = result.size(); idx != end; ++idx) { |
13976 | EXPECT_EQ(result.raw(idx), |
13977 | handleInput.raw(idx) + handleSlice.raw(idx % handleSlice.size())); |
13978 | } |
13979 | } |
13980 | |
13981 | /// Check that the batch add operator works properly for BFloat16. |
13982 | TEST_P(OperatorTest, BFloat16BatchAdd) { |
13983 | CHECK_IF_ENABLED(); |
13984 | |
13985 | PseudoRNG PRNG; |
13986 | |
13987 | auto *input = |
13988 | mod_.createPlaceholder(ElemKind::BFloat16Ty, {13, 3, 3}, "A" , false); |
13989 | bindings_.allocate(input)->getHandle<bfloat16_t>().randomize(-3.0, 3.0, PRNG); |
13990 | auto *slice = |
13991 | mod_.createPlaceholder(ElemKind::BFloat16Ty, {3, 3}, "slice" , false); |
13992 | bindings_.allocate(slice)->getHandle<bfloat16_t>().randomize(-3.0, 3.0, PRNG); |
13993 | auto *batchAdd = F_->createBatchedAdd("batchAdd" , input, slice); |
13994 | auto *S = F_->createSave("save" , batchAdd); |
13995 | bindings_.allocate(S->getPlaceholder()); |
13996 | |
13997 | EE_.compile(CompilationMode::Infer); |
13998 | EE_.run(bindings_); |
13999 | |
14000 | auto result = bindings_.get(S->getPlaceholder())->getHandle<bfloat16_t>(); |
14001 | auto handleInput = bindings_.get(input)->getHandle<bfloat16_t>(); |
14002 | auto handleSlice = bindings_.get(slice)->getHandle<bfloat16_t>(); |
14003 | ASSERT_EQ(result.size(), handleInput.size()); |
14004 | for (size_t idx = 0, end = result.size(); idx != end; ++idx) { |
14005 | EXPECT_EQ(result.raw(idx), |
14006 | handleInput.raw(idx) + handleSlice.raw(idx % handleSlice.size())); |
14007 | } |
14008 | } |
14009 | |
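/// Check that an Add broadcast via Tile works, and that repeated runs produce
/// identical results.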
14010 | TEST_P(OperatorTest, BroadcastAdd2x) { |
14011 | CHECK_IF_ENABLED(); |
14012 | |
14013 | auto *input = |
14014 | mod_.createPlaceholder(ElemKind::FloatTy, {10, 1}, "input" , false); |
14015 | auto *bias = mod_.createConstant(ElemKind::FloatTy, {1, 1}, "bias" ); |
14016 | bias->getPayloadMutable().getHandle() = {42}; |
14017 | auto *tile = F_->createTile("tile" , bias, 10, 0); |
14018 | auto *add = F_->createAdd("add" , input, tile); |
14019 | auto *save = F_->createSave("save" , add); |
14020 | auto *output = save->getPlaceholder(); |
14021 | bindings_.allocate(input)->getHandle() = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; |
14022 | bindings_.allocate(output); |
14023 | EE_.compile(CompilationMode::Infer); |
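  // Compile once, then run twice to make sure repeated executions produce the
  // same result.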
14024 | for (int i = 0; i < 2; i++) { |
14025 | Tensor expected(ElemKind::FloatTy, {10, 1}); |
14026 | expected.getHandle() = {42, 43, 44, 45, 46, 47, 48, 49, 50, 51}; |
14027 | EE_.run(bindings_); |
14028 | EXPECT_TRUE(bindings_.get(output)->isEqual(expected)); |
14029 | } |
14030 | } |
14031 | |
14032 | /// Helper to test Sigmoid using \p DTy. |
14033 | template <typename DataType> |
14034 | static void testSigmoid(glow::PlaceholderBindings &bindings, glow::Module &mod, |
14035 | glow::Function *F, glow::ExecutionEngine &EE, |
14036 | ElemKind DTy, float allowedError = 0.001f) { |
14037 | constexpr dim_t size = 10; |
14038 | auto *input = mod.createPlaceholder(DTy, {size}, "input" , false); |
14039 | bindings.allocate(input)->getHandle<DataType>().randomize(-10.0, 10.0, |
14040 | mod.getPRNG()); |
14041 | |
14042 | auto *sigmoid = F->createSigmoid("sigmoid" , input); |
14043 | auto *save = F->createSave("Save" , sigmoid); |
14044 | bindings.allocate(save->getPlaceholder()); |
14045 | |
14046 | EE.compile(CompilationMode::Infer); |
14047 | EE.run(bindings); |
14048 | |
14049 | auto RH = bindings.get(save->getPlaceholder())->getHandle<DataType>(); |
14050 | auto inH = bindings.get(input)->getHandle<DataType>(); |
14051 | |
14052 | for (dim_t i = 0; i < size; i++) { |
14053 | float val = 1 / (1 + std::exp(-(float)inH.at({i}))); |
14054 | EXPECT_NEAR(RH.at({i}), val, allowedError); |
14055 | } |
14056 | } |
14057 | |
14058 | /// Verify that the Sigmoid operator works correctly with FloatTy. |
14059 | TEST_P(OperatorTest, Sigmoid_Float) { |
14060 | CHECK_IF_ENABLED(); |
14061 | testSigmoid<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
14062 | } |
14063 | |
14064 | /// Verify that the Sigmoid operator works correctly with Float16Ty. |
14065 | TEST_P(OperatorTest, Sigmoid_Float16) { |
14066 | CHECK_IF_ENABLED(); |
14067 | testSigmoid<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
14068 | } |
14069 | |
14070 | /// Verify that the Sigmoid operator works correctly with BFloat16Ty. |
14071 | TEST_P(OperatorTest, Sigmoid_BFloat16) { |
14072 | CHECK_IF_ENABLED(); |
14073 | testSigmoid<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
14074 | 0.01f); |
14075 | } |
14076 | |
14077 | /// Helper to test HardSigmoid using \p DTy. |
14078 | template <typename DataType> |
14079 | static void testHardSigmoid(glow::PlaceholderBindings &bindings, |
14080 | glow::Module &mod, glow::Function *F, |
14081 | glow::ExecutionEngine &EE, ElemKind DTy, |
14082 | float allowedError = 0.001f) { |
14083 | constexpr dim_t size = 5; |
14084 | float alpha = 0.2; |
14085 | float beta = 0.5; |
14086 | auto *input = mod.createPlaceholder(DTy, {size}, "input" , false); |
14087 | bindings.allocate(input)->getHandle<DataType>() = {-3., -1., 0., 1., 3.}; |
14088 | auto *hardsigmoid = F->createHardSigmoid("hardsigmoid" , input, alpha, beta); |
14089 | auto *save = F->createSave("save" , hardsigmoid); |
14090 | bindings.allocate(save->getPlaceholder()); |
14091 | |
14092 | EE.compile(CompilationMode::Infer); |
14093 | EE.run(bindings); |
14094 | |
14095 | auto saveH = bindings.get(save->getPlaceholder())->getHandle<DataType>(); |
14096 | auto inH = bindings.get(input)->getHandle<DataType>(); |
14097 | |
14098 | for (dim_t i = 0; i < size; i++) { |
14099 | DataType expectedResult = std::max<DataType>( |
14100 | 0, |
14101 | std::min<DataType>(1, (DataType)alpha * inH.raw(i) + (DataType)beta)); |
14102 | EXPECT_NEAR((float)saveH.raw(i), (float)expectedResult, allowedError); |
14103 | } |
14104 | } |
14105 | |
14106 | /// Verify that the HardSigmoid operator works correctly with FloatTy. |
14107 | TEST_P(OperatorTest, HardSigmoid_Float) { |
14108 | CHECK_IF_ENABLED(); |
14109 | testHardSigmoid<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
14110 | } |
14111 | |
14112 | /// Verify that the HardSigmoid operator works correctly with Float16Ty. |
14113 | TEST_P(OperatorTest, HardSigmoid_Float16) { |
14114 | CHECK_IF_ENABLED(); |
14115 | testHardSigmoid<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
14116 | } |
14117 | |
14118 | /// Verify that the HardSigmoid operator works correctly with BFloat16Ty. |
14119 | TEST_P(OperatorTest, HardSigmoid_BFloat16) { |
14120 | CHECK_IF_ENABLED(); |
14121 | testHardSigmoid<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
14122 | } |
14123 | |
14124 | /// Helper to test Swish using \p DTy. |
14125 | template <typename DataType> |
14126 | static void testSwish(glow::PlaceholderBindings &bindings, glow::Module &mod, |
14127 | glow::Function *F, glow::ExecutionEngine &EE, |
14128 | ElemKind DTy, float allowedError = 0.006f) { |
14129 | constexpr dim_t size = 10; |
14130 | auto *input = mod.createPlaceholder(DTy, {size}, "input" , false); |
14131 | bindings.allocate(input)->getHandle<DataType>().randomize(-5.0, 5.0, |
14132 | mod.getPRNG()); |
14133 | |
14134 | auto *swish = F->createSwish("swish" , input); |
14135 | auto *save = F->createSave("Save" , swish); |
14136 | bindings.allocate(save->getPlaceholder()); |
14137 | |
14138 | EE.compile(CompilationMode::Infer); |
14139 | EE.run(bindings); |
14140 | |
14141 | auto RH = bindings.get(save->getPlaceholder())->getHandle<DataType>(); |
14142 | auto inH = bindings.get(input)->getHandle<DataType>(); |
14143 | |
14144 | for (dim_t i = 0; i < size; i++) { |
14145 | float x = (float)inH.at({i}); |
14146 | float val = x / (1 + std::exp(-x)); |
14147 | EXPECT_NEAR(RH.at({i}), val, allowedError); |
14148 | } |
14149 | } |
14150 | |
14151 | /// Verify that the Swish operator works correctly with FloatTy. |
14152 | TEST_P(OperatorTest, Swish_Float) { |
14153 | CHECK_IF_ENABLED(); |
14154 | testSwish<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
14155 | } |
14156 | |
14157 | /// Verify that the Swish operator works correctly with Float16Ty. |
14158 | TEST_P(OperatorTest, Swish_Float16) { |
14159 | CHECK_IF_ENABLED(); |
14160 | testSwish<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
14161 | } |
14162 | |
14163 | /// Verify that the Swish operator works correctly with BFloat16Ty. |
14164 | TEST_P(OperatorTest, Swish_BFloat16) { |
14165 | CHECK_IF_ENABLED(); |
14166 | testSwish<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, 0.2f); |
14167 | } |
14168 | |
14169 | /// Verify that the Swish operator works correctly with Int8QTy. |
14170 | TEST_P(OperatorStatelessTest, Swish_Int8) { |
14171 | CHECK_IF_ENABLED(); |
14172 | |
14173 | compareAgainstInterpreter( |
14174 | getBackendName(), |
14175 | [](PlaceholderBindings &bindings, ExecutionEngine &EE) { |
14176 | Module &mod = EE.getModule(); |
14177 | Function *F = mod.createFunction("main" ); |
14178 | Placeholder *input = |
14179 | mod.createPlaceholder(ElemKind::FloatTy, {500}, "input" , false); |
14180 | bindings.allocate(input)->getHandle<float>().randomize(-5.0, 5.0, |
14181 | mod.getPRNG()); |
14182 | SwishNode *swish = F->createSwish("swish" , input); |
14183 | SaveNode *save = F->createSave("Save" , swish); |
14184 | Tensor *saveTensor = bindings.allocate(save->getPlaceholder()); |
14185 | return std::make_pair(F, saveTensor); |
14186 | }, |
14187 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.035, parCloneCountOpt); |
14188 | } |
14189 | |
14190 | TEST_P(OperatorTest, IntLookupTable) { |
14191 | CHECK_IF_ENABLED(); |
14192 | |
14193 | constexpr dim_t size = 6; |
14194 | auto *input = |
14195 | mod_.createPlaceholder(ElemKind::Int8QTy, {size}, 1, 0, "input" , false); |
14196 | bindings_.allocate(input)->getHandle<int8_t>() = {0, 1, 2, 3, 4, 5}; |
14197 | |
14198 | auto outTy = mod_.uniqueType(ElemKind::Int8QTy, {size}, 3, 3); |
14199 | |
  // Identity mapping: the lookup table is indexed by (int8 value + 128), so
  // setting entry i to i - 128 maps every quantized input to itself.
14201 | std::vector<int8_t> initValues(256); |
14202 | for (size_t i = 0; i < 256; ++i) { |
14203 | initValues[i] = i - 128; |
14204 | } |
14205 | |
14206 | auto *lookupTable = |
14207 | F_->createIntLookupTable<int8_t>("lookupTable" , input, initValues, outTy); |
14208 | auto *save = F_->createSave("save" , lookupTable); |
14209 | bindings_.allocate(save->getPlaceholder()); |
14210 | |
14211 | EE_.compile(CompilationMode::Infer); |
14212 | EE_.run(bindings_); |
14213 | |
14214 | auto result = bindings_.get(save->getPlaceholder())->getHandle<int8_t>(); |
14215 | for (size_t i = 0; i < size; ++i) { |
14216 | EXPECT_EQ(result.raw(i), i); |
14217 | } |
14218 | } |
14219 | |
/// Helper to test BatchedAdd and BatchedMul using \p DTy; \p opName selects
/// between "add" and "mul".
14221 | template <typename DataType> |
14222 | static void testBatchOp(glow::PlaceholderBindings &bindings, glow::Module &mod, |
14223 | glow::Function *F, glow::ExecutionEngine &EE, |
14224 | ElemKind DTy, const std::string &opName) { |
14225 | CHECK(opName == "add" || opName == "mul" ) << "Invalid opName: " << opName; |
14226 | |
14227 | constexpr unsigned numSlices = 10; |
14228 | constexpr unsigned batchSize = 3; |
14229 | auto *input = mod.createPlaceholder(DTy, {batchSize * numSlices, 10, 10}, |
14230 | "input" , false); |
14231 | auto *slice = mod.createPlaceholder(DTy, {10, 10}, "slice" , false); |
14232 | |
14233 | bindings.allocate(input)->getHandle<DataType>().randomize(-10.0, 10.0, |
14234 | mod.getPRNG()); |
14235 | bindings.allocate(slice)->getHandle<DataType>().randomize(-10.0, 10.0, |
14236 | mod.getPRNG()); |
14237 | |
14238 | std::vector<NodeValue> ops; |
14239 | for (dim_t i = 0; i < numSlices; i++) { |
14240 | auto *ex = F->createSlice("slice" , input, {i * batchSize, 0, 0}, |
14241 | {(i + 1) * batchSize, 10, 10}); |
14242 | if (opName == "add" ) { |
14243 | ops.push_back(F->createBatchedAdd("add" , ex, slice)->getResult()); |
14244 | } else { |
14245 | ops.push_back(F->createBatchedMul("mul" , ex, slice)->getResult()); |
14246 | } |
14247 | } |
14248 | |
14249 | auto *cc = F->createConcat("concat" , ops, 0); |
14250 | |
14251 | // Remove the reference to the graph nodes to allow DCE to remove them. |
14252 | ops.clear(); |
14253 | |
14254 | auto *result = F->createSave("save" , cc); |
14255 | bindings.allocate(result->getPlaceholder()); |
14256 | |
14257 | EE.compile(CompilationMode::Infer); |
14258 | EE.run(bindings); |
14259 | |
14260 | auto RH = bindings.get(result->getPlaceholder())->getHandle<DataType>(); |
14261 | auto IH = bindings.get(input)->getHandle<DataType>(); |
14262 | auto SH = bindings.get(slice)->getHandle<DataType>(); |
14263 | |
  // Check that the batched add/mul works as expected.
14265 | for (dim_t i = 0; i < numSlices * batchSize; i++) { |
14266 | for (dim_t j = 0; j < 10; j++) { |
14267 | for (dim_t k = 0; k < 10; k++) { |
14268 | if (opName == "add" ) { |
14269 | EXPECT_NEAR(IH.at({i, j, k}) + SH.at({j, k}), RH.at({i, j, k}), |
14270 | 0.00001); |
14271 | } else { |
14272 | EXPECT_NEAR(IH.at({i, j, k}) * SH.at({j, k}), RH.at({i, j, k}), |
14273 | 0.00001); |
14274 | } |
14275 | } |
14276 | } |
14277 | } |
14278 | } |
14279 | |
14280 | /// Check that the sequence of extract-batchedadd-concat works. |
14281 | TEST_P(OperatorTest, testBatchAdd_Float) { |
14282 | CHECK_IF_ENABLED(); |
14283 | testBatchOp<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, "add" ); |
14284 | } |
14285 | |
14286 | /// Check that the sequence of extract-batchedadd-concat works. |
14287 | TEST_P(OperatorTest, testBatchAdd_Float16) { |
14288 | CHECK_IF_ENABLED(); |
14289 | testBatchOp<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, "add" ); |
14290 | } |
14291 | |
14292 | /// Check that the sequence of extract-batchedadd-concat works. |
14293 | TEST_P(OperatorTest, testBatchAdd_BFloat16) { |
14294 | CHECK_IF_ENABLED(); |
14295 | testBatchOp<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
14296 | "add" ); |
14297 | } |
14298 | |
14299 | /// Check that the sequence of extract-batchedmul-concat works. |
14300 | TEST_P(OperatorTest, testBatchMul_Float) { |
14301 | CHECK_IF_ENABLED(); |
14302 | testBatchOp<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, "mul" ); |
14303 | } |
14304 | |
14305 | /// Check that the sequence of extract-batchedmul-concat works. |
14306 | TEST_P(OperatorTest, testBatchMul_Float16) { |
14307 | CHECK_IF_ENABLED(); |
14308 | testBatchOp<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, "mul" ); |
14309 | } |
14310 | |
14311 | /// Check that the sequence of extract-batchedmul-concat works. |
14312 | TEST_P(OperatorTest, testBatchMul_BFloat16) { |
14313 | CHECK_IF_ENABLED(); |
14314 | testBatchOp<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
14315 | "mul" ); |
14316 | } |
14317 | |
14318 | static void quantizedBatchOp(ExecutionEngine &EE, Function *F, |
14319 | PlaceholderBindings &bindings, ElemKind Ty, |
14320 | const std::string &opName) { |
14321 | CHECK(opName == "add" || opName == "mul" ) << "Invalid opName: " << opName; |
14322 | auto &mod = EE.getModule(); |
14323 | constexpr unsigned numSlices = 10; |
14324 | constexpr unsigned batchSize = 3; |
14325 | |
14326 | auto *input = mod.createPlaceholder( |
14327 | ElemKind::FloatTy, {numSlices * batchSize, 10, 10}, "input" , false); |
14328 | auto *slice = |
14329 | mod.createPlaceholder(ElemKind::FloatTy, {10, 10}, "slice" , false); |
14330 | |
14331 | bindings.allocate(input)->getHandle().randomize(-5.0, 5.0, mod.getPRNG()); |
14332 | bindings.allocate(slice)->getHandle().randomize(-5.0, 5.0, mod.getPRNG()); |
14333 | |
14334 | // Scale the numbers in the range (-5. .. 5.) to (-50 .. 50). |
14335 | auto qInType = |
14336 | mod.uniqueType(ElemKind::Int8QTy, {numSlices * batchSize, 10, 10}, .1, 0); |
14337 | auto qSliceType2 = mod.uniqueType(Ty, {10, 10}, .1, 0); |
14338 | auto qSliceType3 = |
14339 | mod.uniqueType(ElemKind::Int8QTy, {batchSize, 10, 10}, .1, 0); |
14340 | |
14341 | auto *intInput = F->createQuantize("qinput" , input, qInType); |
14342 | auto *intSlice = F->createQuantize("qslice" , slice, qSliceType2); |
14343 | |
14344 | const Type *outTy; |
14345 | |
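  // For "mul" the elementwise products can reach +/-25, so the output needs a
  // wider scale than the inputs; for "add" the input's quantized range
  // already covers the sums.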
14346 | if (opName == "add" ) { |
14347 | outTy = qInType; |
14348 | } else { |
14349 | outTy = mod.uniqueType(ElemKind::Int8QTy, {batchSize, 10, 10}, 1.2, 0); |
14350 | } |
14351 | |
14352 | std::vector<NodeValue> ops; |
14353 | for (dim_t i = 0; i < numSlices; i++) { |
14354 | auto *ex = |
14355 | F->createSlice("slice" , intInput, {i * batchSize, 0, 0}, qSliceType3); |
14356 | if (opName == "add" ) { |
14357 | ops.push_back(F->createBatchedAdd("add" , ex, intSlice)->getResult()); |
14358 | } else { |
14359 | ops.push_back( |
14360 | F->createBatchedMul("mul" , outTy, ex, intSlice)->getResult()); |
14361 | } |
14362 | } |
14363 | |
14364 | Node *cc = F->createConcat( |
14365 | "concat" , ops, 0, mod.uniqueTypeWithNewShape(outTy, qInType->dims())); |
14366 | cc = F->createDequantize("dq" , cc, ElemKind::FloatTy); |
14367 | auto *result = F->createSave("save" , cc); |
14368 | bindings.allocate(result->getPlaceholder()); |
14369 | |
14370 | // Remove the reference to the graph nodes to allow DCE to remove them. |
14371 | ops.clear(); |
14372 | |
14373 | EE.compile(CompilationMode::Infer); |
14374 | EE.run(bindings); |
14375 | |
14376 | auto RH = bindings.get(result->getPlaceholder())->getHandle(); |
14377 | auto IH = bindings.get(input)->getHandle(); |
14378 | auto SH = bindings.get(slice)->getHandle(); |
14379 | |
  // Check that the batched add/mul works as expected.
14381 | for (dim_t i = 0; i < numSlices * batchSize; i++) { |
14382 | for (dim_t j = 0; j < 10; j++) { |
14383 | for (dim_t k = 0; k < 10; k++) { |
14384 | if (opName == "add" ) { |
14385 | EXPECT_NEAR(IH.at({i, j, k}) + SH.at({j, k}), RH.at({i, j, k}), 0.1); |
14386 | |
14387 | } else { |
14388 | EXPECT_NEAR(IH.at({i, j, k}) * SH.at({j, k}), RH.at({i, j, k}), 2.0); |
14389 | } |
14390 | } |
14391 | } |
14392 | } |
14393 | } |
14394 | |
14395 | /// Tests quantized batched-add arithmetic on Int8QTy. |
14396 | TEST_P(OperatorTest, testQuantizedBatchAdd_Int8) { |
14397 | CHECK_IF_ENABLED(); |
14398 | quantizedBatchOp(EE_, F_, bindings_, ElemKind::Int8QTy, "add" ); |
14399 | } |
14400 | |
14401 | /// Tests quantized batched-add arithmetic on Int32QTy. |
14402 | TEST_P(OperatorTest, testQuantizedBatchAdd_Int32) { |
14403 | CHECK_IF_ENABLED(); |
14404 | quantizedBatchOp(EE_, F_, bindings_, ElemKind::Int32QTy, "add" ); |
14405 | } |
14406 | |
14407 | /// Tests quantized batched-mul arithmetic on Int8QTy. |
14408 | TEST_P(OperatorTest, testQuantizedBatchMul_Int8) { |
14409 | CHECK_IF_ENABLED(); |
14410 | quantizedBatchOp(EE_, F_, bindings_, ElemKind::Int8QTy, "mul" ); |
14411 | } |
14412 | |
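/// Helper to test CumSum on a 1D tensor using \p DTy, along dimension \p dim
/// and with the given \p exclusive and \p reverse flags.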
14413 | template <typename DataType> |
14414 | static Tensor *testCumSum(glow::PlaceholderBindings &bindings, |
14415 | glow::Module &mod, glow::Function *F, |
14416 | glow::ExecutionEngine &EE, ElemKind DTy, int64_t dim, |
14417 | bool exclusive, bool reverse) { |
14418 | auto *data = mod.createPlaceholder(DTy, {4}, "data" , false); |
14419 | bindings.allocate(data)->getHandle<DataType>() = {1, 2, 3, 4}; |
14420 | |
14421 | auto *CS = F->createCumSum("CumSum" , data, dim, exclusive, reverse); |
14422 | auto *S = F->createSave("save" , CS); |
14423 | bindings.allocate(S->getPlaceholder()); |
14424 | |
14425 | EE.compile(CompilationMode::Infer); |
14426 | EE.run(bindings); |
14427 | return bindings.get(S->getPlaceholder()); |
14428 | } |
14429 | |
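/// Helper to test CumSum on a 2D (3x4) tensor using \p DTy.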
14430 | template <typename DataType> |
14431 | static Tensor *testCumSum2D(glow::PlaceholderBindings &bindings, |
14432 | glow::Module &mod, glow::Function *F, |
14433 | glow::ExecutionEngine &EE, ElemKind DTy, |
14434 | int64_t dim, bool exclusive, bool reverse) { |
14435 | auto *data = mod.createPlaceholder(DTy, {3, 4}, "data" , false); |
14436 | bindings.allocate(data)->getHandle<DataType>() = {1, 2, 3, 4, 5, 6, |
14437 | 7, 8, 9, 10, 11, 12}; |
14438 | |
14439 | auto *CS = F->createCumSum("CumSum" , data, dim, exclusive, reverse); |
14440 | auto *S = F->createSave("save" , CS); |
14441 | bindings.allocate(S->getPlaceholder()); |
14442 | |
14443 | EE.compile(CompilationMode::Infer); |
14444 | EE.run(bindings); |
14445 | return bindings.get(S->getPlaceholder()); |
14446 | } |
14447 | |
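/// Helper to test CumSum on a 3D (2x3x4) tensor using \p DTy.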
14448 | template <typename DataType> |
14449 | static Tensor *testCumSum3D(glow::PlaceholderBindings &bindings, |
14450 | glow::Module &mod, glow::Function *F, |
14451 | glow::ExecutionEngine &EE, ElemKind DTy, |
14452 | int64_t dim, bool exclusive, bool reverse) { |
14453 | auto *data = mod.createPlaceholder(DTy, {2, 3, 4}, "data" , false); |
14454 | bindings.allocate(data)->getHandle<DataType>() = { |
14455 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, |
14456 | 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; |
14457 | |
14458 | auto *CS = F->createCumSum("CumSum" , data, dim, exclusive, reverse); |
14459 | auto *S = F->createSave("save" , CS); |
14460 | bindings.allocate(S->getPlaceholder()); |
14461 | |
14462 | EE.compile(CompilationMode::Infer); |
14463 | EE.run(bindings); |
14464 | return bindings.get(S->getPlaceholder()); |
14465 | } |
14466 | |
14467 | TEST_P(OperatorTest, CumSum_Float) { |
14468 | CHECK_IF_ENABLED(); |
14469 | /* |
14470 | DATA = [1, 2, 3, 4] |
14471 | OUTPUT = [1, 3, 6, 10] |
14472 | */ |
14473 | |
14474 | Tensor *result = |
14475 | testCumSum<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14476 | /*dim*/ 0, /*exclusive*/ false, /*reverse*/ false); |
14477 | Tensor expected(result->getType()); |
14478 | expected.getHandle<float>() = {1, 3, 6, 10}; |
14479 | |
14480 | EXPECT_TRUE(expected.isEqual(*result)); |
14481 | } |
14482 | |
14483 | TEST_P(OperatorTest, CumSum_Float16) { |
14484 | CHECK_IF_ENABLED(); |
14485 | /* |
14486 | DATA = [1, 2, 3, 4] |
14487 | OUTPUT = [1, 3, 6, 10] |
14488 | */ |
14489 | |
14490 | Tensor *result = |
14491 | testCumSum<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
14492 | /*dim*/ 0, /*exclusive*/ false, /*reverse*/ false); |
14493 | Tensor expected(result->getType()); |
14494 | expected.getHandle<float16_t>() = {1, 3, 6, 10}; |
14495 | |
14496 | EXPECT_TRUE(expected.isEqual(*result)); |
14497 | } |
14498 | |
14499 | TEST_P(OperatorTest, CumSum_BFloat16) { |
14500 | CHECK_IF_ENABLED(); |
14501 | /* |
14502 | DATA = [1, 2, 3, 4] |
14503 | OUTPUT = [1, 3, 6, 10] |
14504 | */ |
14505 | |
14506 | Tensor *result = |
14507 | testCumSum<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
14508 | /*dim*/ 0, /*exclusive*/ false, /*reverse*/ false); |
14509 | Tensor expected(result->getType()); |
14510 | expected.getHandle<bfloat16_t>() = {1, 3, 6, 10}; |
14511 | |
14512 | EXPECT_TRUE(expected.isEqual(*result)); |
14513 | } |
14514 | |
14515 | TEST_P(OperatorTest, CumSum_Int32) { |
14516 | CHECK_IF_ENABLED(); |
14517 | /* |
14518 | DATA = [1, 2, 3, 4] |
14519 | OUTPUT = [1, 3, 6, 10] |
14520 | */ |
14521 | |
14522 | Tensor *result = |
14523 | testCumSum<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy, |
14524 | /*dim*/ 0, /*exclusive*/ false, /*reverse*/ false); |
14525 | Tensor expected(result->getType()); |
14526 | expected.getHandle<int32_t>() = {1, 3, 6, 10}; |
14527 | |
14528 | EXPECT_TRUE(expected.isEqual(*result)); |
14529 | } |
14530 | |
14531 | TEST_P(OperatorTest, CumSum_Int64) { |
14532 | CHECK_IF_ENABLED(); |
14533 | /* |
14534 | DATA = [1, 2, 3, 4] |
14535 | OUTPUT = [1, 3, 6, 10] |
14536 | */ |
14537 | |
  Tensor *result =
      testCumSum<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy,
                          /*dim*/ 0, /*exclusive*/ false, /*reverse*/ false);
  Tensor expected(result->getType());
  expected.getHandle<int64_t>() = {1, 3, 6, 10};
14543 | |
14544 | EXPECT_TRUE(expected.isEqual(*result)); |
14545 | } |
14546 | |
14547 | TEST_P(OperatorTest, CumSum_Exclusive) { |
14548 | CHECK_IF_ENABLED(); |
14549 | /* |
14550 | DATA = [1, 2, 3, 4] |
14551 | OUTPUT = [0, 1, 3, 6] |
14552 | */ |
14553 | |
14554 | Tensor *result = |
14555 | testCumSum<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14556 | /*dim*/ 0, /*exclusive*/ true, /*reverse*/ false); |
14557 | Tensor expected(result->getType()); |
14558 | expected.getHandle<float>() = {0, 1, 3, 6}; |
14559 | |
14560 | EXPECT_TRUE(expected.isEqual(*result)); |
14561 | } |
14562 | |
14563 | TEST_P(OperatorTest, CumSum_Reverse) { |
14564 | CHECK_IF_ENABLED(); |
14565 | /* |
14566 | DATA = [1, 2, 3, 4] |
14567 | OUTPUT = [10, 9, 7, 4] |
14568 | */ |
14569 | |
14570 | Tensor *result = |
14571 | testCumSum<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
14572 | /*dim*/ 0, /*exclusive*/ false, /*reverse*/ true); |
14573 | Tensor expected(result->getType()); |
14574 | expected.getHandle<float16_t>() = {10, 9, 7, 4}; |
14575 | |
14576 | EXPECT_TRUE(expected.isEqual(*result)); |
14577 | } |
14578 | |
14579 | TEST_P(OperatorTest, CumSum_Reverse_BFloat16) { |
14580 | CHECK_IF_ENABLED(); |
14581 | /* |
14582 | DATA = [1, 2, 3, 4] |
14583 | OUTPUT = [10, 9, 7, 4] |
14584 | */ |
14585 | |
14586 | Tensor *result = |
14587 | testCumSum<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
14588 | /*dim*/ 0, /*exclusive*/ false, /*reverse*/ true); |
14589 | Tensor expected(result->getType()); |
14590 | expected.getHandle<bfloat16_t>() = {10, 9, 7, 4}; |
14591 | |
14592 | EXPECT_TRUE(expected.isEqual(*result)); |
14593 | } |
14594 | |
14595 | TEST_P(OperatorTest, CumSum_ExclusiveReverse) { |
14596 | CHECK_IF_ENABLED(); |
14597 | /* |
14598 | DATA = [1, 2, 3, 4] |
14599 | OUTPUT = [9, 7, 4, 0] |
14600 | */ |
14601 | |
14602 | Tensor *result = |
14603 | testCumSum<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy, |
14604 | /*dim*/ 0, /*exclusive*/ true, /*reverse*/ true); |
14605 | Tensor expected(result->getType()); |
14606 | expected.getHandle<int32_t>() = {9, 7, 4, 0}; |
14607 | |
14608 | EXPECT_TRUE(expected.isEqual(*result)); |
14609 | } |
14610 | |
14611 | TEST_P(OperatorTest, CumSum_WithZeroes) { |
14612 | CHECK_IF_ENABLED(); |
14613 | /* |
14614 | DATA = [0, 0, 1, 0, 0, 2, 0, 0, 3] |
14615 | OUTPUT = [0, 0, 1, 1, 1, 3, 3, 3, 6] |
14616 | */ |
14617 | |
  auto *data = mod_.createPlaceholder(ElemKind::Int64ITy, {9}, "data", false);
14619 | bindings_.allocate(data)->getHandle<int64_t>() = {0, 0, 1, 0, 0, 2, 0, 0, 3}; |
14620 | |
  auto *CS = F_->createCumSum("CumSum", data, 0);
  auto *S = F_->createSave("save", CS);
14623 | bindings_.allocate(S->getPlaceholder()); |
14624 | |
14625 | EE_.compile(CompilationMode::Infer); |
14626 | EE_.run(bindings_); |
14627 | Tensor *result = bindings_.get(S->getPlaceholder()); |
14628 | Tensor expected(result->getType()); |
14629 | expected.getHandle<int64_t>() = {0, 0, 1, 1, 1, 3, 3, 3, 6}; |
14630 | |
14631 | EXPECT_TRUE(expected.isEqual(*result)); |
14632 | } |
14633 | |
14634 | /* |
14635 | CumSum tests with 2 and 3 dimensions. |
14636 | Define CUMSUM_ND_SUPPORTED in the implementation to run these tests |
14637 | |
14638 | These answers were generated using PyTorch, but hardcoded to prevent |
14639 | writing potentially buggy mock implementations. |
14640 | |
14641 | 2D: |
14642 | DATA = [ |
14643 | 1, 2, 3, 4, |
14644 | 5, 6, 7, 8, |
14645 | 9, 10, 11, 12 |
  ] flat: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}

14649 | dim = 0 (sum ALONG ALL rows in a given column) |
14650 | Regular Output = [ |
14651 | 1, 2, 3, 4, |
14652 | 6, 8, 10, 12, |
14653 | 15, 18, 21, 24 |
14654 | ] flat: {1, 2, 3, 4, 6, 8, 10, 12, 15, 18, 21, 24} |
14655 | |
14656 | Reversed output = [ |
14657 | 15, 18, 21, 24, |
14658 | 14, 16, 18, 20, |
14659 | 9, 10, 11, 12 |
14660 | ] flat: {15, 18, 21, 24, 14, 16, 18, 20, 9, 10, 11, 12} |
14661 | |
14662 | Exclusive Output = [ |
14663 | 0, 0, 0, 0, |
14664 | 1, 2, 3, 4, |
14665 | 6, 8, 10, 12 |
14666 | ] flat: {0, 0, 0, 0, 1, 2, 3, 4, 6, 8, 10, 12} |
14667 | |
14668 | Exclusive Reversed output = [ |
14669 | 14, 16, 18, 20, |
14670 | 9, 10, 11, 12, |
14671 | 0, 0, 0, 0 |
14672 | ] flat: {14, 16, 18, 20, 9, 10, 11, 12, 0, 0, 0, 0} |
14673 | |
14674 | dim = 1 (sum ALONG ALL columns for a given row) |
14675 | Regular Output = [ |
14676 | 1, 3, 6, 10, |
14677 | 5, 11, 18, 26, |
14678 | 9, 19, 30, 42 |
14679 | ] flat: {1, 3, 6, 10, 5, 11, 18, 26, 9, 19, 30, 42} |
14680 | |
14681 | Reversed output = [ |
14682 | 10, 9, 7, 4, |
14683 | 26, 21, 15, 8, |
14684 | 42, 33, 23, 12 |
14685 | ] flat: {10, 9, 7, 4, 26, 21, 15, 8, 42, 33, 23, 12} |
14686 | |
14687 | Exclusive Output = [ |
14688 | 0, 1, 3, 6, |
14689 | 0, 5, 11, 18, |
14690 | 0, 9, 19, 30 |
14691 | ] flat: {0, 1, 3, 6, 0, 5, 11, 18, 0, 9, 19, 30} |
14692 | |
14693 | Exclusive Reversed output = [ |
14694 | 9, 7, 4, 0, |
14695 | 21, 15, 8, 0, |
14696 | 33, 23, 12, 0 |
14697 | ] flat: {9, 7, 4, 0, 21, 15, 8, 0, 33, 23, 12, 0} |
14698 | |
  3D: Because there are too many moving parameters here, the tests below try
  each branch, though not every combination of branches. As above, all
  expected outputs were computed using PyTorch.
*/
14704 | |
14705 | TEST_P(OperatorTest, CumSum2D_float_Dim0_Exclusive_Reverse) { |
  // Data: {1..12} arranged in 3 x 4
14707 | // Answer sums ALONG the axis specified |
14708 | CHECK_IF_ENABLED(); |
14709 | Tensor *dimSums = |
14710 | testCumSum2D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14711 | /*dim*/ 0, |
14712 | /*exclusive*/ true, /*reverse*/ true); |
14713 | |
14714 | Tensor expectedDimSums(dimSums->getType()); |
14715 | expectedDimSums.getHandle<float>() = {14, 16, 18, 20, 9, 10, |
14716 | 11, 12, 0, 0, 0, 0}; |
14717 | |
14718 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14719 | } |
14720 | |
14721 | TEST_P(OperatorTest, CumSum2D_float_Dim0_Exclusive) { |
  // Data: {1..12} arranged in 3 x 4
14723 | // Answer sums ALONG the axis specified |
14724 | CHECK_IF_ENABLED(); |
14725 | Tensor *dimSums = |
14726 | testCumSum2D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14727 | /*dim*/ 0, |
14728 | /*exclusive*/ true, /*reverse*/ false); |
14729 | |
14730 | Tensor expectedDimSums(dimSums->getType()); |
14731 | expectedDimSums.getHandle<float>() = {0, 0, 0, 0, 1, 2, 3, 4, 6, 8, 10, 12}; |
14732 | |
14733 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14734 | } |
14735 | |
14736 | TEST_P(OperatorTest, CumSum2D_float_Dim0_Reverse) { |
  // Data: {1..12} arranged in 3 x 4
14738 | // Answer sums ALONG the axis specified |
14739 | CHECK_IF_ENABLED(); |
14740 | Tensor *dimSums = |
14741 | testCumSum2D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14742 | /*dim*/ 0, |
14743 | /*exclusive*/ false, /*reverse*/ true); |
14744 | |
14745 | Tensor expectedDimSums(dimSums->getType()); |
14746 | expectedDimSums.getHandle<float>() = {15, 18, 21, 24, 14, 16, |
14747 | 18, 20, 9, 10, 11, 12}; |
14748 | |
14749 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14750 | } |
14751 | |
14752 | TEST_P(OperatorTest, CumSum2D_float_Dim0) { |
  // Data: {1..12} arranged in 3 x 4
14754 | // Answer sums ALONG the axis specified |
14755 | CHECK_IF_ENABLED(); |
14756 | Tensor *dimSums = |
14757 | testCumSum2D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14758 | /*dim*/ 0, |
14759 | /*exclusive*/ false, /*reverse*/ false); |
14760 | |
14761 | Tensor expectedDimSums(dimSums->getType()); |
14762 | expectedDimSums.getHandle<float>() = {1, 2, 3, 4, 6, 8, |
14763 | 10, 12, 15, 18, 21, 24}; |
14764 | |
14765 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14766 | } |
14767 | |
14768 | TEST_P(OperatorTest, CumSum2D_float_Dim1_Exclusive_Reverse) { |
  // Data: {1..12} arranged in 3 x 4
14770 | // Answer sums ALONG the axis specified |
14771 | CHECK_IF_ENABLED(); |
14772 | Tensor *dimSums = |
14773 | testCumSum2D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14774 | /*dim*/ 1, |
14775 | /*exclusive*/ true, /*reverse*/ true); |
14776 | |
14777 | Tensor expectedDimSums(dimSums->getType()); |
14778 | expectedDimSums.getHandle<float>() = {9, 7, 4, 0, 21, 15, |
14779 | 8, 0, 33, 23, 12, 0}; |
14780 | |
14781 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14782 | } |
14783 | |
14784 | TEST_P(OperatorTest, CumSum2D_float_Dim1_Exclusive) { |
  // Data: {1..12} arranged in 3 x 4
14786 | // Answer sums ALONG the axis specified |
14787 | CHECK_IF_ENABLED(); |
14788 | Tensor *dimSums = |
14789 | testCumSum2D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14790 | /*dim*/ 1, |
14791 | /*exclusive*/ true, /*reverse*/ false); |
14792 | |
14793 | Tensor expectedDimSums(dimSums->getType()); |
14794 | expectedDimSums.getHandle<float>() = {0, 1, 3, 6, 0, 5, 11, 18, 0, 9, 19, 30}; |
14795 | |
14796 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14797 | } |
14798 | |
14799 | TEST_P(OperatorTest, CumSum2D_float_Dim1_Reverse) { |
  // Data: {1..12} arranged in 3 x 4
14801 | // Answer sums ALONG the axis specified |
14802 | CHECK_IF_ENABLED(); |
14803 | Tensor *dimSums = |
14804 | testCumSum2D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14805 | /*dim*/ 1, |
14806 | /*exclusive*/ false, /*reverse*/ true); |
14807 | |
14808 | Tensor expectedDimSums(dimSums->getType()); |
14809 | expectedDimSums.getHandle<float>() = {10, 9, 7, 4, 26, 21, |
14810 | 15, 8, 42, 33, 23, 12}; |
14811 | |
14812 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14813 | } |
14814 | |
14815 | TEST_P(OperatorTest, CumSum2D_float_Dim1) { |
  // Data: {1..12} arranged in 3 x 4
14817 | // Answer sums ALONG the axis specified |
14818 | CHECK_IF_ENABLED(); |
14819 | Tensor *dimSums = |
14820 | testCumSum2D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14821 | /*dim*/ 1, |
14822 | /*exclusive*/ false, /*reverse*/ false); |
14823 | |
14824 | Tensor expectedDimSums(dimSums->getType()); |
14825 | expectedDimSums.getHandle<float>() = {1, 3, 6, 10, 5, 11, |
14826 | 18, 26, 9, 19, 30, 42}; |
14827 | |
14828 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14829 | } |
14830 | |
14831 | TEST_P(OperatorTest, CumSum3D_float_Dim0_Exclusive_Reverse) { |
14832 | // Data: {1..24} arranged in 2 x 3 x 4 |
14833 | // Answer sums ALONG the axis specified |
14834 | CHECK_IF_ENABLED(); |
14835 | Tensor *dimSums = |
14836 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14837 | /*dim*/ 0, |
14838 | /*exclusive*/ true, /*reverse*/ true); |
14839 | |
14840 | Tensor expectedDimSums(dimSums->getType()); |
14841 | expectedDimSums.getHandle<float>() = {13, 14, 15, 16, 17, 18, 19, 20, |
14842 | 21, 22, 23, 24, 0, 0, 0, 0, |
14843 | 0, 0, 0, 0, 0, 0, 0, 0}; |
14844 | |
14845 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14846 | } |
14847 | |
14848 | TEST_P(OperatorTest, CumSum3D_float_Dim0_Exclusive) { |
14849 | // Data: {1..24} arranged in 2 x 3 x 4 |
14850 | // Answer sums ALONG the axis specified |
14851 | CHECK_IF_ENABLED(); |
14852 | Tensor *dimSums = |
14853 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14854 | /*dim*/ 0, |
14855 | /*exclusive*/ true, /*reverse*/ false); |
14856 | |
14857 | Tensor expectedDimSums(dimSums->getType()); |
14858 | expectedDimSums.getHandle<float>() = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
14859 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; |
14860 | |
14861 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14862 | } |
14863 | |
14864 | TEST_P(OperatorTest, CumSum3D_float_Dim0_Reverse) { |
14865 | // Data: {1..24} arranged in 2 x 3 x 4 |
14866 | // Answer sums ALONG the axis specified |
14867 | CHECK_IF_ENABLED(); |
14868 | Tensor *dimSums = |
14869 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14870 | /*dim*/ 0, |
14871 | /*exclusive*/ false, /*reverse*/ true); |
14872 | |
14873 | Tensor expectedDimSums(dimSums->getType()); |
14874 | expectedDimSums.getHandle<float>() = {14, 16, 18, 20, 22, 24, 26, 28, |
14875 | 30, 32, 34, 36, 13, 14, 15, 16, |
14876 | 17, 18, 19, 20, 21, 22, 23, 24}; |
14877 | |
14878 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14879 | } |
14880 | |
14881 | TEST_P(OperatorTest, CumSum3D_float_Dim0) { |
14882 | // Data: {1..24} arranged in 2 x 3 x 4 |
14883 | // Answer sums ALONG the axis specified |
14884 | CHECK_IF_ENABLED(); |
14885 | Tensor *dimSums = |
14886 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14887 | /*dim*/ 0, |
14888 | /*exclusive*/ false, /*reverse*/ false); |
14889 | |
14890 | Tensor expectedDimSums(dimSums->getType()); |
14891 | expectedDimSums.getHandle<float>() = {1, 2, 3, 4, 5, 6, 7, 8, |
14892 | 9, 10, 11, 12, 14, 16, 18, 20, |
14893 | 22, 24, 26, 28, 30, 32, 34, 36}; |
14894 | |
14895 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14896 | } |
14897 | |
14898 | TEST_P(OperatorTest, CumSum3D_float_Dim1_Exclusive_Reverse) { |
14899 | // Data: {1..24} arranged in 2 x 3 x 4 |
14900 | // Answer sums ALONG the axis specified |
14901 | CHECK_IF_ENABLED(); |
14902 | Tensor *dimSums = |
14903 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14904 | /*dim*/ 1, |
14905 | /*exclusive*/ true, /*reverse*/ true); |
14906 | |
14907 | Tensor expectedDimSums(dimSums->getType()); |
14908 | expectedDimSums.getHandle<float>() = {14, 16, 18, 20, 9, 10, 11, 12, |
14909 | 0, 0, 0, 0, 38, 40, 42, 44, |
14910 | 21, 22, 23, 24, 0, 0, 0, 0}; |
14911 | |
14912 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14913 | } |
14914 | |
14915 | TEST_P(OperatorTest, CumSum3D_float_Dim1_Exclusive) { |
14916 | // Data: {1..24} arranged in 2 x 3 x 4 |
14917 | // Answer sums ALONG the axis specified |
14918 | CHECK_IF_ENABLED(); |
14919 | Tensor *dimSums = |
14920 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14921 | /*dim*/ 1, |
14922 | /*exclusive*/ true, /*reverse*/ false); |
14923 | |
14924 | Tensor expectedDimSums(dimSums->getType()); |
14925 | expectedDimSums.getHandle<float>() = {0, 0, 0, 0, 1, 2, 3, 4, |
14926 | 6, 8, 10, 12, 0, 0, 0, 0, |
14927 | 13, 14, 15, 16, 30, 32, 34, 36}; |
14928 | |
14929 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14930 | } |
14931 | |
14932 | TEST_P(OperatorTest, CumSum3D_float_Dim1_Reverse) { |
14933 | // Data: {1..24} arranged in 2 x 3 x 4 |
14934 | // Answer sums ALONG the axis specified |
14935 | CHECK_IF_ENABLED(); |
14936 | Tensor *dimSums = |
14937 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14938 | /*dim*/ 1, |
14939 | /*exclusive*/ false, /*reverse*/ true); |
14940 | |
14941 | Tensor expectedDimSums(dimSums->getType()); |
14942 | expectedDimSums.getHandle<float>() = {15, 18, 21, 24, 14, 16, 18, 20, |
14943 | 9, 10, 11, 12, 51, 54, 57, 60, |
14944 | 38, 40, 42, 44, 21, 22, 23, 24}; |
14945 | |
14946 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14947 | } |
14948 | |
14949 | TEST_P(OperatorTest, CumSum3D_float_Dim1) { |
14950 | // Data: {1..24} arranged in 2 x 3 x 4 |
14951 | // Answer sums ALONG the axis specified |
14952 | CHECK_IF_ENABLED(); |
14953 | Tensor *dimSums = |
14954 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14955 | /*dim*/ 1, |
14956 | /*exclusive*/ false, /*reverse*/ false); |
14957 | |
14958 | Tensor expectedDimSums(dimSums->getType()); |
14959 | expectedDimSums.getHandle<float>() = {1, 2, 3, 4, 6, 8, 10, 12, |
14960 | 15, 18, 21, 24, 13, 14, 15, 16, |
14961 | 30, 32, 34, 36, 51, 54, 57, 60}; |
14962 | |
14963 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14964 | } |
14965 | |
14966 | TEST_P(OperatorTest, CumSum3D_float_Dim2_Exclusive_Reverse) { |
14967 | // Data: {1..24} arranged in 2 x 3 x 4 |
14968 | // Answer sums ALONG the axis specified |
14969 | CHECK_IF_ENABLED(); |
14970 | Tensor *dimSums = |
14971 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14972 | /*dim*/ 2, |
14973 | /*exclusive*/ true, /*reverse*/ true); |
14974 | |
14975 | Tensor expectedDimSums(dimSums->getType()); |
14976 | expectedDimSums.getHandle<float>() = {9, 7, 4, 0, 21, 15, 8, 0, |
14977 | 33, 23, 12, 0, 45, 31, 16, 0, |
14978 | 57, 39, 20, 0, 69, 47, 24, 0}; |
14979 | |
14980 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14981 | } |
14982 | |
14983 | TEST_P(OperatorTest, CumSum3D_float_Dim2_Exclusive) { |
14984 | // Data: {1..24} arranged in 2 x 3 x 4 |
14985 | // Answer sums ALONG the axis specified |
14986 | CHECK_IF_ENABLED(); |
14987 | Tensor *dimSums = |
14988 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
14989 | /*dim*/ 2, |
14990 | /*exclusive*/ true, /*reverse*/ false); |
14991 | |
14992 | Tensor expectedDimSums(dimSums->getType()); |
14993 | expectedDimSums.getHandle<float>() = {0, 1, 3, 6, 0, 5, 11, 18, |
14994 | 0, 9, 19, 30, 0, 13, 27, 42, |
14995 | 0, 17, 35, 54, 0, 21, 43, 66}; |
14996 | |
14997 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
14998 | } |
14999 | |
15000 | TEST_P(OperatorTest, CumSum3D_float_Dim2_Reverse) { |
15001 | // Data: {1..24} arranged in 2 x 3 x 4 |
15002 | // Answer sums ALONG the axis specified |
15003 | CHECK_IF_ENABLED(); |
15004 | Tensor *dimSums = |
15005 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
15006 | /*dim*/ 2, |
15007 | /*exclusive*/ false, /*reverse*/ true); |
15008 | |
15009 | Tensor expectedDimSums(dimSums->getType()); |
15010 | expectedDimSums.getHandle<float>() = {10, 9, 7, 4, 26, 21, 15, 8, |
15011 | 42, 33, 23, 12, 58, 45, 31, 16, |
15012 | 74, 57, 39, 20, 90, 69, 47, 24}; |
15013 | |
15014 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15015 | } |
15016 | |
15017 | TEST_P(OperatorTest, CumSum3D_float_Dim2) { |
15018 | // Data: {1..24} arranged in 2 x 3 x 4 |
15019 | // Answer sums ALONG the axis specified |
15020 | CHECK_IF_ENABLED(); |
15021 | Tensor *dimSums = |
15022 | testCumSum3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
15023 | /*dim*/ 2, |
15024 | /*exclusive*/ false, /*reverse*/ false); |
15025 | |
15026 | Tensor expectedDimSums(dimSums->getType()); |
15027 | expectedDimSums.getHandle<float>() = {1, 3, 6, 10, 5, 11, 18, 26, |
15028 | 9, 19, 30, 42, 13, 27, 42, 58, |
15029 | 17, 35, 54, 74, 21, 43, 66, 90}; |
15030 | |
15031 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15032 | } |
15033 | |
15034 | TEST_P(OperatorTest, CumSum2D_float16_Dim0) { |
  // Data: {1..12} arranged in 3 x 4
15036 | // Answer sums ALONG the axis specified |
15037 | CHECK_IF_ENABLED(); |
15038 | Tensor *dimSums = |
15039 | testCumSum2D<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15040 | /*dim*/ 0, |
15041 | /*exclusive*/ false, /*reverse*/ false); |
15042 | |
15043 | Tensor expectedDimSums(dimSums->getType()); |
15044 | expectedDimSums.getHandle<float16_t>() = {1, 2, 3, 4, 6, 8, |
15045 | 10, 12, 15, 18, 21, 24}; |
15046 | |
15047 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15048 | } |
15049 | |
15050 | TEST_P(OperatorTest, CumSum2D_float16_Dim1) { |
  // Data: {1..12} arranged in 3 x 4
15052 | // Answer sums ALONG the axis specified |
15053 | CHECK_IF_ENABLED(); |
15054 | Tensor *dimSums = |
15055 | testCumSum2D<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15056 | /*dim*/ 1, |
15057 | /*exclusive*/ false, /*reverse*/ false); |
15058 | |
15059 | Tensor expectedDimSums(dimSums->getType()); |
15060 | expectedDimSums.getHandle<float16_t>() = {1, 3, 6, 10, 5, 11, |
15061 | 18, 26, 9, 19, 30, 42}; |
15062 | |
15063 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15064 | } |
15065 | |
15066 | TEST_P(OperatorTest, CumSum3D_float16_Dim0) { |
15067 | // Data: {1..24} arranged in 2 x 3 x 4 |
15068 | // Answer sums ALONG the axis specified |
15069 | CHECK_IF_ENABLED(); |
15070 | Tensor *dimSums = |
15071 | testCumSum3D<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15072 | /*dim*/ 0, |
15073 | /*exclusive*/ false, /*reverse*/ false); |
15074 | |
15075 | Tensor expectedDimSums(dimSums->getType()); |
15076 | expectedDimSums.getHandle<float16_t>() = {1, 2, 3, 4, 5, 6, 7, 8, |
15077 | 9, 10, 11, 12, 14, 16, 18, 20, |
15078 | 22, 24, 26, 28, 30, 32, 34, 36}; |
15079 | |
15080 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15081 | } |
15082 | |
15083 | TEST_P(OperatorTest, CumSum3D_float16_Dim1) { |
15084 | // Data: {1..24} arranged in 2 x 3 x 4 |
15085 | // Answer sums ALONG the axis specified |
15086 | CHECK_IF_ENABLED(); |
15087 | Tensor *dimSums = |
15088 | testCumSum3D<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15089 | /*dim*/ 1, |
15090 | /*exclusive*/ false, /*reverse*/ false); |
15091 | |
15092 | Tensor expectedDimSums(dimSums->getType()); |
15093 | expectedDimSums.getHandle<float16_t>() = {1, 2, 3, 4, 6, 8, 10, 12, |
15094 | 15, 18, 21, 24, 13, 14, 15, 16, |
15095 | 30, 32, 34, 36, 51, 54, 57, 60}; |
15096 | |
15097 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15098 | } |
15099 | |
15100 | TEST_P(OperatorTest, CumSum3D_float16_Dim2) { |
15101 | // Data: {1..24} arranged in 2 x 3 x 4 |
15102 | // Answer sums ALONG the axis specified |
15103 | CHECK_IF_ENABLED(); |
15104 | Tensor *dimSums = |
15105 | testCumSum3D<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15106 | /*dim*/ 2, |
15107 | /*exclusive*/ false, /*reverse*/ false); |
15108 | |
15109 | Tensor expectedDimSums(dimSums->getType()); |
15110 | expectedDimSums.getHandle<float16_t>() = {1, 3, 6, 10, 5, 11, 18, 26, |
15111 | 9, 19, 30, 42, 13, 27, 42, 58, |
15112 | 17, 35, 54, 74, 21, 43, 66, 90}; |
15113 | |
15114 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15115 | } |
15116 | |
15117 | TEST_P(OperatorTest, CumSum2D_bfloat16_t_Dim0) { |
  // Data: {1..12} arranged in 3 x 4
15119 | // Answer sums ALONG the axis specified |
15120 | CHECK_IF_ENABLED(); |
15121 | Tensor *dimSums = |
15122 | testCumSum2D<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
15123 | /*dim*/ 0, |
15124 | /*exclusive*/ false, /*reverse*/ false); |
15125 | |
15126 | Tensor expectedDimSums(dimSums->getType()); |
15127 | expectedDimSums.getHandle<bfloat16_t>() = {1, 2, 3, 4, 6, 8, |
15128 | 10, 12, 15, 18, 21, 24}; |
15129 | |
15130 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15131 | } |
15132 | |
15133 | TEST_P(OperatorTest, CumSum2D_bfloat16_t_Dim1) { |
  // Data: {1..12} arranged in 3 x 4
15135 | // Answer sums ALONG the axis specified |
15136 | CHECK_IF_ENABLED(); |
15137 | Tensor *dimSums = |
15138 | testCumSum2D<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
15139 | /*dim*/ 1, |
15140 | /*exclusive*/ false, /*reverse*/ false); |
15141 | |
15142 | Tensor expectedDimSums(dimSums->getType()); |
15143 | expectedDimSums.getHandle<bfloat16_t>() = {1, 3, 6, 10, 5, 11, |
15144 | 18, 26, 9, 19, 30, 42}; |
15145 | |
15146 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15147 | } |
15148 | |
15149 | TEST_P(OperatorTest, CumSum3D_bfloat16_t_Dim0) { |
15150 | // Data: {1..24} arranged in 2 x 3 x 4 |
15151 | // Answer sums ALONG the axis specified |
15152 | CHECK_IF_ENABLED(); |
15153 | Tensor *dimSums = |
15154 | testCumSum3D<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
15155 | /*dim*/ 0, |
15156 | /*exclusive*/ false, /*reverse*/ false); |
15157 | |
15158 | Tensor expectedDimSums(dimSums->getType()); |
15159 | expectedDimSums.getHandle<bfloat16_t>() = {1, 2, 3, 4, 5, 6, 7, 8, |
15160 | 9, 10, 11, 12, 14, 16, 18, 20, |
15161 | 22, 24, 26, 28, 30, 32, 34, 36}; |
15162 | |
15163 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15164 | } |
15165 | |
15166 | TEST_P(OperatorTest, CumSum3D_bfloat16_t_Dim1) { |
15167 | // Data: {1..24} arranged in 2 x 3 x 4 |
15168 | // Answer sums ALONG the axis specified |
15169 | CHECK_IF_ENABLED(); |
15170 | Tensor *dimSums = |
15171 | testCumSum3D<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
15172 | /*dim*/ 1, |
15173 | /*exclusive*/ false, /*reverse*/ false); |
15174 | |
15175 | Tensor expectedDimSums(dimSums->getType()); |
15176 | expectedDimSums.getHandle<bfloat16_t>() = {1, 2, 3, 4, 6, 8, 10, 12, |
15177 | 15, 18, 21, 24, 13, 14, 15, 16, |
15178 | 30, 32, 34, 36, 51, 54, 57, 60}; |
15179 | |
15180 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15181 | } |
15182 | |
15183 | TEST_P(OperatorTest, CumSum3D_bfloat16_t_Dim2) { |
15184 | // Data: {1..24} arranged in 2 x 3 x 4 |
15185 | // Answer sums ALONG the axis specified |
15186 | CHECK_IF_ENABLED(); |
15187 | Tensor *dimSums = |
15188 | testCumSum3D<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
15189 | /*dim*/ 2, |
15190 | /*exclusive*/ false, /*reverse*/ false); |
15191 | |
15192 | Tensor expectedDimSums(dimSums->getType()); |
15193 | expectedDimSums.getHandle<bfloat16_t>() = {1, 3, 6, 10, 5, 11, 18, 26, |
15194 | 9, 19, 30, 42, 13, 27, 42, 58, |
15195 | 17, 35, 54, 74, 21, 43, 66, 90}; |
15196 | |
15197 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15198 | } |
15199 | |
15200 | TEST_P(OperatorTest, CumSum2D_int32_t_Dim0) { |
15201 | // Data: {1..12} arranged in 3 x 4 |
15202 | // Answer sums ALONG the axis specified |
15203 | CHECK_IF_ENABLED(); |
15204 | Tensor *dimSums = |
15205 | testCumSum2D<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy, |
15206 | /*dim*/ 0, |
15207 | /*exclusive*/ false, /*reverse*/ false); |
15208 | |
15209 | Tensor expectedDimSums(dimSums->getType()); |
15210 | expectedDimSums.getHandle<int32_t>() = {1, 2, 3, 4, 6, 8, |
15211 | 10, 12, 15, 18, 21, 24}; |
15212 | |
15213 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15214 | } |
15215 | |
15216 | TEST_P(OperatorTest, CumSum2D_int32_t_Dim1) { |
15217 | // Data: {1..12} arranged in 3 x 4 |
15218 | // Answer sums ALONG the axis specified |
15219 | CHECK_IF_ENABLED(); |
15220 | Tensor *dimSums = |
15221 | testCumSum2D<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy, |
15222 | /*dim*/ 1, |
15223 | /*exclusive*/ false, /*reverse*/ false); |
15224 | |
15225 | Tensor expectedDimSums(dimSums->getType()); |
15226 | expectedDimSums.getHandle<int32_t>() = {1, 3, 6, 10, 5, 11, |
15227 | 18, 26, 9, 19, 30, 42}; |
15228 | |
15229 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15230 | } |
15231 | |
15232 | TEST_P(OperatorTest, CumSum3D_int32_t_Dim0) { |
15233 | // Data: {1..24} arranged in 2 x 3 x 4 |
15234 | // Answer sums ALONG the axis specified |
15235 | CHECK_IF_ENABLED(); |
15236 | Tensor *dimSums = |
15237 | testCumSum3D<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy, |
15238 | /*dim*/ 0, |
15239 | /*exclusive*/ false, /*reverse*/ false); |
15240 | |
15241 | Tensor expectedDimSums(dimSums->getType()); |
15242 | expectedDimSums.getHandle<int32_t>() = {1, 2, 3, 4, 5, 6, 7, 8, |
15243 | 9, 10, 11, 12, 14, 16, 18, 20, |
15244 | 22, 24, 26, 28, 30, 32, 34, 36}; |
15245 | |
15246 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15247 | } |
15248 | |
15249 | TEST_P(OperatorTest, CumSum3D_int32_t_Dim1) { |
15250 | // Data: {1..24} arranged in 2 x 3 x 4 |
15251 | // Answer sums ALONG the axis specified |
15252 | CHECK_IF_ENABLED(); |
15253 | Tensor *dimSums = |
15254 | testCumSum3D<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy, |
15255 | /*dim*/ 1, |
15256 | /*exclusive*/ false, /*reverse*/ false); |
15257 | |
15258 | Tensor expectedDimSums(dimSums->getType()); |
15259 | expectedDimSums.getHandle<int32_t>() = {1, 2, 3, 4, 6, 8, 10, 12, |
15260 | 15, 18, 21, 24, 13, 14, 15, 16, |
15261 | 30, 32, 34, 36, 51, 54, 57, 60}; |
15262 | |
15263 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15264 | } |
15265 | |
15266 | TEST_P(OperatorTest, CumSum3D_int32_t_Dim2) { |
15267 | // Data: {1..24} arranged in 2 x 3 x 4 |
15268 | // Answer sums ALONG the axis specified |
15269 | CHECK_IF_ENABLED(); |
15270 | Tensor *dimSums = |
15271 | testCumSum3D<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy, |
15272 | /*dim*/ 2, |
15273 | /*exclusive*/ false, /*reverse*/ false); |
15274 | |
15275 | Tensor expectedDimSums(dimSums->getType()); |
15276 | expectedDimSums.getHandle<int32_t>() = {1, 3, 6, 10, 5, 11, 18, 26, |
15277 | 9, 19, 30, 42, 13, 27, 42, 58, |
15278 | 17, 35, 54, 74, 21, 43, 66, 90}; |
15279 | |
15280 | EXPECT_TRUE(expectedDimSums.isEqual(*dimSums)); |
15281 | } |
15282 | |
15283 | TEST_P(OperatorTest, LengthsSum) { |
15284 | CHECK_IF_ENABLED(); |
15285 | |
15286 | /* |
15287 | DATA = [ |
15288 | [1.0, 1.2], |
15289 | [2.3, 3.4], |
15290 | [4.5, 3.7], |
15291 | [3.0, 2.9], |
15292 | [1.1, 1.4], |
15293 | [2.8, 8.4], |
15294 | ] |
15295 | LENGTHS = [2, 0, 3, 1] |
15296 | OUTPUT = [ |
15297 | [3.3, 4.6], |
15298 | [0.0, 0.0], |
15299 | [8.6, 8.0], |
15300 | [2.8, 8.4], |
15301 | ] |
15302 | */ |
  auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {6, 2}, "data", false);
  auto *lengths =
      mod_.createPlaceholder(ElemKind::Int32ITy, {4}, "lengths", false);
15306 | |
15307 | bindings_.allocate(data)->getHandle() = {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 3.7f, |
15308 | 3.0f, 2.9f, 1.1f, 1.4f, 2.8f, 8.4f}; |
15309 | bindings_.allocate(lengths)->getHandle<int32_t>() = {2, 0, 3, 1}; |
15310 | |
  auto *R = F_->createLengthsSum("LS", data, lengths);
  auto *S = F_->createSave("save", R);
15313 | bindings_.allocate(S->getPlaceholder()); |
15314 | |
15315 | EE_.compile(CompilationMode::Infer); |
15316 | EE_.run(bindings_); |
15317 | |
15318 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
15319 | Tensor expected(ElemKind::FloatTy, {4, 2}); |
15320 | expected.getHandle() = {3.3f, 4.6f, 0.0f, 0.0f, 8.6f, 8.0f, 2.8f, 8.4f}; |
15321 | |
15322 | EXPECT_TRUE(expected.isEqual(result)); |
15323 | } |
15324 | |
/// Helper to test SLS using data type \p DTy and index type \p ITy.
15326 | template <typename DataType, typename IndexType> |
15327 | static void testSLS(glow::PlaceholderBindings &bindings, glow::Module &mod, |
15328 | glow::Function *F, glow::ExecutionEngine &EE, ElemKind DTy, |
15329 | ElemKind ITy, float allowedError) { |
15330 | /* |
15331 | DATA = [ |
15332 | [1.0, 1.2], |
15333 | [2.3, 3.4], |
15334 | [4.5, 5.7], |
15335 | ] |
15336 | INDICES = [2, 0, 1, 2, 0, 0, 0, 0] |
15337 | LENGTHS = [2, 0, 2, 1, 3] |
15338 | OUTPUT = [ |
15339 | [5.5, 6.9], |
15340 | [0.0, 0.0], |
15341 | [6.8, 9.1], |
15342 | [1.0, 1.2], |
15343 | [3.0, 3.6], |
15344 | ] |
15345 | */ |
  auto *data = mod.createPlaceholder(DTy, {3, 2}, "data", false);
  auto *indices = mod.createPlaceholder(ITy, {8}, "indices", false);
  auto *lengths =
      mod.createPlaceholder(ElemKind::Int32ITy, {5}, "lengths", false);
15350 | |
15351 | bindings.allocate(data)->getHandle<DataType>() = { |
15352 | 1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f, |
15353 | }; |
15354 | bindings.allocate(indices)->getHandle<IndexType>() = { |
15355 | 2, 0, 1, 2, 0, 0, 0, 0, |
15356 | }; |
15357 | bindings.allocate(lengths)->getHandle<int32_t>() = { |
15358 | 2, 0, 2, 1, 3, |
15359 | }; |
15360 | |
  auto *R = F->createSparseLengthsSum("SLS", data, indices, lengths);

  auto *S = F->createSave("save", R);
15364 | bindings.allocate(S->getPlaceholder()); |
15365 | |
15366 | EE.compile(CompilationMode::Infer); |
15367 | EE.run(bindings); |
15368 | |
15369 | Tensor &result = *bindings.get(S->getPlaceholder()); |
15370 | Tensor expected(DTy, {5, 2}); |
15371 | expected.getHandle<DataType>() = { |
15372 | 5.5f, 6.9f, 0.0f, 0.0f, 6.8f, 9.1f, 1.0f, 1.2f, 3.0f, 3.6f, |
15373 | }; |
15374 | |
15375 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
15376 | } |
15377 | |
15378 | /// Test that SLS is correctly supported in FloatTy with int64 indices. |
15379 | TEST_P(OperatorTest, SparseLengthsSum_Float) { |
15380 | CHECK_IF_ENABLED(); |
15381 | testSLS<float, int64_t>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
15382 | ElemKind::Int64ITy, 0.0001); |
15383 | } |
15384 | |
15385 | /// Test that SLS is correctly supported in FloatTy with int32 indices. |
15386 | TEST_P(OperatorTest, SparseLengthsSum_Float_Int32) { |
15387 | CHECK_IF_ENABLED(); |
15388 | testSLS<float, int32_t>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
15389 | ElemKind::Int32ITy, 0.0001); |
15390 | } |
15391 | |
15392 | /// Test that SLS is correctly supported in Float16Ty with int64 indices. |
15393 | TEST_P(OperatorTest, SparseLengthsSum_Float16) { |
15394 | CHECK_IF_ENABLED(); |
15395 | testSLS<float16_t, int64_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15396 | ElemKind::Int64ITy, 0.002); |
15397 | } |
15398 | |
15399 | /// Test that SLS is correctly supported in BFloat16Ty with int64 indices. |
15400 | TEST_P(OperatorTest, SparseLengthsSum_BFloat16) { |
15401 | CHECK_IF_ENABLED(); |
15402 | testSLS<bfloat16_t, int64_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
15403 | ElemKind::Int64ITy, 0.05); |
15404 | } |
15405 | |
15406 | /// Test that SLS is correctly supported in Float16Ty with int32 indices. |
15407 | TEST_P(OperatorTest, SparseLengthsSum_Float16_Int32) { |
15408 | CHECK_IF_ENABLED(); |
15409 | testSLS<float16_t, int32_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15410 | ElemKind::Int32ITy, 0.05); |
15411 | } |
15412 | |
15413 | /// Test that SLS is correctly supported in BFloat16Ty with int32 indices. |
15414 | TEST_P(OperatorTest, SparseLengthsSum_BFloat16_Int32) { |
15415 | CHECK_IF_ENABLED(); |
15416 | testSLS<bfloat16_t, int32_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
15417 | ElemKind::Int32ITy, 0.05); |
15418 | } |
15419 | |
15420 | TEST_P(OperatorTest, SparseLengthsSumI8) { |
15421 | CHECK_IF_ENABLED(); |
15422 | |
15423 | /* |
15424 | DATA = [ |
15425 | [11, 13], |
15426 | [24, 35], |
15427 | [46, 58], |
15428 | ] |
15429 | INDICES = [2, 0, 1, 2, 0, 0, 0, 0] |
15430 | LENGTHS = [2, 0, 2, 1, 3] |
15431 | OUTPUT = [ |
15432 | [56, 70], |
15433 | [ 1, 1], |
15434 | [69, 92], |
15435 | [11, 13], |
15436 | [31, 37], |
15437 | ] |
15438 | */ |
  auto *data =
      mod_.createPlaceholder(ElemKind::Int8QTy, {3, 2}, 0.1f, 1, "data", false);
  auto *indices =
      mod_.createPlaceholder(ElemKind::Int64ITy, {8}, "indices", false);
  auto *lengths =
      mod_.createPlaceholder(ElemKind::Int32ITy, {5}, "lengths", false);
15445 | |
15446 | bindings_.allocate(data)->getHandle<int8_t>() = { |
15447 | 11, 13, 24, 35, 46, 58, |
15448 | }; |
15449 | bindings_.allocate(indices)->getHandle<int64_t>() = { |
15450 | 2, 0, 1, 2, 0, 0, 0, 0, |
15451 | }; |
15452 | bindings_.allocate(lengths)->getHandle<int32_t>() = { |
15453 | 2, 0, 2, 1, 3, |
15454 | }; |
15455 | |
  auto *R = F_->createSparseLengthsSum("SLS", data, indices, lengths);
  auto *S = F_->createSave("save", R);
15458 | bindings_.allocate(S->getPlaceholder()); |
15459 | |
15460 | EE_.compile(CompilationMode::Infer); |
15461 | EE_.run(bindings_); |
15462 | |
15463 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
15464 | Tensor expected(ElemKind::Int8QTy, {5, 2}, 0.1f, 1); |
15465 | expected.getHandle<int8_t>() = { |
15466 | 56, 70, 1, 1, 69, 92, 11, 13, 31, 37, |
15467 | }; |
15468 | EXPECT_TRUE(expected.isEqual(result)); |
15469 | } |
15470 | |
15471 | /// Test SparseLengthsWeightedSum with an N-dimension embedding table. |
15472 | template <typename DataType> |
15473 | static void testSLWS(glow::PlaceholderBindings &bindings, glow::Module &mod, |
15474 | glow::Function *F, glow::ExecutionEngine &EE, ElemKind DTy, |
15475 | float allowedError, size_t ndims) { |
15476 | /* |
15477 | DATA = [[2.0, -0.5, 13]] |
15478 | WEIGHTS = [3, 1, 0, 0, 0, 0, 2, -0.5] |
15479 | INDICES = [1, 0, 2, 0, 1, 2, 2, 0] |
15480 | LENGTHS = [3, 0, 3, 2] |
15481 | OUTPUT = [0.5, 0, 0, 25] |
15482 | */ |
15483 | ShapeVector idims(ndims, 1); |
15484 | ShapeVector odims(ndims, 1); |
15485 | idims[0] = 3; |
15486 | odims[0] = 4; |
15487 | |
  auto *data = mod.createPlaceholder(DTy, idims, "data", false);
  auto *weights = mod.createPlaceholder(DTy, {8}, "weights", false);
  auto *indices =
      mod.createPlaceholder(ElemKind::Int64ITy, {8}, "indices", false);
  auto *lengths =
      mod.createPlaceholder(ElemKind::Int32ITy, {4}, "lengths", false);
15494 | |
15495 | bindings.allocate(data)->getHandle<DataType>() = { |
15496 | 2.0, |
15497 | -0.5, |
15498 | 13, |
15499 | }; |
15500 | bindings.allocate(weights)->getHandle<DataType>() = { |
15501 | 3, 1, 0, 0, 0, 0, 2, -0.5, |
15502 | }; |
15503 | bindings.allocate(indices)->getHandle<int64_t>() = { |
15504 | 1, 0, 2, 0, 1, 2, 2, 0, |
15505 | }; |
15506 | bindings.allocate(lengths)->getHandle<int32_t>() = { |
15507 | 3, |
15508 | 0, |
15509 | 3, |
15510 | 2, |
15511 | }; |
15512 | |
  auto *R = F->createSparseLengthsWeightedSum("SLWS", data, weights, indices,
                                              lengths);
  auto *S = F->createSave("save", R);
15516 | bindings.allocate(S->getPlaceholder()); |
15517 | |
15518 | EE.compile(CompilationMode::Infer); |
15519 | EE.run(bindings); |
15520 | |
15521 | Tensor &result = *bindings.get(S->getPlaceholder()); |
15522 | Tensor expected(DTy, odims); |
15523 | expected.getHandle<DataType>() = { |
15524 | 0.5, |
15525 | 0, |
15526 | 0, |
15527 | 25, |
15528 | }; |
15529 | |
15530 | EXPECT_TRUE(expected.isEqual(result)); |
15531 | } |
15532 | |
15533 | /// Test that SLWS is correctly supported in FloatTy in 1D. |
15534 | TEST_P(OperatorTest, SparseLengthsWeightedSum_1D_Float) { |
15535 | CHECK_IF_ENABLED(); |
15536 | testSLWS<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.0001, |
15537 | /* ndims */ 1); |
15538 | } |
15539 | |
15540 | /// Test that SLWS is correctly supported in FloatTy in 2D. |
15541 | TEST_P(OperatorTest, SparseLengthsWeightedSum_2D_Float) { |
15542 | CHECK_IF_ENABLED(); |
15543 | testSLWS<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.0001, |
15544 | /* ndims */ 2); |
15545 | } |
15546 | |
15547 | /// Test that SLWS is correctly supported in Float16Ty in 1D. |
15548 | TEST_P(OperatorTest, SparseLengthsWeightedSum_1D_Float16) { |
15549 | CHECK_IF_ENABLED(); |
15550 | testSLWS<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, 0.0001, |
15551 | /* ndims */ 1); |
15552 | } |
15553 | |
15554 | /// Test that SLWS is correctly supported in BFloat16Ty in 1D. |
15555 | TEST_P(OperatorTest, SparseLengthsWeightedSum_1D_BFloat16) { |
15556 | CHECK_IF_ENABLED(); |
15557 | testSLWS<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, 0.0001, |
15558 | /* ndims */ 1); |
15559 | } |
15560 | |
15561 | /// Test that SLWS is correctly supported in Float16Ty in 2D. |
15562 | TEST_P(OperatorTest, SparseLengthsWeightedSum_2D_Float16) { |
15563 | CHECK_IF_ENABLED(); |
15564 | testSLWS<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, 0.0001, |
15565 | /* ndims */ 2); |
15566 | } |
15567 | |
15568 | /// Test that SLWS is correctly supported in BFloat16Ty in 2D. |
15569 | TEST_P(OperatorTest, SparseLengthsWeightedSum_2D_BFloat16) { |
15570 | CHECK_IF_ENABLED(); |
15571 | testSLWS<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, 0.0001, |
15572 | /* ndims */ 2); |
15573 | } |
15574 | |
15575 | TEST_P(OperatorTest, SparseLengthsWeightedSumI8) { |
15576 | CHECK_IF_ENABLED(); |
15577 | |
15578 | /* |
15579 | DATA = [4, -1, 26] |
15580 | WEIGHTS = [6, 2, 0, 0, 0, 0, 4, -1] |
15581 | INDICES = [1, 0, 2, 0, 1, 2, 2, 0] |
15582 | LENGTHS = [3, 0, 3, 2] |
15583 | OUTPUT = [1, 0, 0, 50] |
15584 | */ |
  auto *data =
      mod_.createPlaceholder(ElemKind::Int8QTy, {3}, 0.5, 0, "data", false);
  auto *weights =
      mod_.createPlaceholder(ElemKind::Int8QTy, {8}, 0.5, 0, "weights", false);
  auto *indices =
      mod_.createPlaceholder(ElemKind::Int64ITy, {8}, "indices", false);
  auto *lengths =
      mod_.createPlaceholder(ElemKind::Int32ITy, {4}, "lengths", false);
15593 | |
15594 | bindings_.allocate(data)->getHandle<int8_t>() = { |
15595 | 4, |
15596 | -1, |
15597 | 26, |
15598 | }; |
15599 | bindings_.allocate(weights)->getHandle<int8_t>() = { |
15600 | 6, 2, 0, 0, 0, 0, 4, -1, |
15601 | }; |
15602 | bindings_.allocate(indices)->getHandle<int64_t>() = { |
15603 | 1, 0, 2, 0, 1, 2, 2, 0, |
15604 | }; |
15605 | bindings_.allocate(lengths)->getHandle<int32_t>() = { |
15606 | 3, |
15607 | 0, |
15608 | 3, |
15609 | 2, |
15610 | }; |
15611 | |
  auto *R = F_->createSparseLengthsWeightedSum("SLWS", data, weights, indices,
                                               lengths);
  auto *S = F_->createSave("save", R);
15615 | bindings_.allocate(S->getPlaceholder()); |
15616 | |
15617 | EE_.compile(CompilationMode::Infer); |
15618 | EE_.run(bindings_); |
15619 | |
15620 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
15621 | Tensor expected(ElemKind::Int8QTy, {4}, 0.5, 0); |
15622 | expected.getHandle<int8_t>() = { |
15623 | 1, |
15624 | 0, |
15625 | 0, |
15626 | 50, |
15627 | }; |
15628 | |
15629 | EXPECT_TRUE(expected.isEqual(result)); |
15630 | } |
15631 | |
15632 | /// Helper function to construct indices/offsets pair for EmbeddingBag |
15633 | /// and EmbeddingBagByteRowwiseOffsets |
15634 | template <typename DataType> |
15635 | static void addEmbeddingBagPartialInputs( |
15636 | glow::PlaceholderBindings &bindings, glow::Module &mod, ElemKind DTy, |
15637 | Placeholder *&weights, Placeholder *&indices, Placeholder *&offsets, |
15638 | bool hasEndOffset, bool partialInput = false) { |
15639 | |
15640 | if (hasEndOffset) { |
15641 | Tensor weightsTensorReal(DTy, {8}); |
15642 | Tensor indicesTensorReal(ElemKind::Int32ITy, {8}); |
15643 | Tensor offsetsTensorReal(ElemKind::Int32ITy, {5}); |
15644 | |
15645 | weightsTensorReal.getHandle<DataType>() = { |
15646 | 3, 1, 0, 0, 0, 0, 2, -0.5, |
15647 | }; |
15648 | indicesTensorReal.getHandle<int32_t>() = { |
15649 | 1, 0, 2, 0, 1, 2, 2, 0, |
15650 | }; |
15651 | offsetsTensorReal.getHandle<int32_t>() = { |
15652 | 0, 3, 3, 6, |
15653 | 8, // extra end offset |
15654 | }; |
15655 | |
15656 | if (partialInput) { |
      weights = mod.createPlaceholder(DTy, {20}, "weights", false);
      indices =
          mod.createPlaceholder(ElemKind::Int32ITy, {20}, "indices", false);
      offsets =
          mod.createPlaceholder(ElemKind::Int32ITy, {6}, "offsets", false);
15662 | |
      // Using partial weights would cause problems when the tensor is added
      // as a Constant, so we pad it with zeros here.
15665 | Tensor weightsTensorPadded(weights->getType()); |
15666 | memcpy(weightsTensorPadded.getUnsafePtr(), |
15667 | weightsTensorReal.getUnsafePtr(), |
15668 | weightsTensorReal.getSizeInBytes()); |
15669 | memset(weightsTensorPadded.getUnsafePtr() + |
15670 | weightsTensorReal.getSizeInBytes(), |
15671 | 0, |
15672 | weightsTensorPadded.getSizeInBytes() - |
15673 | weightsTensorReal.getSizeInBytes()); |
15674 | |
15675 | Tensor indicesTensorPartial(indicesTensorReal.getUnsafePtr(), |
15676 | indices->getType(), |
15677 | indicesTensorReal.getSizeInBytes()); |
15678 | Tensor offsetsTensorPartial(offsetsTensorReal.getUnsafePtr(), |
15679 | offsets->getType(), |
15680 | offsetsTensorReal.getSizeInBytes()); |
15681 | bindings.insert(weights, std::move(weightsTensorPadded)); |
15682 | bindings.insert(indices, indicesTensorPartial.clone()); |
15683 | bindings.insert(offsets, offsetsTensorPartial.clone()); |
15684 | } else { |
      weights = mod.createPlaceholder(DTy, {8}, "weights", false);
      indices =
          mod.createPlaceholder(ElemKind::Int32ITy, {8}, "indices", false);
      offsets =
          mod.createPlaceholder(ElemKind::Int32ITy, {5}, "offsets", false);
15690 | |
15691 | bindings.insert(weights, std::move(weightsTensorReal)); |
15692 | bindings.insert(indices, std::move(indicesTensorReal)); |
15693 | bindings.insert(offsets, std::move(offsetsTensorReal)); |
15694 | } |
15695 | } else { |
15696 | // We assume no partial inputs will be used if hasEndOffset is false |
15697 | Tensor weightsTensorReal(DTy, {8}); |
15698 | Tensor indicesTensorReal(ElemKind::Int32ITy, {8}); |
15699 | Tensor offsetsTensorReal(ElemKind::Int32ITy, {4}); |
15700 | |
15701 | weightsTensorReal.getHandle<DataType>() = { |
15702 | 3, 1, 0, 0, 0, 0, 2, -0.5, |
15703 | }; |
15704 | indicesTensorReal.getHandle<int32_t>() = { |
15705 | 1, 0, 2, 0, 1, 2, 2, 0, |
15706 | }; |
15707 | offsetsTensorReal.getHandle<int32_t>() = {0, 3, 3, 6}; |
15708 | |
    weights = mod.createPlaceholder(DTy, {8}, "weights", false);
    indices = mod.createPlaceholder(ElemKind::Int32ITy, {8}, "indices", false);
    offsets = mod.createPlaceholder(ElemKind::Int32ITy, {4}, "offsets", false);
15712 | |
15713 | bindings.insert(weights, std::move(weightsTensorReal)); |
15714 | bindings.insert(indices, std::move(indicesTensorReal)); |
15715 | bindings.insert(offsets, std::move(offsetsTensorReal)); |
15716 | } |
15717 | } |
15718 | |
15719 | /// Test Embedding. |
15720 | template <typename DataType> |
15721 | static void testEmbedding(glow::PlaceholderBindings &bindings, |
15722 | glow::Module &mod, glow::Function *F, |
15723 | glow::ExecutionEngine &EE, ElemKind DTy, |
15724 | float allowedError, int32_t padIdx = -1) { |
15725 | /* |
15726 | WEIGHTS = [[2.0, -0.5], [4, 5.1], [1, 2.3]] |
15727 | INDICES = [1, 0, 2] |
15728 | OUTPUT = [[4, 5.1], [2.0, -0.5], [1, 2.3]] |
15729 | */ |
15730 | |
  auto *weights = mod.createConstant(DTy, {3, 2}, "weights");
  auto *indices = mod.createConstant(ElemKind::Int32ITy, {3}, "indices");
15736 | bool scale = false; |
15737 | bool sparse = false; |
15738 | int32_t indexValues[] = {1, 0, 2}; |
15739 | |
15740 | weights->getPayloadMutable().getHandle<DataType>() = {2.0, -0.5, 4, |
15741 | 5.1, 1, 2.3}; |
15742 | indices->getPayloadMutable().getHandle<int32_t>() = indexValues; |
15743 | |
  auto *R =
      F->createEmbedding("Embedding", weights, indices, padIdx, scale, sparse);
  auto *S = F->createSave("save", R);
15747 | bindings.allocate(S->getPlaceholder()); |
15748 | |
15749 | EE.compile(CompilationMode::Infer); |
15750 | EE.run(bindings); |
15751 | |
15752 | Tensor &result = *bindings.get(S->getPlaceholder()); |
15753 | Tensor expected(DTy, {3, 2}); |
15754 | |
15755 | if (padIdx == -1) { |
15756 | expected.getHandle<DataType>() = {4, 5.1, 2.0, -0.5, 1, 2.3}; |
15757 | } else if (padIdx == 0) { |
15758 | expected.getHandle<DataType>() = {4, 5.1, 0, 0, 1, 2.3}; |
15759 | } else if (padIdx == 1) { |
15760 | expected.getHandle<DataType>() = {0, 0, 2.0, -0.5, 1, 2.3}; |
15761 | } else if (padIdx == 2) { |
15762 | expected.getHandle<DataType>() = {4, 5.1, 2.0, -0.5, 0, 0}; |
15763 | } |
15764 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
15765 | } |
15766 | |
15767 | /// Test that Embedding is correctly supported in FloatTy |
15768 | TEST_P(OperatorTest, Embedding_Float) { |
15769 | CHECK_IF_ENABLED(); |
15770 | testEmbedding<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.0001, -1); |
15771 | } |
15772 | |
15773 | /// Test that Embedding is correctly supported in Float16Ty |
15774 | TEST_P(OperatorTest, Embedding_Float16) { |
15775 | CHECK_IF_ENABLED(); |
15776 | testEmbedding<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15777 | 0.0001, -1); |
15778 | } |
15779 | |
15780 | /// Test that Embedding is correctly supported when PadIdx is specified. |
15781 | TEST_P(OperatorTest, Embedding_with_PadIdx) { |
15782 | CHECK_IF_ENABLED(); |
15783 | testEmbedding<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.0001, 2); |
15784 | } |
15785 | |
15786 | TEST_P(OperatorTest, Embedding_with_PadIdx_Float16) { |
15787 | CHECK_IF_ENABLED(); |
15788 | testEmbedding<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15789 | 0.0001, 2); |
15790 | } |
15791 | |
15792 | /// Test EmbeddingBag with an N-dimension embedding table. |
15793 | template <typename DataType> |
15794 | static void testEmbeddingBag(glow::PlaceholderBindings &bindings, |
15795 | glow::Module &mod, glow::Function *F, |
15796 | glow::ExecutionEngine &EE, ElemKind DTy, |
15797 | float allowedError, dim_t ndims, bool hasEndOffset, |
15798 | bool partialInput = false) { |
15799 | /* |
15800 | DATA = [[2.0, -0.5, 13]] |
15801 | WEIGHTS = [3, 1, 0, 0, 0, 0, 2, -0.5] |
15802 | INDICES = [1, 0, 2, 0, 1, 2, 2, 0] |
15803 | OFFSETS = [0, 3, 3, 6] |
15804 | OUTPUT = [0.5, 0, 0, 25] |
15805 | */ |
15806 | ShapeVector idims(ndims, 1); |
15807 | ShapeVector odims(ndims, 1); |
15808 | idims[0] = 3; |
15809 | odims[0] = partialInput ? 5 : 4; |
15810 | |
  auto *data = mod.createPlaceholder(DTy, idims, "data", false);
15812 | |
15813 | bindings.allocate(data)->getHandle<DataType>() = { |
15814 | 2.0, |
15815 | -0.5, |
15816 | 13, |
15817 | }; |
15818 | |
15819 | // If hasEndOffset then add some additional junk to the end of indices and |
15820 | // weights and an extra offset to offsets. |
15821 | Placeholder *weights; |
15822 | Placeholder *indices; |
15823 | Placeholder *offsets; |
15824 | |
15825 | addEmbeddingBagPartialInputs<DataType>(bindings, mod, DTy, weights, indices, |
15826 | offsets, hasEndOffset, partialInput); |
15827 | |
  auto *R = F->createEmbeddingBag("EB", data, weights, indices, offsets,
                                  hasEndOffset);
  auto *S = F->createSave("save", R);
15831 | bindings.allocate(S->getPlaceholder()); |
15832 | |
15833 | EE.compile(CompilationMode::Infer); |
15834 | EE.run(bindings); |
15835 | |
15836 | Tensor &result = *bindings.get(S->getPlaceholder()); |
15837 | Tensor expected(DTy, odims); |
15838 | if (partialInput) { |
15839 | expected.getHandle<DataType>() = { |
15840 | 0.5, 0, 0, 25, 0, |
15841 | }; |
15842 | } else { |
15843 | expected.getHandle<DataType>() = { |
15844 | 0.5, |
15845 | 0, |
15846 | 0, |
15847 | 25, |
15848 | }; |
15849 | } |
15850 | |
15851 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
15852 | } |
15853 | |
15854 | /// Test that EB is correctly supported in FloatTy in 1D. |
15855 | TEST_P(OperatorTest, EmbeddingBag_1D_Float) { |
15856 | CHECK_IF_ENABLED(); |
15857 | testEmbeddingBag<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.0001, |
15858 | /* ndims */ 1, /* hasEndOffset */ false); |
15859 | } |
15860 | |
15861 | /// Test that EB is correctly supported in FloatTy in 1D with an end offset. |
15862 | TEST_P(OperatorTest, EmbeddingBag_1D_Float_End_Offset) { |
15863 | CHECK_IF_ENABLED(); |
15864 | testEmbeddingBag<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.0001, |
15865 | /* ndims */ 1, /* hasEndOffset */ true); |
15866 | } |
15867 | |
15868 | /// Test that EB is correctly supported in FloatTy in 2D. |
15869 | TEST_P(OperatorTest, EmbeddingBag_2D_Float) { |
15870 | CHECK_IF_ENABLED(); |
15871 | testEmbeddingBag<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.0001, |
15872 | /* ndims */ 2, /* hasEndOffset */ false); |
15873 | } |
15874 | |
15875 | /// Test that EB is correctly supported in FloatTy in 2D with an end offset. |
15876 | TEST_P(OperatorTest, EmbeddingBag_2D_Float_End_Offset) { |
15877 | CHECK_IF_ENABLED(); |
15878 | testEmbeddingBag<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.0001, |
15879 | /* ndims */ 2, /* hasEndOffset */ true); |
15880 | } |
15881 | |
15882 | /// Test that EB is correctly supported in Float16Ty in 1D. |
15883 | TEST_P(OperatorTest, EmbeddingBag_1D_Float16) { |
15884 | CHECK_IF_ENABLED(); |
15885 | testEmbeddingBag<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15886 | 0.0001, |
15887 | /* ndims */ 1, /* hasEndOffset */ false); |
15888 | } |
15889 | |
15890 | /// Test that EB is correctly supported in BFloat16Ty in 1D. |
15891 | TEST_P(OperatorTest, EmbeddingBag_1D_BFloat16) { |
15892 | CHECK_IF_ENABLED(); |
15893 | testEmbeddingBag<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
15894 | 0.0001, |
15895 | /* ndims */ 1, /* hasEndOffset */ false); |
15896 | } |
15897 | |
15898 | /// Test that EB is correctly supported in Float16Ty in 1D with an end offset. |
15899 | TEST_P(OperatorTest, EmbeddingBag_1D_Float16_End_Offset) { |
15900 | CHECK_IF_ENABLED(); |
15901 | testEmbeddingBag<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15902 | 0.0001, |
15903 | /* ndims */ 1, /* hasEndOffset */ true); |
15904 | } |
15905 | |
15906 | /// Test that EB is correctly supported in BFloat16Ty in 1D with an end |
15907 | /// offset. |
15908 | TEST_P(OperatorTest, EmbeddingBag_1D_BFloat16_End_Offset) { |
15909 | CHECK_IF_ENABLED(); |
15910 | testEmbeddingBag<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
15911 | 0.0001, |
15912 | /* ndims */ 1, /* hasEndOffset */ true); |
15913 | } |
15914 | |
15915 | /// Test that EB is correctly supported in Float16Ty in 2D. |
15916 | TEST_P(OperatorTest, EmbeddingBag_2D_Float16) { |
15917 | CHECK_IF_ENABLED(); |
15918 | testEmbeddingBag<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15919 | 0.0001, |
15920 | /* ndims */ 2, /* hasEndOffset */ false); |
15921 | } |
15922 | |
15923 | /// Test that EB is correctly supported in BFloat16Ty in 2D. |
15924 | TEST_P(OperatorTest, EmbeddingBag_2D_BFloat16) { |
15925 | CHECK_IF_ENABLED(); |
15926 | testEmbeddingBag<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
15927 | 0.0001, |
15928 | /* ndims */ 2, /* hasEndOffset */ false); |
15929 | } |
15930 | |
15931 | /// Test that EB is correctly supported in Float16Ty in 2D with an end offset. |
15932 | TEST_P(OperatorTest, EmbeddingBag_2D_Float16_End_Offset) { |
15933 | CHECK_IF_ENABLED(); |
15934 | testEmbeddingBag<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
15935 | 0.0001, |
15936 | /* ndims */ 2, /* hasEndOffset */ true); |
15937 | } |
15938 | |
15939 | /// Test that EB is correctly supported in BFloat16Ty in 2D with an end |
15940 | /// offset. |
15941 | TEST_P(OperatorTest, EmbeddingBag_2D_BFloat16_End_Offset) { |
15942 | CHECK_IF_ENABLED(); |
15943 | testEmbeddingBag<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
15944 | 0.0001, |
15945 | /* ndims */ 2, /* hasEndOffset */ true); |
15946 | } |
15947 | |
15948 | /// Test that EB is correctly supported in FloatTy in 1D with an end offset |
15949 | /// and partial inputs. |
15950 | TEST_P(OperatorTest, EmbeddingBag_1D_Float_End_Offset_Partial) { |
15951 | CHECK_IF_ENABLED(); |
15952 | ASSERT_TRUE(EE_.getBackend(getBackendName()).supportsPartialTensors()); |
15953 | testEmbeddingBag<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.0001, |
15954 | /* ndims */ 1, /* hasEndOffset */ true, |
15955 | /* partialInput */ true); |
15956 | } |
15957 | |
/// Test that EB is correctly supported in FloatTy in 2D with an end offset
/// and partial inputs.
15960 | TEST_P(OperatorTest, EmbeddingBag_2D_Float_End_Offset_Partial) { |
15961 | CHECK_IF_ENABLED(); |
15962 | ASSERT_TRUE(EE_.getBackend(getBackendName()).supportsPartialTensors()); |
15963 | testEmbeddingBag<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.0001, |
15964 | /* ndims */ 2, /* hasEndOffset */ true, |
15965 | /* partialInput */ true); |
15966 | } |
15967 | |
/// Helper to test EmbeddingBagByteRowwiseOffsets using \p fusedDTy.
15969 | template <typename DataType> |
15970 | static void testEmbeddingBagByteRowwiseOffsets( |
15971 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
15972 | glow::ExecutionEngine &EE, ElemKind fusedDTy, float allowedError, |
15973 | bool useFP16Accumulation, bool hasEndOffset, bool partialInput = false) { |
15974 | /* |
15975 | DATA = [[2.0, -0.5, 13]] |
15976 | WEIGHTS = [3, 1, 0, 0, 0, 0, 2, -0.5] |
15977 | INDICES = [1, 0, 2, 0, 1, 2, 2, 0] |
15978 | OFFSETS = [0, 3, 3, 6] |
15979 | OUTPUT = [0.5, 0, 0, 25] |
15980 | */ |
15981 | const bool fusedData = isFusedQuantizedElemKind(fusedDTy); |
15982 | const ElemKind DTy = |
15983 | fusedData ? getScaleOffsetElemKindFromFused(fusedDTy) : fusedDTy; |
15984 | Tensor data(ElemKind::FloatTy, {3, 1}); |
15985 | data.getHandle() = { |
15986 | 2.0, |
15987 | -0.5, |
15988 | 13, |
15989 | }; |
15990 | |
15991 | // If hasEndOffset then add some additional junk to the end of indices and |
15992 | // weights and an extra offset to offsets. |
  // Note that weights here needs to be a Constant instead of a Placeholder
  // for EmbeddingBagByteRowwiseOffsets, so we convert it below via
  // convertPlaceholdersToConstants.
15995 | Placeholder *weights; |
15996 | Placeholder *indices; |
15997 | Placeholder *offsets; |
15998 | |
15999 | addEmbeddingBagPartialInputs<DataType>(bindings, mod, DTy, weights, indices, |
16000 | offsets, hasEndOffset, partialInput); |
16001 | |
  auto *R = F->createEmbeddingBagByteRowwiseOffsets(
      "EBBRO", data, weights, indices, offsets, fusedDTy, useFP16Accumulation,
      hasEndOffset);
  SaveNode *S = F->createSave("save", R);
16006 | bindings.allocate(S->getPlaceholder()); |
16007 | |
16008 | ::glow::convertPlaceholdersToConstants( |
16009 | F, bindings, {indices, offsets, S->getPlaceholder()}); |
16010 | |
16011 | EE.compile(CompilationMode::Infer); |
16012 | EE.run(bindings); |
16013 | |
16014 | Tensor &result = *bindings.get(S->getPlaceholder()); |
16015 | ShapeVector odims(2, 1); |
16016 | odims[0] = partialInput ? 5 : 4; |
16017 | Tensor expected(DTy, odims); |
16018 | if (partialInput) { |
16019 | expected.getHandle<DataType>() = { |
16020 | 0.5, 0, 0, 25, 0, |
16021 | }; |
16022 | } else { |
16023 | expected.getHandle<DataType>() = { |
16024 | 0.5, |
16025 | 0, |
16026 | 0, |
16027 | 25, |
16028 | }; |
16029 | } |
16030 | |
16031 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
16032 | } |
16033 | |
16034 | /// Test EmbeddingBagByteRowwiseOffsets in Float. |
16035 | TEST_P(OperatorTest, EmbeddingBagByteRowwiseOffsets_Float) { |
16036 | CHECK_IF_ENABLED(); |
16037 | testEmbeddingBagByteRowwiseOffsets<float>( |
16038 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedQTy, 0.0001, |
16039 | /* useFP16Accumulation */ false, /* hasEndOffset */ false); |
16040 | } |
16041 | |
16042 | /// Test EmbeddingBagByteRowwiseOffsets in Float with end offset. |
16043 | TEST_P(OperatorTest, EmbeddingBagByteRowwiseOffsets_Float_End_Offset) { |
16044 | CHECK_IF_ENABLED(); |
16045 | testEmbeddingBagByteRowwiseOffsets<float>( |
16046 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedQTy, 0.0001, |
16047 | /* useFP16Accumulation */ false, /* hasEndOffset */ true); |
16048 | } |
16049 | |
16050 | /// Test EmbeddingBagByteRowwiseOffsets in Float with end offset and partial |
16051 | /// inputs. |
16052 | TEST_P(OperatorTest, EmbeddingBagByteRowwiseOffsets_Float_End_Offset_Partial) { |
16053 | CHECK_IF_ENABLED(); |
16054 | ASSERT_TRUE(EE_.getBackend(getBackendName()).supportsPartialTensors()); |
16055 | testEmbeddingBagByteRowwiseOffsets<float>( |
16056 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedQTy, 0.0001, |
      /* useFP16Accumulation */ false, /* hasEndOffset */ true,
      /* partialInput */ true);
16059 | } |
16060 | |
16061 | /// Test EmbeddingBagByteRowwiseOffsets in Float16. Uses Float accumulation. |
16062 | TEST_P(OperatorTest, EmbeddingBagByteRowwiseOffsets_Float16_AccumFloat) { |
16063 | CHECK_IF_ENABLED(); |
16064 | testEmbeddingBagByteRowwiseOffsets<float16_t>( |
16065 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, 0.0001, |
16066 | /* useFP16Accumulation */ false, /* hasEndOffset */ false); |
16067 | } |
16068 | |
16069 | /// Test EmbeddingBagByteRowwiseOffsets in Float16. Uses Float accumulation. |
16070 | /// Has end offset. |
16071 | TEST_P(OperatorTest, |
16072 | EmbeddingBagByteRowwiseOffsets_Float16_AccumFloat_End_Offset) { |
16073 | CHECK_IF_ENABLED(); |
16074 | testEmbeddingBagByteRowwiseOffsets<float16_t>( |
16075 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, 0.0001, |
16076 | /* useFP16Accumulation */ false, /* hasEndOffset */ true); |
16077 | } |
16078 | |
16079 | /// Test EmbeddingBagByteRowwiseOffsets in Float16. Uses Float accumulation. |
16080 | /// Has end offset and using partial inputs. |
16081 | TEST_P(OperatorTest, |
16082 | EmbeddingBagByteRowwiseOffsets_Float16_AccumFloat_End_Offset_Partial) { |
16083 | CHECK_IF_ENABLED(); |
16084 | ASSERT_TRUE(EE_.getBackend(getBackendName()).supportsPartialTensors()); |
16085 | testEmbeddingBagByteRowwiseOffsets<float16_t>( |
16086 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, 0.0001, |
      /* useFP16Accumulation */ false, /* hasEndOffset */ true,
      /* partialInput */ true);
16089 | } |
16090 | |
16091 | /// Test EmbeddingBagByteRowwiseOffsets in Float16. Uses Float16 accumulation. |
16092 | TEST_P(OperatorTest, EmbeddingBagByteRowwiseOffsets_Float16_AccumFloat16) { |
16093 | CHECK_IF_ENABLED(); |
16094 | testEmbeddingBagByteRowwiseOffsets<float16_t>( |
16095 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, 0.0001, |
16096 | /* useFP16Accumulation */ true, /* hasEndOffset */ false); |
16097 | } |
16098 | |
16099 | /// Test EmbeddingBagByteRowwiseOffsets in Float16. Uses Float16 accumulation. |
16100 | /// Has end offset. |
16101 | TEST_P(OperatorTest, |
16102 | EmbeddingBagByteRowwiseOffsets_Float16_AccumFloat16_End_Offset) { |
16103 | CHECK_IF_ENABLED(); |
16104 | testEmbeddingBagByteRowwiseOffsets<float16_t>( |
16105 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, 0.0001, |
16106 | /* useFP16Accumulation */ true, /* hasEndOffset */ true); |
16107 | } |
16108 | |
16109 | /// Test EmbeddingBagByteRowwiseOffsets in Float16. Uses Float16 accumulation. |
16110 | /// Has end offset and using partial inputs. |
16111 | TEST_P(OperatorTest, |
16112 | EmbeddingBagByteRowwiseOffsets_Float16_AccumFloat16_End_Offset_Partial) { |
16113 | CHECK_IF_ENABLED(); |
16114 | ASSERT_TRUE(EE_.getBackend(getBackendName()).supportsPartialTensors()); |
  testEmbeddingBagByteRowwiseOffsets<float16_t>(
      bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, 0.0001,
      /* useFP16Accumulation */ true, /* hasEndOffset */ true,
      /* partialInput */ true);
16119 | } |
16120 | |
16121 | /// Helper to test EmbeddingBag4BitRowwiseOffsets. |
16122 | template <typename DataType> |
16123 | static void testEmbeddingBag4BitRowwiseOffsets( |
16124 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
16125 | glow::ExecutionEngine &EE, bool useFP16Accumulation, bool hasEndOffset, |
16126 | float allowedError) { |
16127 | /* |
16128 | DATA = [[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3], // First Slice. |
16129 | [-3, -2, -1., 0], [0, -1, -2, -3], // Second Slice. |
16130 | [2, 2, 2, 2,], [2, 2, 2, 2] // Third Slice. |
16131 | ] |
16132 | WEIGHTS = [1, 2, 3, 2, 0.5, -0.5, 2] |
16133 | INDICES = [0, 1, 2, 4, 3, 5, 6] |
16134 | OFFSETS = [ |
16135 | 0, // This slice contains numbers >= 0. |
16136 | 3, // This slice contains numbers <= 0. |
16137 | 5, // This slice contains numbers which are all the same. |
16138 | 7, // Empty slice. |
16139 | ] |
16140 | OUTPUT = [[0, 6, 12, 18], // Output row per slice. |
16141 | [-1.5, -3, -4.5, -6], |
16142 | [3, 3, 3, 3] |
16143 | [0, 0, 0, 0]] |
16144 | */ |
16145 | Tensor data(ElemKind::FloatTy, {7, 4}); |
16146 | data.getHandle() = { |
16147 | 0., 1., 2., 3., 0., 1., 2., 3., 0., 1., 2., 3., -3., -2., |
16148 | -1., 0., 0., -1., -2., -3., 2., 2., 2., 2., 2., 2., 2., 2., |
16149 | }; |
16150 | |
16151 | // If hasEndOffset then add some additional junk to the end of indices and |
16152 | // weights and an extra offset to offsets. |
16153 | Constant *weights; |
16154 | Placeholder *indices; |
16155 | Placeholder *offsets; |
16156 | if (hasEndOffset) { |
16157 | weights = mod.createConstant(ElemKind::Float16Ty, {9}, "weights" ); |
16158 | weights->getPayloadMutable().getHandle<DataType>() = { |
16159 | 1., |
16160 | 2., |
16161 | 3., |
16162 | 2, |
16163 | 0.5, |
16164 | -0.5, |
16165 | 2, |
16166 | -42.0 /* A dummy weight for end offset. */, |
16167 | 42.0 /* A dummy weight for end offset. */, |
16168 | }; |
16169 | |
16170 | indices = mod.createPlaceholder(ElemKind::Int32ITy, {9}, "indices" , |
16171 | /* isTrainable */ false); |
16172 | offsets = mod.createPlaceholder(ElemKind::Int32ITy, {5}, "offsets" , |
16173 | /* isTrainable */ false); |
16174 | |
16175 | bindings.allocate(indices)->getHandle<int32_t>() = { |
16176 | 0, |
16177 | 1, |
16178 | 2, |
16179 | 4, |
16180 | 3, |
16181 | 5, |
16182 | 6, |
        100 /* A dummy index for end offset. */,
        200 /* A dummy index for end offset. */,
16185 | }; |
16186 | |
16187 | bindings.allocate(offsets)->getHandle<int32_t>() = { |
16188 | 0, // This slice contains numbers >= 0. |
16189 | 3, // This slice contains numbers <= 0. |
16190 | 5, // This slice contains numbers which are all the same. |
16191 | 7, // Empty slice. |
16192 | 7, // Dummy end offset. |
16193 | }; |
16194 | |
16195 | } else { |
16196 | weights = mod.createConstant(ElemKind::Float16Ty, {7}, "weights" ); |
16197 | weights->getPayloadMutable().getHandle<DataType>() = { |
16198 | 1., 2., 3., 2, 0.5, -0.5, 2, |
16199 | }; |
16200 | |
16201 | indices = mod.createPlaceholder(ElemKind::Int32ITy, {7}, "indices" , |
16202 | /* isTrainable */ false); |
16203 | offsets = mod.createPlaceholder(ElemKind::Int32ITy, {4}, "offsets" , |
16204 | /* isTrainable */ false); |
16205 | |
16206 | bindings.allocate(indices)->getHandle<int32_t>() = { |
16207 | 0, 1, 2, 4, 3, 5, 6, |
16208 | }; |
16209 | bindings.allocate(offsets)->getHandle<int32_t>() = { |
16210 | 0, // This slice contains numbers >= 0. |
16211 | 3, // This slice contains numbers <= 0. |
16212 | 5, // This slice contains numbers which are all the same. |
16213 | 7, // Empty slice. |
16214 | }; |
16215 | } |
16216 | |
16217 | auto *R = F->createEmbeddingBagByteRowwiseOffsets( |
16218 | "EBBRO" , data, weights, indices, offsets, ElemKind::UInt4FusedFP16QTy, |
16219 | useFP16Accumulation, hasEndOffset); |
16220 | SaveNode *S = F->createSave("save" , R); |
16221 | bindings.allocate(S->getPlaceholder()); |
16222 | |
16223 | EE.compile(CompilationMode::Infer); |
16224 | EE.run(bindings); |
16225 | |
16226 | Tensor &result = *bindings.get(S->getPlaceholder()); |
16227 | Tensor expected(ElemKind::Float16Ty, {4, 4}); |
16228 | expected.getHandle<DataType>() = {0., 6., 12., 18., -1.5, -3., -4.5, -6, |
16229 | 3., 3., 3., 3., 0., 0., 0., 0.}; |
16230 | |
16231 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
16232 | } |
16233 | |
16234 | TEST_P(OperatorTest, EmbeddingBag4BitRowwiseOffsets_Float16) { |
16235 | CHECK_IF_ENABLED(); |
16236 | testEmbeddingBag4BitRowwiseOffsets<float16_t>( |
16237 | bindings_, mod_, F_, EE_, |
16238 | /* useFP16Accumulation */ false, /* hasEndOffset */ false, 0.005); |
16239 | } |
16240 | |
16241 | TEST_P(OperatorTest, EmbeddingBag4BitRowwiseOffsets_Float16_AccumFloat) { |
16242 | CHECK_IF_ENABLED(); |
16243 | testEmbeddingBag4BitRowwiseOffsets<float16_t>( |
16244 | bindings_, mod_, F_, EE_, |
16245 | /* useFP16Accumulation */ true, /* hasEndOffset */ false, 0.005); |
16246 | } |
16247 | |
16248 | TEST_P(OperatorTest, EmbeddingBag4BitRowwiseOffsets_Float16_HasEndOffset) { |
16249 | CHECK_IF_ENABLED(); |
16250 | testEmbeddingBag4BitRowwiseOffsets<float16_t>(bindings_, mod_, F_, EE_, |
16251 | /* useFP16Accumulation */ false, |
16252 | /* hasEndOffset */ true, 0.005); |
16253 | } |
16254 | |
16255 | TEST_P(OperatorTest, |
16256 | EmbeddingBag4BitRowwiseOffsets_Float16_HasEndOffset_AccumFloat) { |
16257 | CHECK_IF_ENABLED(); |
16258 | testEmbeddingBag4BitRowwiseOffsets<float16_t>(bindings_, mod_, F_, EE_, |
16259 | /* useFP16Accumulation */ true, |
16260 | /* hasEndOffset */ true, 0.005); |
16261 | } |
16262 | |
/// Helper to test RowwiseQuantizedSparseLengthsWeightedSum using \p DTy, with
/// indices of kind \p ITy.
16264 | template <typename DataType, typename IndexType> |
16265 | static void testRowwiseQuantizedSparseLengthsWeightedSum( |
16266 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
16267 | glow::ExecutionEngine &EE, ElemKind DTy, ElemKind ITy, float allowedError, |
16268 | bool useFP16Accumulation = false) { |
16269 | /* |
16270 | DATA = [2.0, -0.5, 13] |
16271 | WEIGHTS = [3, 1, 0, 0, 0, 0, 2, -0.5] |
16272 | INDICES = [1, 0, 2, 0, 1, 2, 2, 0] |
16273 | LENGTHS = [3, 0, 3, 2] |
16274 | OUTPUT = [0.5, 0, 0, 25] |
16275 | */ |
16276 | Tensor data(ElemKind::FloatTy, {3}); |
16277 | data.getHandle<float>() = { |
16278 | 2.0, |
16279 | -0.5, |
16280 | 13, |
16281 | }; |
16282 | |
16283 | Constant *weights = mod.createConstant(DTy, {8}, "weights" ); |
16284 | weights->getPayloadMutable().getHandle<DataType>() = { |
16285 | 3., 1., 0., 0., 0., 0., 2., -0.5, |
16286 | }; |
16287 | |
16288 | Placeholder *indices = mod.createPlaceholder(ITy, {8}, "indices" , |
16289 | /* isTrainable */ false); |
16290 | Placeholder *lengths = |
16291 | mod.createPlaceholder(ElemKind::Int32ITy, {4}, "lengths" , |
16292 | /* isTrainable */ false); |
16293 | |
16294 | bindings.allocate(indices)->getHandle<IndexType>() = { |
16295 | 1, 0, 2, 0, 1, 2, 2, 0, |
16296 | }; |
16297 | bindings.allocate(lengths)->getHandle<int32_t>() = { |
16298 | 3, |
16299 | 0, |
16300 | 3, |
16301 | 2, |
16302 | }; |
16303 | |
16304 | auto *R = F->createRowwiseQuantizedSparseLengthsWeightedSum( |
16305 | "RQSLWS" , data, weights, indices, lengths, |
16306 | quantization::Schema::Asymmetric, DTy, useFP16Accumulation); |
16307 | SaveNode *S = F->createSave("save" , R); |
16308 | bindings.allocate(S->getPlaceholder()); |
16309 | |
16310 | EE.compile(CompilationMode::Infer); |
16311 | EE.run(bindings); |
16312 | |
16313 | Tensor &result = *bindings.get(S->getPlaceholder()); |
16314 | Tensor expected(DTy, {4}); |
16315 | expected.getHandle<DataType>() = { |
16316 | 0.5, |
16317 | 0, |
16318 | 0, |
16319 | 25, |
16320 | }; |
16321 | |
16322 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
16323 | } |
16324 | |
16325 | /// Test RWQ-SLWS with Float Weights, Scales, Offsets, and Output. |
16326 | TEST_P(OperatorTest, RowwiseQuantizedSparseLengthsWeightedSum_Float) { |
16327 | CHECK_IF_ENABLED(); |
16328 | testRowwiseQuantizedSparseLengthsWeightedSum<float, int64_t>( |
16329 | bindings_, mod_, F_, EE_, ElemKind::FloatTy, ElemKind::Int64ITy, 0.0001); |
16330 | } |
16331 | |
16332 | /// Test RWQ-SLWS with Float16 Weights, Scales, Offsets, and Output. Uses |
16333 | /// Float accumulation. |
16334 | TEST_P(OperatorTest, |
16335 | RowwiseQuantizedSparseLengthsWeightedSum_Float16_AccumFloat) { |
16336 | CHECK_IF_ENABLED(); |
16337 | testRowwiseQuantizedSparseLengthsWeightedSum<float16_t, int64_t>( |
16338 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int64ITy, 0.0001, |
16339 | /* useFP16Accumulation */ false); |
16340 | } |
16341 | |
16342 | /// Test RWQ-SLWS with Float16 Weights, Scales, Offsets, and Output. Uses |
16343 | /// Float16 accumulation. |
16344 | TEST_P(OperatorTest, |
16345 | RowwiseQuantizedSparseLengthsWeightedSum_Float16_AccumFloat16) { |
16346 | CHECK_IF_ENABLED(); |
16347 | testRowwiseQuantizedSparseLengthsWeightedSum<float16_t, int64_t>( |
16348 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int64ITy, 0.0001, |
16349 | /* useFP16Accumulation */ true); |
16350 | } |
16351 | |
16352 | /// Test RWQ-SLWS with Float Weights, Scales, Offsets, and Output. Int32 |
16353 | /// indices. |
16354 | TEST_P(OperatorTest, RowwiseQuantizedSparseLengthsWeightedSum_Float_Int32) { |
16355 | CHECK_IF_ENABLED(); |
16356 | testRowwiseQuantizedSparseLengthsWeightedSum<float, int32_t>( |
16357 | bindings_, mod_, F_, EE_, ElemKind::FloatTy, ElemKind::Int32ITy, 0.0001); |
16358 | } |
16359 | |
16360 | /// Test RWQ-SLWS with Float16 Weights, Scales, Offsets, and Output. Uses |
16361 | /// Float accumulation. Int32 indices. |
16362 | TEST_P(OperatorTest, |
16363 | RowwiseQuantizedSparseLengthsWeightedSum_Float16_AccumFloat_Int32) { |
16364 | CHECK_IF_ENABLED(); |
16365 | testRowwiseQuantizedSparseLengthsWeightedSum<float16_t, int32_t>( |
16366 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int32ITy, 0.0001, |
16367 | /* useFP16Accumulation */ false); |
16368 | } |
16369 | |
16370 | /// Test RWQ-SLWS with Float16 Weights, Scales, Offsets, and Output. Uses |
16371 | /// Float16 accumulation. Int32 indices. |
16372 | TEST_P(OperatorTest, |
16373 | RowwiseQuantizedSparseLengthsWeightedSum_Float16_AccumFloat16_Int32) { |
16374 | CHECK_IF_ENABLED(); |
16375 | testRowwiseQuantizedSparseLengthsWeightedSum<float16_t, int32_t>( |
16376 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int32ITy, 0.0001, |
16377 | /* useFP16Accumulation */ true); |
16378 | } |
16379 | |
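/// Builds a RWQ-SLWS graph in which every element of the data is the same
/// value (0.1), so each row quantizes with min == max; this appears designed
/// to stress handling of degenerate (zero-range) rowwise scales.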
16380 | static FunctionTensorPair |
16381 | createAndInitRWQSLWSAllSame(glow::PlaceholderBindings &bindings, |
16382 | glow::ExecutionEngine &EE) { |
16383 | auto &mod = EE.getModule(); |
16384 | Function *F = mod.createFunction("main" ); |
16385 | |
16386 | Tensor data(ElemKind::FloatTy, {20, 2}); |
16387 | data.getHandle<float>() = { |
16388 | 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, |
16389 | 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, |
16390 | 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, |
16391 | }; |
16392 | |
16393 | Constant *weights = mod.createConstant(ElemKind::FloatTy, {21}, "weights" ); |
16394 | weights->getPayloadMutable().getHandle<float>() = { |
16395 | 0.44419134, 0.3419154, 0.28775468, 0.47224975, 0.05422213, 0.14346851, |
16396 | 0.05846643, 0.3750175, 0.09190885, 0.3335992, 0.09665264, 0.4560224, |
16397 | 0.2244578, 0.44881952, 0.42696562, 0.33007848, 0.4511249, 0.11568925, |
16398 | 0.02629679, 0.33864713, 0.42614424}; |
16399 | |
16400 | Placeholder *indices = |
16401 | mod.createPlaceholder(ElemKind::Int64ITy, {21}, "indices" , |
16402 | /* isTrainable */ false); |
16403 | Placeholder *lengths = |
16404 | mod.createPlaceholder(ElemKind::Int32ITy, {2}, "lengths" , |
16405 | /* isTrainable */ false); |
16406 | |
16407 | bindings.allocate(indices)->getHandle<int64_t>() = { |
16408 | 11, 8, 19, 8, 4, 11, 4, 19, 6, 18, 2, 6, 15, 5, 14, 14, 15, 13, 4, 6, 5, |
16409 | }; |
16410 | bindings.allocate(lengths)->getHandle<int32_t>() = {15, 6}; |
16411 | |
16412 | auto *R = F->createRowwiseQuantizedSparseLengthsWeightedSum( |
16413 | "RQSLWS" , data, weights, indices, lengths, |
16414 | quantization::Schema::Asymmetric, ElemKind::FloatTy, |
16415 | /* useFP16Accumulation */ false); |
16416 | SaveNode *S = F->createSave("save" , R); |
16417 | Tensor *resultT = bindings.allocate(S->getPlaceholder()); |
16418 | |
16419 | return std::make_pair(F, resultT); |
16420 | } |
16421 | |
16422 | TEST_P(OperatorStatelessTest, RWQSLWSAllSame_Float16_AccumFP16) { |
16423 | CHECK_IF_ENABLED(); |
16424 | compareAgainstInterpreter( |
16425 | getBackendName(), createAndInitRWQSLWSAllSame, ElemKind::Float16Ty, |
16426 | ElemKind::Float16Ty, 0.0005, parCloneCountOpt, |
16427 | /* convertToRowwiseQuantization */ false, |
      /* schema */ quantization::Schema::Asymmetric,
16429 | /* biasElemKind */ ElemKind::Int32QTy, /* forceFP16AccumSLS */ true); |
16430 | } |
16431 | |
16432 | TEST_P(OperatorStatelessTest, RWQSLWSAllSame_Float16_AccumFP32) { |
16433 | CHECK_IF_ENABLED(); |
16434 | compareAgainstInterpreter( |
16435 | getBackendName(), createAndInitRWQSLWSAllSame, ElemKind::Float16Ty, |
16436 | ElemKind::Float16Ty, 1e-6, parCloneCountOpt, |
16437 | /* convertToRowwiseQuantization */ false, |
      /* schema */ quantization::Schema::Asymmetric,
16439 | /* biasElemKind */ ElemKind::Int32QTy, /* forceFP16AccumSLS */ false); |
16440 | } |
16441 | |
/// Helper to test RowwiseQuantizedSparseLengthsSum using \p DTy.
16443 | template <typename DataType> |
16444 | static void testRowwiseQuantizedSparseLengthsSum( |
16445 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
16446 | glow::ExecutionEngine &EE, ElemKind DTy, float allowedError, |
16447 | bool useFP16Accumulation = false) { |
16448 | /* |
16449 | DATA = [ |
16450 | [1.0, 1.2], |
16451 | [2.3, 3.4], |
16452 | [4.5, 5.7], |
16453 | ] |
16454 | INDICES = [2, 0, 1, 2, 0, 0, 0, 0] |
16455 | LENGTHS = [2, 0, 2, 1, 3] |
16456 | OUTPUT = [ |
16457 | [5.5, 6.9], |
16458 | [0.0, 0.0], |
16459 | [6.8, 9.1], |
16460 | [1.0, 1.2], |
16461 | [3.0, 3.6], |
16462 | ] |
16463 | */ |
16464 | Tensor data(ElemKind::FloatTy, {3, 2}); |
16465 | data.getHandle() = { |
16466 | 1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f, |
16467 | }; |
16468 | |
16469 | Placeholder *indices = |
16470 | mod.createPlaceholder(ElemKind::Int64ITy, {8}, "indices" , |
16471 | /* isTrainable */ false); |
16472 | Placeholder *lengths = mod.createPlaceholder( |
16473 | ElemKind::Int32ITy, {5}, "lengths" , /* isTrainable */ false); |
16474 | |
16475 | bindings.allocate(indices)->getHandle<int64_t>() = { |
16476 | 2, 0, 1, 2, 0, 0, 0, 0, |
16477 | }; |
16478 | bindings.allocate(lengths)->getHandle<int32_t>() = { |
16479 | 2, 0, 2, 1, 3, |
16480 | }; |
16481 | |
16482 | auto *R = F->createRowwiseQuantizedSparseLengthsSum( |
16483 | "RQSLWS" , data, indices, lengths, quantization::Schema::Asymmetric, DTy, |
16484 | useFP16Accumulation); |
16485 | SaveNode *S = F->createSave("save" , R); |
16486 | bindings.allocate(S->getPlaceholder()); |
16487 | |
16488 | EE.compile(CompilationMode::Infer); |
16489 | EE.run(bindings); |
16490 | |
16491 | Tensor &result = *bindings.get(S->getPlaceholder()); |
16492 | Tensor expected(DTy, {5, 2}); |
16493 | expected.getHandle<DataType>() = { |
16494 | 5.5f, 6.9f, 0.0f, 0.0f, 6.8f, 9.1f, 1.0f, 1.2f, 3.0f, 3.6f, |
16495 | }; |
16496 | |
16497 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
16498 | } |
16499 | |
16500 | /// Test RWQ-SLS with Float Weights, Scales, Offsets, and Output. |
16501 | TEST_P(OperatorTest, RowwiseQuantizedSparseLengthsSum_Float) { |
16502 | CHECK_IF_ENABLED(); |
16503 | testRowwiseQuantizedSparseLengthsSum<float>(bindings_, mod_, F_, EE_, |
16504 | ElemKind::FloatTy, 0.015); |
16505 | } |
16506 | |
16507 | /// Test RWQ-SLS with Float16 Weights, Scales, Offsets, and Output. Uses |
16508 | /// Float accumulation. |
16509 | TEST_P(OperatorTest, RowwiseQuantizedSparseLengthsSum_Float16_AccumFloat) { |
16510 | CHECK_IF_ENABLED(); |
16511 | testRowwiseQuantizedSparseLengthsSum<float16_t>( |
16512 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, 0.02, |
16513 | /* useFP16Accumulation */ false); |
16514 | } |
16515 | |
16516 | /// Test RWQ-SLS with Float16 Weights, Scales, Offsets, and Output. Uses |
16517 | /// Float16 accumulation. |
16518 | TEST_P(OperatorTest, RowwiseQuantizedSparseLengthsSum_Float16_AccumFloat16) { |
16519 | CHECK_IF_ENABLED(); |
16520 | testRowwiseQuantizedSparseLengthsSum<float16_t>( |
16521 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, 0.02, |
16522 | /* useFP16Accumulation */ true); |
16523 | } |
16524 | |
16525 | template <typename IndexType> |
16526 | static void testRepeatedSLSWithPartialTensors( |
16527 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
16528 | glow::ExecutionEngine &EE, std::vector<Tensor> &unownedTensors, |
16529 | llvm::StringRef backendName, ElemKind ITy) { |
16530 | // This test is only meaningful if the backend supports partial tensors. |
16531 | ASSERT_TRUE(EE.getBackend(backendName).supportsPartialTensors()); |
16532 | |
16533 | constexpr dim_t embeddingRows = 1275; |
16534 | constexpr dim_t numLengths = 20; |
16535 | constexpr dim_t maxIndices = 20000; |
  constexpr dim_t numIndices = 20; // Must equal sum(lengths).
16537 | constexpr dim_t iterations = 33; |
16538 | |
16539 | auto *data = |
16540 | mod.createConstant(ElemKind::FloatTy, {embeddingRows, 1}, "data" ); |
16541 | data->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0, |
16542 | mod.getPRNG()); |
16543 | auto *indices = mod.createPlaceholder(ITy, {maxIndices}, "indices" , false); |
16544 | auto *lengths = |
16545 | mod.createPlaceholder(ElemKind::Int32ITy, {numLengths}, "lengths" , false); |
16546 | auto *SLS = F->createSparseLengthsSum("SLS" , data, indices, lengths); |
16547 | auto *save = F->createSave("save" , SLS); |
16548 | auto *outPH = save->getPlaceholder(); |
16549 | EE.compile(CompilationMode::Infer); |
16550 | |
16551 | Tensor indicesReal(ITy, {numIndices}); |
16552 | indicesReal.getHandle<IndexType>().randomize(0, embeddingRows - 1, |
16553 | mod.getPRNG()); |
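  // indicesPartial is an unowned view of indicesReal's storage: its type
  // advertises maxIndices elements, but only the first numIndices are backed
  // by real memory, which is what makes it a "partial" tensor.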
16554 | Tensor indicesPartial(indicesReal.getUnsafePtr(), indices->getType(), |
16555 | indicesReal.getSizeInBytes()); |
16556 | Tensor indicesPadded(indices->getType()); |
16557 | indicesPadded.zero(); |
16558 | memcpy(indicesPadded.getUnsafePtr(), indicesReal.getUnsafePtr(), |
16559 | numIndices * sizeof(IndexType)); |
16560 | |
16561 | Tensor lengthsReal(ElemKind::Int32ITy, {numLengths}); |
16562 | lengthsReal.getHandle<int32_t>().clear(1); |
16563 | Tensor lengthsPartial(lengthsReal.getUnsafePtr(), lengths->getType(), |
16564 | lengthsReal.getSizeInBytes()); |
16565 | Tensor lengthsPadded(ElemKind::Int32ITy, {numLengths}); |
16566 | lengthsPadded.assign(&lengthsReal); |
16567 | |
16568 | bindings.insert(indices, std::move(indicesPartial)); |
16569 | bindings.insert(lengths, std::move(lengthsPartial)); |
16570 | bindings.allocate(outPH); |
16571 | |
16572 | PlaceholderBindings paddedBindings; |
16573 | paddedBindings.insert(indices, std::move(indicesPadded)); |
16574 | paddedBindings.insert(lengths, std::move(lengthsPadded)); |
16575 | paddedBindings.allocate(outPH); |
16576 | |
16577 | for (dim_t i = 0; i < iterations; i++) { |
16578 | EE.run(bindings); |
16579 | EE.run(paddedBindings); |
16580 | ASSERT_TRUE(bindings.get(outPH)->isEqual(*paddedBindings.get(outPH))); |
16581 | } |
16582 | |
16583 | // Keep these around so their memory is not freed at the end of the |
16584 | // test/scope. This is so that inside TearDown during import/export testing |
16585 | // the data is still around. |
16586 | unownedTensors.push_back(std::move(indicesReal)); |
16587 | unownedTensors.push_back(std::move(lengthsReal)); |
16588 | } |
16589 | |
/// Test repeated SLS with partial tensors, using int32 indices.
16591 | TEST_P(OperatorTest, RepeatedSLSWithPartialTensors_int32) { |
16592 | CHECK_IF_ENABLED(); |
16593 | |
16594 | testRepeatedSLSWithPartialTensors<int32_t>(bindings_, mod_, F_, EE_, |
16595 | unownedTensors_, getBackendName(), |
16596 | ElemKind::Int32ITy); |
16597 | } |
16598 | |
/// Test repeated SLS with partial tensors, using int64 indices.
16600 | TEST_P(OperatorTest, RepeatedSLSWithPartialTensors_int64) { |
16601 | CHECK_IF_ENABLED(); |
16602 | |
16603 | testRepeatedSLSWithPartialTensors<int64_t>(bindings_, mod_, F_, EE_, |
16604 | unownedTensors_, getBackendName(), |
16605 | ElemKind::Int64ITy); |
16606 | } |
16607 | |
16608 | TEST_P(OperatorTest, RepeatedSLWSWithPartialTensors) { |
16609 | CHECK_IF_ENABLED(); |
16610 | |
16611 | // This test is only meaningful if the backend supports partial tensors. |
16612 | ASSERT_TRUE(EE_.getBackend(getBackendName()).supportsPartialTensors()); |
16613 | |
16614 | constexpr dim_t embeddingRows = 1275; |
16615 | constexpr dim_t numLengths = 20; |
16616 | constexpr dim_t maxIndices = 20000; |
  constexpr dim_t numIndices = 20; // Must equal sum(lengths).
16618 | constexpr dim_t iterations = 33; |
16619 | |
16620 | auto *data = |
16621 | mod_.createConstant(ElemKind::FloatTy, {embeddingRows, 1}, "data" ); |
16622 | data->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0, |
16623 | mod_.getPRNG()); |
16624 | auto *indices = mod_.createPlaceholder(ElemKind::Int64ITy, {maxIndices}, |
16625 | "indices" , false); |
16626 | auto *weights = |
16627 | mod_.createPlaceholder(ElemKind::FloatTy, {maxIndices}, "weights" , false); |
16628 | auto *lengths = mod_.createPlaceholder(ElemKind::Int32ITy, {numLengths}, |
16629 | "lengths" , false); |
16630 | auto *SLWS = F_->createSparseLengthsWeightedSum("SWLS" , data, weights, |
16631 | indices, lengths); |
16632 | auto *save = F_->createSave("save" , SLWS); |
16633 | auto *outPH = save->getPlaceholder(); |
16634 | EE_.compile(CompilationMode::Infer); |
16635 | |
16636 | Tensor indicesReal(ElemKind::Int64ITy, {numIndices}); |
16637 | indicesReal.getHandle<int64_t>().randomize(0, embeddingRows - 1, |
16638 | mod_.getPRNG()); |
16639 | Tensor indicesPartial(indicesReal.getUnsafePtr(), indices->getType(), |
16640 | indicesReal.getSizeInBytes()); |
16641 | Tensor indicesPadded(indices->getType()); |
16642 | indicesPadded.zero(); |
16643 | memcpy(indicesPadded.getUnsafePtr(), indicesReal.getUnsafePtr(), |
16644 | numIndices * sizeof(int64_t)); |
16645 | |
16646 | Tensor weightsReal(ElemKind::FloatTy, {numIndices}); |
16647 | weightsReal.getHandle<float>().randomize(0, embeddingRows - 1, |
16648 | mod_.getPRNG()); |
16649 | Tensor weightsPartial(weightsReal.getUnsafePtr(), weights->getType(), |
16650 | weightsReal.getSizeInBytes()); |
16651 | Tensor weightsPadded(weights->getType()); |
16652 | weightsPadded.zero(); |
16653 | memcpy(weightsPadded.getUnsafePtr(), weightsReal.getUnsafePtr(), |
16654 | numIndices * sizeof(float)); |
16655 | |
16656 | Tensor lengthsReal(ElemKind::Int32ITy, {numLengths}); |
16657 | lengthsReal.getHandle<int32_t>().clear(1); |
16658 | Tensor lengthsPartial(lengthsReal.getUnsafePtr(), lengths->getType(), |
16659 | lengthsReal.getSizeInBytes()); |
16660 | Tensor lengthsPadded(ElemKind::Int32ITy, {numLengths}); |
16661 | lengthsPadded.assign(&lengthsReal); |
16662 | |
16663 | bindings_.insert(indices, std::move(indicesPartial)); |
16664 | bindings_.insert(weights, std::move(weightsPartial)); |
16665 | bindings_.insert(lengths, std::move(lengthsPartial)); |
16666 | |
16667 | bindings_.allocate(outPH); |
16668 | |
16669 | PlaceholderBindings paddedBindings; |
16670 | paddedBindings.insert(indices, std::move(indicesPadded)); |
16671 | paddedBindings.insert(weights, std::move(weightsPadded)); |
16672 | paddedBindings.insert(lengths, std::move(lengthsPadded)); |
16673 | |
16674 | paddedBindings.allocate(outPH); |
16675 | |
16676 | for (dim_t i = 0; i < iterations; i++) { |
16677 | EE_.run(bindings_); |
16678 | EE_.run(paddedBindings); |
16679 | ASSERT_TRUE(bindings_.get(outPH)->isEqual(*paddedBindings.get(outPH))); |
16680 | } |
16681 | |
16682 | // Keep these around so their memory is not freed at the end of the |
16683 | // test/scope. This is so that inside TearDown during import/export testing |
16684 | // the data is still around. |
16685 | unownedTensors_.push_back(std::move(indicesReal)); |
16686 | unownedTensors_.push_back(std::move(lengthsReal)); |
16687 | unownedTensors_.push_back(std::move(weightsReal)); |
16688 | } |
16689 | |
/// Helper to test Gather with partial inputs, using \p ITy for the indices.
16691 | template <typename IndicesType> |
16692 | static void |
16693 | testPartialGather(glow::PlaceholderBindings &bindings, glow::Module &mod, |
16694 | glow::Function *F, glow::ExecutionEngine &EE, |
16695 | std::vector<Tensor> &unownedTensors, ElemKind ITy) { |
16696 | /* |
  The actual input we care about has the following shape/result:
16698 | |
16699 | DATA = [1.0, 2.3, 4.5] |
16700 | INDICES = [0, 1, 0, 1, 2, 0] |
16701 | OUTPUT = [1.0, 2.3, 1.0, 2.3, 4.5, 1.0] |
16702 | |
16703 | However, we are going to create a larger INDICES input that is only |
16704 | partially filled, and expect a larger OUTPUT that we expect will have data |
16705 | we do not care about. |
16706 | */ |
16707 | |
16708 | Placeholder *data = mod.createPlaceholder(ElemKind::FloatTy, {3}, "data" , |
16709 | /* isTrainable */ false); |
16710 | Placeholder *indices = |
16711 | mod.createPlaceholder(ITy, {10000}, "indices" , /* isTrainable */ false); |
16712 | |
16713 | bindings.allocate(data)->getHandle<float>() = {1.0f, 2.3f, 4.5f}; |
16714 | |
16715 | Tensor indicesReal(ITy, {6}); |
16716 | indicesReal.getHandle<IndicesType>() = {0, 1, 0, 1, 2, 0}; |
16717 | Tensor indicesPartial(indicesReal.getUnsafePtr(), indices->getType(), |
16718 | indicesReal.getSizeInBytes()); |
16719 | bindings.insert(indices, std::move(indicesPartial)); |
16720 | |
16721 | auto *R = F->createGather("gather" , data, indices); |
16722 | |
16723 | auto *result = F->createSave("save" , R); |
16724 | Tensor *resultT = bindings.allocate(result->getPlaceholder()); |
16725 | |
  // The result should have 10000 elements, even though we only care about the
  // first 6.
16728 | EXPECT_EQ(resultT->getType().dims().size(), 1); |
16729 | EXPECT_EQ(resultT->getType().dims()[0], 10000); |
16730 | |
16731 | EE.compile(CompilationMode::Infer); |
16732 | EE.run(bindings); |
16733 | |
16734 | Tensor expectedT(ElemKind::FloatTy, {6}); |
16735 | auto expectedH = expectedT.getHandle<float>(); |
16736 | expectedH = {1.0, 2.3, 1.0, 2.3, 4.5, 1.0}; |
16737 | auto resultH = resultT->getHandle<float>(); |
16738 | |
16739 | for (dim_t i = 0; i < 6; ++i) { |
16740 | EXPECT_EQ(expectedH.at({i}), resultH.at({i})); |
16741 | } |
16742 | |
16743 | // Keep this around so their memory is not freed at the end of the |
16744 | // test/scope. This is so that inside TearDown during import/export testing |
16745 | // the data is still around. |
16746 | unownedTensors.push_back(std::move(indicesReal)); |
16747 | } |
16748 | |
16749 | TEST_P(OperatorTest, GatherWithInt64PartialTensors) { |
16750 | CHECK_IF_ENABLED(); |
16751 | // This test is only meaningful if the backend supports partial tensors. |
16752 | ASSERT_TRUE(EE_.getBackend(getBackendName()).supportsPartialTensors()); |
16753 | testPartialGather<int64_t>(bindings_, mod_, F_, EE_, unownedTensors_, |
16754 | ElemKind::Int64ITy); |
16755 | } |
16756 | |
16757 | TEST_P(OperatorTest, GatherWithInt32PartialTensors) { |
16758 | CHECK_IF_ENABLED(); |
16759 | // This test is only meaningful if the backend supports partial tensors. |
16760 | ASSERT_TRUE(EE_.getBackend(getBackendName()).supportsPartialTensors()); |
16761 | testPartialGather<int32_t>(bindings_, mod_, F_, EE_, unownedTensors_, |
16762 | ElemKind::Int32ITy); |
16763 | } |
16764 | |
/// Helper to create a GatherElements node over \p data and \p indices along
/// \p axis, run it, and compare the result against \p expectedT.
static void testGatherElements(glow::PlaceholderBindings &bindings,
                               glow::Function *F, glow::ExecutionEngine &EE,
                               Placeholder *data, Placeholder *indices,
                               unsigned_t axis, const Tensor &expectedT) {
16769 | auto *G = F->createGatherElements("GatherElements" , data, indices, axis); |
16770 | auto *result = F->createSave("save" , G); |
16771 | bindings.allocate(result->getPlaceholder()); |
16772 | |
16773 | EE.compile(CompilationMode::Infer); |
16774 | EE.run(bindings); |
16775 | |
16776 | Tensor *resultT = bindings.get(result->getPlaceholder()); |
16777 | EXPECT_TRUE(resultT->isEqual(expectedT)); |
16778 | } |
16779 | |
/// Helper to test GatherElements with integer data of kind \p dataKind and
/// indices of kind \p indexKind.
template <typename DataType, typename IndexType>
static void testGatherElementsIntInt(glow::PlaceholderBindings &bindings,
                                     glow::Module &mod, glow::Function *F,
                                     glow::ExecutionEngine &EE,
                                     ElemKind dataKind, ElemKind indexKind) {
16785 | auto *data = mod.createPlaceholder(dataKind, {2, 2}, "data" , false); |
16786 | auto *indices = mod.createPlaceholder(indexKind, {2, 2}, "indices" , false); |
16787 | bindings.allocate(data)->getHandle<DataType>() = {1, 2, 3, 4}; |
16788 | bindings.allocate(indices)->getHandle<IndexType>() = {0, 0, 1, 0}; |
16789 | unsigned_t axis = 1; |
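  // With axis = 1, GatherElements computes out[i][j] = data[i][indices[i][j]],
  // so out = [[data[0][0], data[0][0]], [data[1][1], data[1][0]]], i.e.
  // [[1, 1], [4, 3]].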
16790 | |
16791 | Tensor expectedT(dataKind, {2, 2}); |
16792 | expectedT.getHandle<DataType>() = {1, 1, 4, 3}; |
16793 | testGatherElements(bindings, F, EE, data, indices, axis, expectedT); |
16794 | } |
16795 | |
16796 | TEST_P(OperatorTest, GatherElementsInt64Int64) { |
16797 | CHECK_IF_ENABLED(); |
16798 | testGatherElementsIntInt<int64_t, int64_t>( |
16799 | bindings_, mod_, F_, EE_, ElemKind::Int64ITy, ElemKind::Int64ITy); |
16800 | } |
16801 | |
16802 | TEST_P(OperatorTest, GatherElementsInt64Int32) { |
16803 | CHECK_IF_ENABLED(); |
16804 | testGatherElementsIntInt<int64_t, int32_t>( |
16805 | bindings_, mod_, F_, EE_, ElemKind::Int64ITy, ElemKind::Int32ITy); |
16806 | } |
16807 | |
16808 | TEST_P(OperatorTest, GatherElementsInt32Int64) { |
16809 | CHECK_IF_ENABLED(); |
16810 | testGatherElementsIntInt<int32_t, int64_t>( |
16811 | bindings_, mod_, F_, EE_, ElemKind::Int32ITy, ElemKind::Int64ITy); |
16812 | } |
16813 | |
16814 | TEST_P(OperatorTest, GatherElementsInt32Int32) { |
16815 | CHECK_IF_ENABLED(); |
16816 | testGatherElementsIntInt<int32_t, int32_t>( |
16817 | bindings_, mod_, F_, EE_, ElemKind::Int32ITy, ElemKind::Int32ITy); |
16818 | } |
16819 | |
/// Helper to test GatherElements with floating-point data of kind \p dataKind
/// and indices of kind \p indexKind.
template <typename DataType, typename IndexType>
static void testGatherElementsFloatInt(glow::PlaceholderBindings &bindings,
                                       glow::Module &mod, glow::Function *F,
                                       glow::ExecutionEngine &EE,
                                       ElemKind dataKind, ElemKind indexKind) {
16825 | auto *data = mod.createPlaceholder(dataKind, {3, 3}, "data" , false); |
16826 | auto *indices = mod.createPlaceholder(indexKind, {2, 3}, "indices" , false); |
16827 | bindings.allocate(data)->getHandle<DataType>() = {1.f, 2.f, 3.f, 4.f, 5.f, |
16828 | 6.f, 7.f, 8.f, 9.f}; |
16829 | bindings.allocate(indices)->getHandle<IndexType>() = {1, 2, 0, 2, 0, 0}; |
16830 | unsigned_t dim = 0; |
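  // With dim = 0, GatherElements computes out[i][j] = data[indices[i][j]][j],
  // so out = [[data[1][0], data[2][1], data[0][2]],
  //           [data[2][0], data[0][1], data[0][2]]], i.e.
  // [[4, 8, 3], [7, 2, 3]].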
16831 | |
16832 | Tensor expectedT(dataKind, {2, 3}); |
16833 | expectedT.getHandle<DataType>() = {4.f, 8.f, 3.f, 7.f, 2.f, 3.f}; |
16834 | testGatherElements(bindings, F, EE, data, indices, dim, expectedT); |
16835 | } |
16836 | |
TEST_P(OperatorTest, GatherElementsFloatInt64) {
  CHECK_IF_ENABLED();
  testGatherElementsFloatInt<float_t, int64_t>(
      bindings_, mod_, F_, EE_, ElemKind::FloatTy, ElemKind::Int64ITy);
}

TEST_P(OperatorTest, GatherElementsFloatInt32) {
  CHECK_IF_ENABLED();
  testGatherElementsFloatInt<float_t, int32_t>(
      bindings_, mod_, F_, EE_, ElemKind::FloatTy, ElemKind::Int32ITy);
}
16848 | |
TEST_P(OperatorTest, GatherElementsFloat16Int64) {
  CHECK_IF_ENABLED();
  testGatherElementsFloatInt<float16_t, int64_t>(
      bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int64ITy);
}

TEST_P(OperatorTest, GatherElementsFloat16Int32) {
  CHECK_IF_ENABLED();
  testGatherElementsFloatInt<float16_t, int32_t>(
      bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int32ITy);
}
16860 | |
16861 | TEST_P(OperatorTest, GatherElementsFloatInt32NegInd) { |
16862 | CHECK_IF_ENABLED(); |
16863 | using ElemType = float; |
16864 | using IndexType = int32_t; |
16865 | auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {3, 3}, "data" , false); |
16866 | auto *indices = |
16867 | mod_.createPlaceholder(ElemKind::Int32ITy, {2, 3}, "indices" , false); |
16868 | bindings_.allocate(data)->getHandle<ElemType>() = {1.f, 2.f, 3.f, 4.f, 5.f, |
16869 | 6.f, 7.f, 8.f, 9.f}; |
16870 | bindings_.allocate(indices)->getHandle<IndexType>() = {-2, 2, 0, -1, 0, 0}; |
16871 | unsigned_t dim = 0; |
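  // Negative indices wrap around dim 0 (size 3): -2 -> 1 and -1 -> 2, so the
  // effective indices match testGatherElementsFloatInt above.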
16872 | |
16873 | Tensor expectedT(ElemKind::FloatTy, {2, 3}); |
16874 | expectedT.getHandle<ElemType>() = {4.f, 8.f, 3.f, 7.f, 2.f, 3.f}; |
16875 | testGatherElements(bindings_, F_, EE_, data, indices, dim, expectedT); |
16876 | } |
16877 | |
16878 | TEST_P(OperatorTest, GatherElementsQInt8Int32) { |
16879 | CHECK_IF_ENABLED(); |
16880 | auto *data = mod_.createPlaceholder(ElemKind::FloatTy, {3, 3}, "data" , false); |
16881 | auto *indices = |
16882 | mod_.createPlaceholder(ElemKind::Int32ITy, {2, 3}, "indices" , false); |
16883 | bindings_.allocate(data)->getHandle<float>() = {1.f, 2.f, 3.f, 4.f, 5.f, |
16884 | 6.f, 7.f, 8.f, 9.f}; |
16885 | bindings_.allocate(indices)->getHandle<int32_t>() = {1, 2, 0, 2, 0, 0}; |
16886 | unsigned_t axis = 0; |
16887 | Tensor expectedT(ElemKind::FloatTy, {2, 3}); |
16888 | expectedT.getHandle<float>() = {4.f, 8.f, 3.f, 7.f, 2.f, 3.f}; |
16889 | |
16890 | auto qParams = glow::quantization::chooseQuantizationParams({-10, 10}); |
16891 | auto dataTy = |
16892 | mod_.uniqueType(ElemKind::Int8QTy, {3, 3}, qParams.scale, qParams.offset); |
16893 | auto *dataQ = F_->createQuantize("quantizeQ" , data, dataTy); |
16894 | auto *GQ = F_->createGatherElements("GatherElements" , dataQ, indices, axis); |
16895 | auto *DQ = F_->createDequantize("dequantize" , GQ, ElemKind::FloatTy); |
16896 | auto *result = F_->createSave("save" , DQ); |
16897 | bindings_.allocate(result->getPlaceholder()); |
16898 | |
16899 | EE_.compile(CompilationMode::Infer); |
16900 | EE_.run(bindings_); |
16901 | |
16902 | Tensor *resultT = bindings_.get(result->getPlaceholder()); |
16903 | for (auto i = 0; i < 6; i++) { |
16904 | EXPECT_NEAR(expectedT.getHandle<float>().raw(i), |
16905 | resultT->getHandle<float>().raw(i), 5e-2); |
16906 | } |
16907 | } |
16908 | |
/// Helper to test FusedRowwiseQuantizedSparseLengthsWeightedSum using
/// \p fusedDTy, with indices of kind \p ITy.
16910 | template <typename DataType, typename IndexType> |
16911 | static void testFusedRowwiseQuantizedSparseLengthsWeightedSum( |
16912 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
16913 | glow::ExecutionEngine &EE, ElemKind fusedDTy, ElemKind ITy, |
16914 | float allowedError, bool useFP16Accumulation = false) { |
16915 | /* |
16916 | DATA = [[2.0, -0.5, 13]] |
16917 | WEIGHTS = [3, 1, 0, 0, 0, 0, 2, -0.5] |
16918 | INDICES = [1, 0, 2, 0, 1, 2, 2, 0] |
16919 | LENGTHS = [3, 0, 3, 2] |
16920 | OUTPUT = [[0.5, 0, 0, 25]] |
16921 | */ |
16922 | const bool fusedData = isFusedQuantizedElemKind(fusedDTy); |
16923 | const ElemKind DTy = |
16924 | fusedData ? getScaleOffsetElemKindFromFused(fusedDTy) : fusedDTy; |
16925 | Tensor data(ElemKind::FloatTy, {3, 1}); |
16926 | data.getHandle() = { |
16927 | 2.0, |
16928 | -0.5, |
16929 | 13, |
16930 | }; |
16931 | |
16932 | Constant *weights = mod.createConstant(DTy, {8}, "weights" ); |
16933 | weights->getPayloadMutable().getHandle<DataType>() = { |
16934 | 3., 1., 0., 0., 0., 0., 2., -0.5, |
16935 | }; |
16936 | |
16937 | Placeholder *indices = mod.createPlaceholder(ITy, {8}, "indices" , |
16938 | /* isTrainable */ false); |
16939 | Placeholder *lengths = |
16940 | mod.createPlaceholder(ElemKind::Int32ITy, {4}, "lengths" , |
16941 | /* isTrainable */ false); |
16942 | |
16943 | bindings.allocate(indices)->getHandle<IndexType>() = { |
16944 | 1, 0, 2, 0, 1, 2, 2, 0, |
16945 | }; |
16946 | bindings.allocate(lengths)->getHandle<int32_t>() = { |
16947 | 3, |
16948 | 0, |
16949 | 3, |
16950 | 2, |
16951 | }; |
16952 | |
16953 | auto *R = F->createFusedRowwiseQuantizedSparseLengthsWeightedSum( |
16954 | "RQSLWS" , data, weights, indices, lengths, fusedDTy, useFP16Accumulation); |
16955 | SaveNode *S = F->createSave("save" , R); |
16956 | bindings.allocate(S->getPlaceholder()); |
16957 | |
16958 | EE.compile(CompilationMode::Infer); |
16959 | EE.run(bindings); |
16960 | |
16961 | Tensor &result = *bindings.get(S->getPlaceholder()); |
16962 | Tensor expected(DTy, {4, 1}); |
16963 | expected.getHandle<DataType>() = { |
16964 | 0.5, |
16965 | 0, |
16966 | 0, |
16967 | 25, |
16968 | }; |
16969 | |
16970 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
16971 | } |
16972 | |
16973 | /// Test Fused-RWQ-SLWS in Float. |
16974 | TEST_P(OperatorTest, FusedRowwiseQuantizedSparseLengthsWeightedSum_Float) { |
16975 | CHECK_IF_ENABLED(); |
16976 | testFusedRowwiseQuantizedSparseLengthsWeightedSum<float, int64_t>( |
16977 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedQTy, ElemKind::Int64ITy, |
16978 | 0.0001); |
16979 | } |
16980 | |
16981 | /// Test Fused-RWQ-SLWS in Float16. Uses Float accumulation. |
16982 | TEST_P(OperatorTest, |
16983 | FusedRowwiseQuantizedSparseLengthsWeightedSum_Float16_AccumFloat) { |
16984 | CHECK_IF_ENABLED(); |
16985 | testFusedRowwiseQuantizedSparseLengthsWeightedSum<float16_t, int64_t>( |
16986 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, ElemKind::Int64ITy, |
16987 | 0.0001, |
16988 | /* useFP16Accumulation */ false); |
16989 | } |
16990 | |
16991 | /// Test Fused-RWQ-SLWS in Float16. Uses Float16 accumulation. |
16992 | TEST_P(OperatorTest, |
16993 | FusedRowwiseQuantizedSparseLengthsWeightedSum_Float16_AccumFloat16) { |
16994 | CHECK_IF_ENABLED(); |
16995 | testFusedRowwiseQuantizedSparseLengthsWeightedSum<float16_t, int64_t>( |
16996 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, ElemKind::Int64ITy, |
16997 | 0.0001, |
16998 | /* useFP16Accumulation */ true); |
16999 | } |
17000 | |
17001 | /// Test Fused-RWQ-SLWS in Float. Int32 indices. |
17002 | TEST_P(OperatorTest, |
17003 | FusedRowwiseQuantizedSparseLengthsWeightedSum_Float_Int32) { |
17004 | CHECK_IF_ENABLED(); |
17005 | testFusedRowwiseQuantizedSparseLengthsWeightedSum<float, int32_t>( |
17006 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedQTy, ElemKind::Int32ITy, |
17007 | 0.0001); |
17008 | } |
17009 | |
17010 | /// Test Fused-RWQ-SLWS in Float16. Uses Float accumulation. Int32 indices. |
17011 | TEST_P(OperatorTest, |
17012 | FusedRowwiseQuantizedSparseLengthsWeightedSum_Float16_AccumFloat_Int32) { |
17013 | CHECK_IF_ENABLED(); |
17014 | testFusedRowwiseQuantizedSparseLengthsWeightedSum<float16_t, int32_t>( |
17015 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, ElemKind::Int32ITy, |
17016 | 0.0001, |
17017 | /* useFP16Accumulation */ false); |
17018 | } |
17019 | |
17020 | /// Test Fused-RWQ-SLWS in Float16. Uses Float16 accumulation. Int32 indices. |
17021 | TEST_P( |
17022 | OperatorTest, |
17023 | FusedRowwiseQuantizedSparseLengthsWeightedSum_Float16_AccumFloat16_Int32) { |
17024 | CHECK_IF_ENABLED(); |
17025 | testFusedRowwiseQuantizedSparseLengthsWeightedSum<float16_t, int32_t>( |
17026 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, ElemKind::Int32ITy, |
17027 | 0.0001, |
17028 | /* useFP16Accumulation */ true); |
17029 | } |
17030 | |
17031 | static void testRowwiseQuantizedSparseLengthsSum_ConvertedFloat16( |
17032 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
17033 | glow::ExecutionEngine &EE, float allowedError, bool convertFusedToFP16, |
17034 | bool useFP16AccumSLS) { |
17035 | CHECK_IF_ENABLED(); |
17036 | /* |
17037 | DATA = [[2.0, -0.5, 13]] |
17038 | WEIGHTS = [3, 1, 0, 0, 0, 0, 2, -0.5] |
17039 | INDICES = [1, 0, 2, 0, 1, 2, 2, 0] |
17040 | LENGTHS = [3, 0, 3, 2] |
17041 | OUTPUT = [[0.5, 0, 0, 25]] |
17042 | */ |
17043 | Tensor data(ElemKind::FloatTy, {3, 1}); |
17044 | data.getHandle() = { |
17045 | 2.0, |
17046 | -0.5, |
17047 | 13, |
17048 | }; |
17049 | |
17050 | Constant *weights = mod.createConstant(ElemKind::FloatTy, {8}, "weights" ); |
17051 | weights->getPayloadMutable().getHandle<float>() = { |
17052 | 3., 1., 0., 0., 0., 0., 2., -0.5, |
17053 | }; |
17054 | |
17055 | Placeholder *indices = |
17056 | mod.createPlaceholder(ElemKind::Int64ITy, {8}, "indices" , |
17057 | /* isTrainable */ false); |
17058 | Placeholder *lengths = |
17059 | mod.createPlaceholder(ElemKind::Int32ITy, {4}, "lengths" , |
17060 | /* isTrainable */ false); |
17061 | |
17062 | bindings.allocate(indices)->getHandle<int64_t>() = { |
17063 | 1, 0, 2, 0, 1, 2, 2, 0, |
17064 | }; |
17065 | bindings.allocate(lengths)->getHandle<int32_t>() = { |
17066 | 3, |
17067 | 0, |
17068 | 3, |
17069 | 2, |
17070 | }; |
17071 | |
17072 | auto *R = F->createFusedRowwiseQuantizedSparseLengthsWeightedSum( |
17073 | "RQSLWS" , data, weights, indices, lengths); |
17074 | SaveNode *S = F->createSave("save" , R); |
17075 | bindings.allocate(S->getPlaceholder()); |
17076 | |
17077 | CompilationContext cctx; |
17078 | cctx.precisionConfig.convertToFP16 = true; |
17079 | cctx.precisionConfig.convertFusedToFP16 = convertFusedToFP16; |
17080 | cctx.precisionConfig.forceFP16AccumSLS = useFP16AccumSLS; |
17081 | cctx.precisionConfig.float16Format = |
17082 | PrecisionConfiguration::Float16Format::FP16; |
17083 | |
17084 | EE.compile(cctx); |
17085 | EE.run(bindings); |
17086 | |
17087 | Tensor &result = *bindings.get(S->getPlaceholder()); |
17088 | Tensor expected(ElemKind::FloatTy, {4, 1}); |
17089 | expected.getHandle<float>() = { |
17090 | 0.5, |
17091 | 0, |
17092 | 0, |
17093 | 25, |
17094 | }; |
17095 | |
17096 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
17097 | } |
17098 | |
/// Test Fused-RWQ-SLWS where the weights are in FP16 and the data
/// inputs are UInt8FusedQTy.
17101 | TEST_P( |
17102 | OperatorTest, |
17103 | FusedRowwiseQuantizedSparseLengthsWeightedSum_ConvertedFloat16_NoFusedConvert) { |
17104 | CHECK_IF_ENABLED(); |
17105 | return testRowwiseQuantizedSparseLengthsSum_ConvertedFloat16( |
17106 | bindings_, mod_, F_, EE_, 0.02, |
17107 | /* convertFusedToFP16*/ false, /* useFP16AccumSLS */ true); |
17108 | } |
17109 | |
17110 | TEST_P( |
17111 | OperatorTest, |
17112 | FusedRowwiseQuantizedSparseLengthsWeightedSum_ConvertedFloat16_NoFusedConvert_FP32Accum) { |
17113 | CHECK_IF_ENABLED(); |
17114 | return testRowwiseQuantizedSparseLengthsSum_ConvertedFloat16( |
17115 | bindings_, mod_, F_, EE_, 0.02, |
17116 | /* convertFusedToFP16*/ false, /* useFP16AccumSLS */ false); |
17117 | } |
17118 | |
17119 | TEST_P(OperatorTest, |
17120 | FusedRowwiseQuantizedSparseLengthsWeightedSum_ConvertedFloat16) { |
17121 | CHECK_IF_ENABLED(); |
17122 | return testRowwiseQuantizedSparseLengthsSum_ConvertedFloat16( |
17123 | bindings_, mod_, F_, EE_, 0.02, |
17124 | /* convertFusedToFP16*/ true, /* useFP16AccumSLS */ true); |
17125 | } |
17126 | |
17127 | TEST_P( |
17128 | OperatorTest, |
17129 | FusedRowwiseQuantizedSparseLengthsWeightedSum_ConvertedFloat16_back_to_back) { |
17130 | CHECK_IF_ENABLED(); |
17131 | /* |
17132 | DATA = [[2.0, -0.5, 13]] |
17133 | WEIGHTS = [1] |
17134 | INDICES = [0] |
    LENGTHS = [0, 0, 0, 1] and then [1, 0, 0, 0]
    OUTPUT = [[0, 0, 0, 2.0]] and then [[2.0, 0, 0, 0]]
17137 | */ |
17138 | Tensor data(ElemKind::FloatTy, {3, 1}); |
17139 | data.getHandle() = { |
17140 | 2.0, |
17141 | -0.5, |
17142 | 13, |
17143 | }; |
17144 | |
17145 | Constant *weights = mod_.createConstant(ElemKind::FloatTy, {1}, "weights" ); |
17146 | weights->getPayloadMutable().getHandle<float>() = {1.}; |
17147 | |
17148 | Placeholder *indices = |
17149 | mod_.createPlaceholder(ElemKind::Int64ITy, {1}, "indices" , |
17150 | /* isTrainable */ false); |
17151 | Placeholder *lengths = |
17152 | mod_.createPlaceholder(ElemKind::Int32ITy, {4}, "lengths" , |
17153 | /* isTrainable */ false); |
17154 | |
17155 | bindings_.allocate(indices)->getHandle<int64_t>() = { |
17156 | 0, |
17157 | }; |
17158 | bindings_.allocate(lengths)->getHandle<int32_t>() = { |
17159 | 0, |
17160 | 0, |
17161 | 0, |
17162 | 1, |
17163 | }; |
17164 | |
17165 | auto *R = F_->createFusedRowwiseQuantizedSparseLengthsWeightedSum( |
17166 | "RQSLWS" , data, weights, indices, lengths); |
17167 | SaveNode *S = F_->createSave("save" , R); |
17168 | bindings_.allocate(S->getPlaceholder()); |
17169 | |
17170 | CompilationContext cctx; |
17171 | cctx.precisionConfig.convertToFP16 = true; |
17172 | cctx.precisionConfig.convertFusedToFP16 = true; |
17173 | cctx.precisionConfig.float16Format = |
17174 | PrecisionConfiguration::Float16Format::FP16; |
17175 | |
17176 | EE_.compile(cctx); |
17177 | EE_.run(bindings_); |
17178 | |
17179 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
17180 | Tensor expected(ElemKind::FloatTy, {4, 1}); |
17181 | expected.getHandle<float>() = { |
17182 | 0, |
17183 | 0, |
17184 | 0, |
17185 | 2.0, |
17186 | }; |
17187 | |
17188 | EXPECT_TRUE(expected.isEqual(result, 0.02)); |
17189 | |
17190 | // Send another inference |
17191 | bindings_.get(lengths)->getHandle<int32_t>() = { |
17192 | 1, |
17193 | 0, |
17194 | 0, |
17195 | 0, |
17196 | }; |
17197 | EE_.run(bindings_); |
17198 | |
17199 | Tensor &result1 = *bindings_.get(S->getPlaceholder()); |
17200 | Tensor expected1(ElemKind::FloatTy, {4, 1}); |
17201 | expected1.getHandle<float>() = { |
17202 | 2.0, |
17203 | 0, |
17204 | 0, |
17205 | 0, |
17206 | }; |
17207 | EXPECT_TRUE(expected1.isEqual(result1, 0.02)); |
17208 | } |
17209 | |
17210 | TEST_P( |
17211 | OperatorTest, |
17212 | FusedRowwiseQuantizedSparseLengthsWeightedSum_ConvertedFloat16_back_to_back2) { |
17213 | CHECK_IF_ENABLED(); |
17214 | |
17215 | Tensor data(ElemKind::FloatTy, {10000, 64}); |
17216 | data.getHandle().randomize(-1, 1, mod_.getPRNG()); |
17217 | |
17218 | Placeholder *weights = |
17219 | mod_.createPlaceholder(ElemKind::FloatTy, {10000}, "weights" , |
17220 | /* isTrainable */ false); |
17221 | |
17222 | Placeholder *indices = |
17223 | mod_.createPlaceholder(ElemKind::Int64ITy, {10000}, "indices" , |
17224 | /* isTrainable */ false); |
17225 | Placeholder *lengths = |
17226 | mod_.createPlaceholder(ElemKind::Int32ITy, {32}, "lengths" , |
17227 | /* isTrainable */ false); |
17228 | |
17229 | Tensor *wT = bindings_.allocate(weights); |
17230 | wT->zero(); |
17231 | wT->getHandle<float>().at({0}) = 4.18067; |
17232 | |
17233 | Tensor *iT = bindings_.allocate(indices); |
17234 | iT->zero(); |
17235 | iT->getHandle<int64_t>().at({0}) = 4124; |
17236 | |
17237 | bindings_.allocate(lengths)->getHandle<int32_t>() = { |
17238 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
17239 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0}; |
17240 | |
17241 | auto *R = F_->createFusedRowwiseQuantizedSparseLengthsWeightedSum( |
17242 | "RQSLWS" , data, weights, indices, lengths); |
17243 | SaveNode *S = F_->createSave("save" , R); |
17244 | bindings_.allocate(S->getPlaceholder()); |
17245 | |
17246 | CompilationContext cctx; |
17247 | cctx.precisionConfig.convertToFP16 = true; |
17248 | cctx.precisionConfig.convertFusedToFP16 = true; |
17249 | cctx.precisionConfig.float16Format = |
17250 | PrecisionConfiguration::Float16Format::FP16; |
17251 | |
17252 | EE_.compile(cctx); |
17253 | EE_.run(bindings_); |
17254 | |
  // This is the result of the first inference. We expect the result in the
  // second-to-last row, i.e. raw locations 30 * 64 through 31 * 64 - 1. The
  // rest of the rows should be all 0.
17258 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
17259 | |
17260 | // Send another inference |
17261 | result.zero(); |
17262 | // set new indices. |
17263 | iT = bindings_.get(indices); |
17264 | iT->zero(); |
17265 | iT->getHandle<int64_t>().at({0}) = 1256; |
17266 | // set new lengths. |
  bindings_.get(lengths)->getHandle<int32_t>() = {
      0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
17272 | EE_.run(bindings_); |
17273 | |
17274 | // We now expect the second to last row to be all 0. |
17275 | Tensor &result1 = *bindings_.get(S->getPlaceholder()); |
17276 | float *d = reinterpret_cast<float *>(result1.getUnsafePtr()); |
17277 | for (size_t i = 30 * 64; i < 31 * 64; ++i) { |
17278 | EXPECT_EQ(0, d[i]); |
17279 | } |
17280 | } |
17281 | |
17282 | /// Helper to test FusedRowwiseQuantizedSparseLengthsSum using \p fusedDTy. |
17283 | template <typename DataType> |
17284 | static void testFusedRowwiseQuantizedSparseLengthsSum( |
17285 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
17286 | glow::ExecutionEngine &EE, ElemKind fusedDTy, float allowedError, |
17287 | bool useFP16Accumulation = false) { |
17288 | /* |
17289 | DATA = [ |
17290 | [1.0, 1.2], |
17291 | [2.3, 3.4], |
17292 | [4.5, 5.7], |
17293 | ] |
17294 | INDICES = [2, 0, 1, 2, 0, 0, 0, 0] |
17295 | LENGTHS = [2, 0, 2, 1, 3] |
17296 | OUTPUT = [ |
17297 | [5.5, 6.9], |
17298 | [0.0, 0.0], |
17299 | [6.8, 9.1], |
17300 | [1.0, 1.2], |
17301 | [3.0, 3.6], |
17302 | ] |
17303 | */ |
17304 | const bool fusedData = isFusedQuantizedElemKind(fusedDTy); |
17305 | const ElemKind DTy = |
17306 | fusedData ? getScaleOffsetElemKindFromFused(fusedDTy) : fusedDTy; |
17307 | |
17308 | Tensor data(ElemKind::FloatTy, {3, 2}); |
17309 | data.getHandle() = { |
17310 | 1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f, |
17311 | }; |
17312 | |
17313 | Placeholder *indices = |
17314 | mod.createPlaceholder(ElemKind::Int64ITy, {8}, "indices" , |
17315 | /* isTrainable */ false); |
17316 | Placeholder *lengths = mod.createPlaceholder( |
17317 | ElemKind::Int32ITy, {5}, "lengths" , /* isTrainable */ false); |
17318 | |
17319 | bindings.allocate(indices)->getHandle<int64_t>() = { |
17320 | 2, 0, 1, 2, 0, 0, 0, 0, |
17321 | }; |
17322 | bindings.allocate(lengths)->getHandle<int32_t>() = { |
17323 | 2, 0, 2, 1, 3, |
17324 | }; |
17325 | |
17326 | auto *R = F->createFusedRowwiseQuantizedSparseLengthsSum( |
17327 | "RQSLWS" , data, indices, lengths, fusedDTy, useFP16Accumulation); |
17328 | SaveNode *S = F->createSave("save" , R); |
17329 | bindings.allocate(S->getPlaceholder()); |
17330 | |
17331 | EE.compile(CompilationMode::Infer); |
17332 | EE.run(bindings); |
17333 | |
17334 | Tensor &result = *bindings.get(S->getPlaceholder()); |
17335 | Tensor expected(DTy, {5, 2}); |
17336 | expected.getHandle<DataType>() = { |
17337 | 5.5f, 6.9f, 0.0f, 0.0f, 6.8f, 9.1f, 1.0f, 1.2f, 3.0f, 3.6f, |
17338 | }; |
17339 | |
17340 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
17341 | } |
17342 | |
17343 | /// Test Fused-RWQ-SLS in Float. |
17344 | TEST_P(OperatorTest, FusedRowwiseQuantizedSparseLengthsSum_Float) { |
17345 | CHECK_IF_ENABLED(); |
17346 | testFusedRowwiseQuantizedSparseLengthsSum<float>( |
17347 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedQTy, 0.015); |
17348 | } |
17349 | |
17350 | /// Test Fused-RWQ-SLS in Float16. Uses Float accumulation. |
17351 | TEST_P(OperatorTest, FusedRowwiseQuantizedSparseLengthsSum_Float16_AccumFloat) { |
17352 | CHECK_IF_ENABLED(); |
17353 | testFusedRowwiseQuantizedSparseLengthsSum<float16_t>( |
17354 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, 0.02, |
17355 | /* useFP16Accumulation */ false); |
17356 | } |
17357 | |
17358 | /// Test Fused-RWQ-SLS in Float16. Uses Float16 accumulation. |
17359 | TEST_P(OperatorTest, |
17360 | FusedRowwiseQuantizedSparseLengthsSum_Float16_AccumFloat16) { |
17361 | CHECK_IF_ENABLED(); |
17362 | testFusedRowwiseQuantizedSparseLengthsSum<float16_t>( |
17363 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, 0.02, |
17364 | /* useFP16Accumulation */ true); |
17365 | } |
17366 | |
/// Test Fused-RWQ-SLS in Float16 with 4-bit quantization for the embedding.
/// Uses Float16 accumulation.
17369 | TEST_P(OperatorTest, |
17370 | FusedRowwiseQuantizedSparseLengthsSum_Fused4Bit_Float16_AccumFloat16) { |
17371 | CHECK_IF_ENABLED(); |
17372 | testFusedRowwiseQuantizedSparseLengthsSum<float16_t>( |
17373 | bindings_, mod_, F_, EE_, ElemKind::UInt4FusedFP16QTy, 0.15, |
17374 | /* useFP16Accumulation */ true); |
17375 | } |
17376 | |
/// Helper to test all variants of SLWS on a two-column data tensor, with
/// precision \p DTy, and precision for data \p dataDTy.
17379 | template <typename DataType> |
17380 | static void testSLWSTwoColumn(glow::PlaceholderBindings &bindings, |
17381 | glow::Module &mod, glow::Function *F, |
17382 | glow::ExecutionEngine &EE, ElemKind dataDTy, |
17383 | float allowedError, |
17384 | bool useFP16Accumulation = false) { |
17385 | /* |
17386 | DATA = [ |
17387 | [1.0, 1.2], |
17388 | [2.3, 3.4], |
17389 | [4.5, 5.7], |
17390 | ] |
17391 | INDICES = [2, 0, 1, 2, 0, 0, 0, 0] |
17392 | LENGTHS = [2, 0, 2, 1, 3] |
17393 | WEIGHTS = [1, -1, 1.5, 0.5, -1.5, 2, -2, -0.5] |
17394 | OUTPUT = [ |
17395 | [3.5, 4.5], |
17396 | [0.0, 0.0], |
17397 | [5.7, 7.95], |
17398 | [-1.5, -1.8], |
17399 | [-0.5, -0.6], |
17400 | ] |
17401 | */ |
17402 | const bool fusedData = isFusedQuantizedElemKind(dataDTy); |
17403 | const ElemKind DTy = |
17404 | fusedData ? getScaleOffsetElemKindFromFused(dataDTy) : dataDTy; |
17405 | |
17406 | Tensor data(fusedData ? ElemKind::FloatTy : DTy, {3, 2}); |
17407 | #define floatData \ |
17408 | { 1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f, } |
17409 | if (fusedData) { |
17410 | data.getHandle<float>() = floatData; |
17411 | } else { |
17412 | data.getHandle<DataType>() = floatData; |
17413 | } |
17414 | |
17415 | Placeholder *indices = |
17416 | mod.createPlaceholder(ElemKind::Int64ITy, {8}, "indices" , |
17417 | /* isTrainable */ false); |
17418 | Placeholder *lengths = mod.createPlaceholder( |
17419 | ElemKind::Int32ITy, {5}, "lengths" , /* isTrainable */ false); |
17420 | Placeholder *weights = |
17421 | mod.createPlaceholder(DTy, {8}, "weights" , /* isTrainable */ false); |
17422 | |
17423 | bindings.allocate(indices)->getHandle<int64_t>() = { |
17424 | 2, 0, 1, 2, 0, 0, 0, 0, |
17425 | }; |
17426 | bindings.allocate(lengths)->getHandle<int32_t>() = { |
17427 | 2, 0, 2, 1, 3, |
17428 | }; |
17429 | bindings.allocate(weights)->getHandle<DataType>() = { |
17430 | 1, -1, 1.5, 0.5, -1.5, 2, -2, -0.5, |
17431 | }; |
17432 | |
17433 | Node *SLWS = nullptr; |
17434 | if (fusedData) { |
17435 | SLWS = F->createFusedRowwiseQuantizedSparseLengthsWeightedSum( |
17436 | "RQSLWS" , data, weights, indices, lengths, dataDTy, |
17437 | useFP16Accumulation); |
17438 | } else { |
17439 | Placeholder *dataP = mod.createPlaceholder(&data.getType(), "data" , |
17440 | /* isTrainable */ false); |
17441 | bindings.insert(dataP, std::move(data)); |
17442 | SLWS = F->createSparseLengthsWeightedSum("SLWS" , dataP, weights, indices, |
17443 | lengths); |
17444 | } |
17445 | SaveNode *S = F->createSave("save" , SLWS); |
17446 | bindings.allocate(S->getPlaceholder()); |
17447 | |
17448 | EE.compile(CompilationMode::Infer); |
17449 | EE.run(bindings); |
17450 | |
17451 | Tensor &result = *bindings.get(S->getPlaceholder()); |
17452 | Tensor expected(DTy, {5, 2}); |
17453 | expected.getHandle<DataType>() = { |
17454 | 3.5, 4.5, 0.0, 0.0, 5.7, 7.95, -1.5, -1.8, -0.5, -0.6, |
17455 | }; |
17456 | |
17457 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
17458 | } |
17459 | |
17460 | /// Test SLWS in Float. |
17461 | TEST_P(OperatorTest, SLWSTwoColumn_Float) { |
17462 | CHECK_IF_ENABLED(); |
17463 | testSLWSTwoColumn<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.0001); |
17464 | } |
17465 | |
17466 | /// Test SLWS in Float16. |
17467 | TEST_P(OperatorTest, SLWSTwoColumn_Float16_AccumFloat) { |
17468 | CHECK_IF_ENABLED(); |
17469 | testSLWSTwoColumn<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
17470 | 0.005, |
17471 | /* useFP16Accumulation */ false); |
17472 | } |
17473 | |
17474 | /// Test Fused-RWQ-SLWS in Float. |
17475 | TEST_P(OperatorTest, FusedRowwiseQuantizedSLWSTwoColumn_Float) { |
17476 | CHECK_IF_ENABLED(); |
17477 | testSLWSTwoColumn<float>(bindings_, mod_, F_, EE_, ElemKind::UInt8FusedQTy, |
17478 | 0.015); |
17479 | } |
17480 | |
17481 | /// Test Fused-RWQ-SLWS in Float16. Uses Float accumulation. |
17482 | TEST_P(OperatorTest, FusedRowwiseQuantizedSLWSTwoColumn_Float16_AccumFloat) { |
17483 | CHECK_IF_ENABLED(); |
17484 | testSLWSTwoColumn<float16_t>(bindings_, mod_, F_, EE_, |
17485 | ElemKind::UInt8FusedFP16QTy, 0.015, |
17486 | /* useFP16Accumulation */ false); |
17487 | } |
17488 | |
17489 | /// Test Fused-RWQ-SLWS in Float16. Uses Float16 accumulation. |
17490 | TEST_P(OperatorTest, FusedRowwiseQuantizedSLWSTwoColumn_Float16_AccumFloat16) { |
17491 | CHECK_IF_ENABLED(); |
17492 | testSLWSTwoColumn<float16_t>(bindings_, mod_, F_, EE_, |
17493 | ElemKind::UInt8FusedFP16QTy, 0.015, |
17494 | /* useFP16Accumulation */ true); |
17495 | } |
17496 | |
/// Test Fused-RWQ-SLWS in Float16 with 4-bit quantization for the embedding.
17498 | /// Uses Float16 accumulation. |
17499 | TEST_P(OperatorTest, |
17500 | FusedRowwiseQuantizedSLWSTwoColumn_Fused4Bit_Float16_AccumFloat16) { |
17501 | CHECK_IF_ENABLED(); |
17502 | testSLWSTwoColumn<float16_t>(bindings_, mod_, F_, EE_, |
17503 | ElemKind::UInt4FusedFP16QTy, 0.1, |
17504 | /* useFP16Accumulation */ true); |
17505 | } |
17506 | |
/// Test Fused-RWQ-SLWS in Float16 with 4-bit quantization for the embedding.
17508 | /// Uses Float accumulation, Float for scale/offset. |
17509 | TEST_P(OperatorTest, |
17510 | FusedRowwiseQuantizedSLWSTwoColumn_Fused4Bit_Float_AccumFloat) { |
17511 | ENABLED_BACKENDS("Interpreter" ); |
17512 | testSLWSTwoColumn<float>(bindings_, mod_, F_, EE_, ElemKind::UInt4FusedQTy, |
17513 | 0.1, |
17514 | /* useFP16Accumulation */ false); |
17515 | } |
17516 | |
17517 | /// Helper to test SLWS with different lengths modes, with precision \p DTy, |
17518 | /// and precision for data \p dataDTy. |
17519 | template <typename DataType> |
17520 | static void testSLWSLengthsMode(glow::PlaceholderBindings &bindings, |
17521 | glow::Module &mod, glow::Function *F, |
17522 | glow::ExecutionEngine &EE, ElemKind dataDTy, |
17523 | float allowedError, bool useFP16Accumulation, |
17524 | LengthsMode lengthsMode) { |
17525 | /* |
17526 | DATA = [ |
17527 | [1.0, 1.2], |
17528 | [2.3, 3.4], |
17529 | [4.5, 5.7], |
17530 | ] |
17531 | INDICES = [2, 0, 1, 2, 0] |
17532 | LENGTHS = [1, 1, 1, 1, 1] |
17533 | WEIGHTS = [1, -1, 1.5, 0.5, -1.5] |
17534 | OUTPUT = [ |
17535 | [4.5, 5.7], |
17536 | [-1.0, -1.2], |
17537 | [3.45, 5.1], |
17538 | [2.25, 2.85], |
17539 | [-1.5, -1.8], |
17540 | ] |
17541 | */ |
17542 | const bool fusedData = isFusedQuantizedElemKind(dataDTy); |
17543 | const ElemKind DTy = |
17544 | fusedData ? getScaleOffsetElemKindFromFused(dataDTy) : dataDTy; |
17545 | |
17546 | Tensor data(fusedData ? ElemKind::FloatTy : DTy, {3, 2}); |
17547 | #define floatData \ |
17548 | { 1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f, } |
17549 | if (fusedData) { |
17550 | data.getHandle<float>() = floatData; |
17551 | } else { |
17552 | data.getHandle<DataType>() = floatData; |
17553 | } |
17554 | |
17555 | Placeholder *indices = |
17556 | mod.createPlaceholder(ElemKind::Int64ITy, {5}, "indices" , |
17557 | /* isTrainable */ false); |
17558 | Placeholder *lengths = mod.createPlaceholder( |
17559 | ElemKind::Int32ITy, {5}, "lengths" , /* isTrainable */ false); |
17560 | Placeholder *weights = |
17561 | mod.createPlaceholder(DTy, {5}, "weights" , /* isTrainable */ false); |
17562 | |
17563 | bindings.allocate(indices)->getHandle<int64_t>() = { |
17564 | 2, 0, 1, 2, 0, |
17565 | }; |
17566 | auto LH = bindings.allocate(lengths)->getHandle<int32_t>(); |
17567 | Tensor expected(DTy, {5, 2}); |
17568 | LH = {1, 1, 1, 1, 1}; |
17569 | expected.getHandle<DataType>() = { |
17570 | 4.5, 5.7, -1.0, -1.2, 3.45, 5.1, 2.25, 2.85, -1.5, -1.8, |
17571 | }; |
17572 | bindings.allocate(weights)->getHandle<DataType>() = { |
17573 | 1, -1, 1.5, 0.5, -1.5, |
17574 | }; |
17575 | |
17576 | Node *SLWS = nullptr; |
17577 | if (fusedData) { |
17578 | SLWS = F->createFusedRowwiseQuantizedSparseLengthsWeightedSum( |
17579 | "RQSLWS" , data, weights, indices, lengths, dataDTy, useFP16Accumulation, |
17580 | lengthsMode); |
17581 | } else { |
17582 | Placeholder *dataP = mod.createPlaceholder(&data.getType(), "data" , |
17583 | /* isTrainable */ false); |
17584 | bindings.insert(dataP, std::move(data)); |
17585 | SLWS = F->createSparseLengthsWeightedSum("SLWS" , dataP, weights, indices, |
17586 | lengths, lengthsMode); |
17587 | } |
17588 | SaveNode *S = F->createSave("save" , SLWS); |
17589 | bindings.allocate(S->getPlaceholder()); |
17590 | |
17591 | EE.compile(CompilationMode::Infer); |
17592 | EE.run(bindings); |
17593 | |
17594 | Tensor &result = *bindings.get(S->getPlaceholder()); |
17595 | |
17596 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
17597 | } |
17598 | |
/// Test SLWS with all-ones lengths, in Float.
17600 | TEST_P(OperatorTest, SLWSAllLengthsOne_Float) { |
17601 | CHECK_IF_ENABLED(); |
17602 | testSLWSLengthsMode<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, |
17603 | 0.0001, /* useFP16Accumulation */ false, |
17604 | LengthsMode::AllOne); |
17605 | } |
17606 | |
/// Test SLWS with all-ones lengths, in Float16.
17608 | TEST_P(OperatorTest, SLWSAllLengthsOne_Float16_AccumFloat) { |
17609 | CHECK_IF_ENABLED(); |
17610 | testSLWSLengthsMode<float16_t>( |
17611 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, 0.005, |
17612 | /* useFP16Accumulation */ false, LengthsMode::AllOne); |
17613 | } |
17614 | |
/// Test Fused-RWQ-SLWS with all-ones lengths, in Float.
17616 | TEST_P(OperatorTest, FusedRowwiseQuantizedSLWSAllLengthsOne_Float) { |
17617 | CHECK_IF_ENABLED(); |
17618 | testSLWSLengthsMode<float>( |
17619 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedQTy, 0.015, |
17620 | /* useFP16Accumulation */ false, LengthsMode::AllOne); |
17621 | } |
17622 | |
/// Test Fused-RWQ-SLWS with all-ones lengths, in Float16. Uses Float
/// accumulation.
17624 | TEST_P(OperatorTest, |
17625 | FusedRowwiseQuantizedSLWSAllLengthsOne_Float16_AccumFloat) { |
17626 | CHECK_IF_ENABLED(); |
17627 | testSLWSLengthsMode<float16_t>( |
17628 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, 0.015, |
17629 | /* useFP16Accumulation */ false, LengthsMode::AllOne); |
17630 | } |
17631 | |
/// Test Fused-RWQ-SLWS with all-ones lengths, in Float16. Uses Float16
/// accumulation.
17633 | TEST_P(OperatorTest, |
17634 | FusedRowwiseQuantizedSLWSAllLengthsOne_Float16_AccumFloat16) { |
17635 | CHECK_IF_ENABLED(); |
17636 | testSLWSLengthsMode<float16_t>( |
17637 | bindings_, mod_, F_, EE_, ElemKind::UInt8FusedFP16QTy, 0.015, |
17638 | /* useFP16Accumulation */ true, LengthsMode::AllOne); |
17639 | } |
17640 | |
/// Test Fused-RWQ-SLWS in Float16 with 4-bit quantization for the embedding.
17642 | /// Uses Float16 accumulation. |
17643 | TEST_P(OperatorTest, |
17644 | FusedRowwiseQuantizedSLWSAllLengthsOne_Fused4Bit_Float16_AccumFloat16) { |
17645 | CHECK_IF_ENABLED(); |
17646 | testSLWSLengthsMode<float16_t>( |
17647 | bindings_, mod_, F_, EE_, ElemKind::UInt4FusedFP16QTy, 0.1, |
17648 | /* useFP16Accumulation */ true, LengthsMode::AllOne); |
17649 | } |
17650 | |
17651 | /// Test SLS when some input tensors are constants. |
17652 | TEST_P(OperatorTest, ConstantSLS) { |
17653 | CHECK_IF_ENABLED(); |
17654 | |
17655 | auto *data = mod_.createConstant(ElemKind::FloatTy, {1024, 32}, "data" ); |
17656 | auto *indices = |
17657 | mod_.createPlaceholder(ElemKind::Int64ITy, {314}, "indices" , false); |
17658 | auto *lengths = mod_.createConstant(ElemKind::Int32ITy, {20}, "lengths" ); |
17659 | |
17660 | // data |
17661 | auto DH = data->getPayload().getHandle(); |
17662 | for (dim_t i = 0; i < 1024; i++) { |
17663 | for (dim_t j = 0; j < 32; j++) { |
17664 | DH.at({i, j}) = (float)i; |
17665 | } |
17666 | } |
17667 | |
17668 | // indices |
17669 | auto IH = bindings_.allocate(indices)->getHandle<int64_t>(); |
17670 | std::iota(IH.begin(), IH.end(), 0); |
17671 | |
17672 | // lengths |
17673 | auto LH = lengths->getHandle<int32_t>(); |
17674 | LH.clear(16); |
17675 | for (dim_t ldx : {1, 2, 6, 13, 14, 19}) { |
17676 | LH.at({ldx}) = 15; |
17677 | } |
17678 | |
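// 14 batches of length 16 and 6 of length 15 consume 16 * 14 + 15 * 6 = 314
// indices, matching the "indices" placeholder. Since row i of data holds the
// constant i, each output row is the sum of the indices it gathers; e.g. the
// first batch sums 0 + 1 + ... + 15 = 120, the first entry of "expected".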
17679 | auto *R = F_->createSparseLengthsSum("SLS" , data, indices, lengths); |
17680 | auto *S = F_->createSave("save" , R); |
17681 | auto *out = bindings_.allocate(S->getPlaceholder()); |
17682 | |
17683 | EE_.compile(CompilationMode::Infer); |
17684 | EE_.run(bindings_); |
17685 | |
17686 | std::vector<float> expected = {120, 345, 570, 856, 1112, 1368, 1515, |
17687 | 1864, 2120, 2376, 2632, 2888, 3144, 3180, |
17688 | 3405, 3880, 4136, 4392, 4648, 4590}; |
17689 | auto OH = out->getHandle(); |
17690 | for (dim_t i = 0; i < 20; i++) { |
17691 | for (dim_t j = 0; j < 32; j++) { |
17692 | EXPECT_EQ(OH.at({i, j}), expected[i]); |
17693 | } |
17694 | } |
17695 | } |
17696 | |
17697 | /// Test SLS when some "lengths" inputs are zero. |
17698 | TEST_P(OperatorStatelessTest, SLSWithZeroLengths) { |
17699 | CHECK_IF_ENABLED(); |
17700 | |
17701 | compareAgainstInterpreter( |
17702 | getBackendName(), |
17703 | [](PlaceholderBindings &bindings, ExecutionEngine &EE) { |
17704 | auto &mod = EE.getModule(); |
17705 | auto *F = mod.createFunction("main" ); |
17706 | constexpr dim_t embedWidth = 1000; |
17707 | Tensor data(ElemKind::FloatTy, {embedWidth, 8}); |
17708 | data.getHandle().randomize(-1, 1, mod.getPRNG()); |
17709 | Constant *weights = |
17710 | mod.createConstant(ElemKind::FloatTy, {3000}, "weights" ); |
17711 | weights->getPayloadMutable().getHandle().clear(1.0f); |
17712 | auto *indices = |
17713 | mod.createPlaceholder(ElemKind::Int64ITy, {3000}, "indices" , false); |
17714 | auto *lengths = |
17715 | mod.createPlaceholder(ElemKind::Int32ITy, {1000}, "lengths" , false); |
17716 | bindings.allocate(indices)->getHandle<int64_t>().randomize( |
17717 | 0, embedWidth - 1, mod.getPRNG()); |
17718 | auto LH = bindings.allocate(lengths)->getHandle<int32_t>(); |
17719 | LH.clear(0); |
17720 | auto it = LH.begin(); |
17721 | for (int i = 0; i < 13; ++i, ++it) { |
17722 | *it = 20; |
17723 | } |
17724 | |
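// Only the first 13 of the 1000 batches are non-empty, consuming
// 13 * 20 = 260 of the 3000 indices; the remaining 987 output rows
// come from zero-length segments and should be all zeros.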
17725 | auto *R = F->createFusedRowwiseQuantizedSparseLengthsWeightedSum( |
17726 | "RQSLWS" , data, weights, indices, lengths); |
17727 | auto *S = F->createSave("save" , R); |
17728 | auto *res = bindings.allocate(S->getPlaceholder()); |
17729 | return std::make_pair(F, res); |
17730 | }, |
17731 | ElemKind::FloatTy, ElemKind::FloatTy); |
17732 | } |
17733 | |
17734 | /// Helper to create an SLS test with all zero lengths, with and without fused |
17735 | /// rowwise quantization based on \p convertToRowwiseQuantization. |
17736 | static FunctionTensorPair |
17737 | createAndInitZeroLengthsSLSTest(glow::PlaceholderBindings &bindings, |
17738 | glow::ExecutionEngine &EE, |
17739 | bool convertToRowwiseQuantization) { |
17740 | auto &mod = EE.getModule(); |
17741 | auto *F = mod.createFunction("main" ); |
17742 | constexpr dim_t embedWidth = 1000; |
17743 | auto dataTy = mod.uniqueType(ElemKind::FloatTy, {embedWidth, 8}); |
17744 | Tensor data(dataTy); |
17745 | data.getHandle().randomize(-1, 1, mod.getPRNG()); |
17746 | Constant *weights = mod.createConstant(ElemKind::FloatTy, {3000}, "weights" ); |
17747 | weights->getPayloadMutable().getHandle().clear(1.0f); |
17748 | auto *indices = |
17749 | mod.createPlaceholder(ElemKind::Int64ITy, {3000}, "indices" , false); |
17750 | auto *lengths = |
17751 | mod.createPlaceholder(ElemKind::Int32ITy, {1000}, "lengths" , false); |
17752 | bindings.allocate(indices)->getHandle<int64_t>().randomize(0, embedWidth - 1, |
17753 | mod.getPRNG()); |
17754 | auto LH = bindings.allocate(lengths)->getHandle<int32_t>(); |
17755 | LH.clear(0); |
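// Every one of the 1000 lengths is zero, so no indices are consumed and
// every row of the result should be all zeros regardless of data/weights.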
17756 | |
17757 | Node *R = nullptr; |
17758 | if (convertToRowwiseQuantization) { |
17759 | R = F->createFusedRowwiseQuantizedSparseLengthsWeightedSum( |
17760 | "RQSLWS" , data, weights, indices, lengths); |
17761 | } else { |
17762 | Placeholder *dataP = |
17763 | mod.createPlaceholder(dataTy, "data" , /* isTrainable */ false); |
17764 | bindings.insert(dataP, std::move(data)); |
17765 | R = F->createSparseLengthsWeightedSum("SLWS" , dataP, weights, indices, |
17766 | lengths); |
17767 | } |
17768 | auto *S = F->createSave("save" , R); |
17769 | auto *res = bindings.allocate(S->getPlaceholder()); |
17770 | return std::make_pair(F, res); |
17771 | } |
17772 | |
17773 | /// Test Fused RWQ-SLS when all "lengths" inputs are zero in FloatTy. |
17774 | TEST_P(OperatorStatelessTest, FusedRWQSLSAllZeroLengths_Float) { |
17775 | CHECK_IF_ENABLED(); |
17776 | |
17777 | compareAgainstInterpreter(getBackendName(), |
17778 | std::bind(createAndInitZeroLengthsSLSTest, |
17779 | std::placeholders::_1, |
17780 | std::placeholders::_2, |
17781 | /* convertToRowwiseQuantization */ true), |
17782 | ElemKind::FloatTy, ElemKind::FloatTy); |
17783 | } |
17784 | |
17785 | /// Test Fused RWQ-SLS when all "lengths" inputs are zero in Float16Ty. |
17786 | TEST_P(OperatorStatelessTest, FusedRWQSLSAllZeroLengths_Float16) { |
17787 | CHECK_IF_ENABLED(); |
17788 | |
17789 | compareAgainstInterpreter(getBackendName(), |
17790 | std::bind(createAndInitZeroLengthsSLSTest, |
17791 | std::placeholders::_1, |
17792 | std::placeholders::_2, |
17793 | /* convertToRowwiseQuantization */ true), |
17795 | ElemKind::Float16Ty, ElemKind::Float16Ty); |
17796 | } |
17797 | |
17798 | /// Test SLS when all "lengths" inputs are zero in FloatTy. |
17799 | TEST_P(OperatorStatelessTest, SLSAllZeroLengths_Float) { |
17800 | CHECK_IF_ENABLED(); |
17801 | |
17802 | compareAgainstInterpreter(getBackendName(), |
17803 | std::bind(createAndInitZeroLengthsSLSTest, |
17804 | std::placeholders::_1, |
17805 | std::placeholders::_2, |
17806 | /* convertToRowwiseQuantization */ false), |
17807 | ElemKind::FloatTy, ElemKind::FloatTy); |
17808 | } |
17809 | |
17810 | /// Test SLS when all "lengths" inputs are zero in Float16Ty. |
17811 | TEST_P(OperatorStatelessTest, SLSAllZeroLengths_Float16) { |
17812 | CHECK_IF_ENABLED(); |
17813 | |
17814 | compareAgainstInterpreter(getBackendName(), |
17815 | std::bind(createAndInitZeroLengthsSLSTest, |
17816 | std::placeholders::_1, |
17817 | std::placeholders::_2, |
17818 | /* convertToRowwiseQuantization */ false), |
17820 | ElemKind::Float16Ty, ElemKind::Float16Ty); |
17821 | } |
17822 | |
17823 | template <typename DataType, typename LengthType, typename IndexType> |
17824 | static void testBatchSparseToDense(glow::PlaceholderBindings &bindings, |
17825 | glow::Module &mod, glow::Function *F, |
17826 | glow::ExecutionEngine &EE, ElemKind DTy, |
17827 | ElemKind LTy, ElemKind ITy) { |
17828 | constexpr dim_t numBatches = 6; |
17829 | constexpr dim_t numIndices = 10; |
17830 | |
17831 | auto *lengths = mod.createPlaceholder(LTy, {numBatches}, "lengths" , false); |
17832 | auto *indices = mod.createPlaceholder(ITy, {numIndices}, "indices" , false); |
17833 | auto *values = mod.createPlaceholder(DTy, {numIndices}, "values" , false); |
17834 | float defaultValue = 0.5; |
17835 | unsigned_t denseLastDim = 10; |
17836 | |
17837 | auto LH = bindings.allocate(lengths)->getHandle<LengthType>(); |
17838 | auto IH = bindings.allocate(indices)->getHandle<IndexType>(); |
17839 | auto VH = bindings.allocate(values)->getHandle<DataType>(); |
17840 | |
17841 | LH = {1, 0, 3, 4, 0, 2}; |
17842 | IH = {0, 1, 2, 1, 3, 6, 4, 5, 2, 8}; |
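// The lengths sum to 1 + 0 + 3 + 4 + 0 + 2 = 10 == numIndices. Batch i
// scatters its LH[i] (index, value) pairs into row i of the {6, 10} output;
// every position not written keeps defaultValue (0.5).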
17843 | |
17844 | auto *BSTD = F->createBatchSparseToDense("BSTD" , lengths, indices, values, |
17845 | defaultValue, denseLastDim); |
17846 | auto *S = F->createSave("save" , BSTD); |
17847 | bindings.allocate(S->getPlaceholder()); |
17848 | |
17849 | EE.compile(CompilationMode::Infer); |
17850 | |
17851 | VH.randomize(-3.0, 3.0, mod.getPRNG()); |
17852 | EE.run(bindings); |
17853 | |
17854 | Tensor &result = *bindings.get(S->getPlaceholder()); |
17855 | |
17856 | // Compute expected output. |
17857 | Tensor expected(DTy, {numBatches, denseLastDim}); |
17858 | auto EH = expected.getHandle<DataType>(); |
17859 | EH.clear(defaultValue); |
17860 | auto curInd = 0; |
17861 | for (dim_t i = 0; i < numBatches; ++i) { |
17862 | auto batchNumIndices = LH.at({i}); |
17863 | for (dim_t j = 0; j < batchNumIndices; ++j) { |
17864 | EH.at({i, static_cast<dim_t>(IH.at(curInd))}) = VH.at(curInd); |
17865 | curInd++; |
17866 | } |
17867 | } |
17868 | |
17869 | EXPECT_TRUE(expected.isEqual(result)); |
17870 | } |
17871 | |
17872 | TEST_P(OperatorTest, BatchSparseToDense_Float) { |
17873 | CHECK_IF_ENABLED(); |
17874 | testBatchSparseToDense<float, int64_t, int64_t>( |
17875 | bindings_, mod_, F_, EE_, ElemKind::FloatTy, ElemKind::Int64ITy, |
17876 | ElemKind::Int64ITy); |
17877 | } |
17878 | |
17879 | TEST_P(OperatorTest, BatchSparseToDense_Float_Int32_Int32) { |
17880 | CHECK_IF_ENABLED(); |
17881 | testBatchSparseToDense<float, int32_t, int32_t>( |
17882 | bindings_, mod_, F_, EE_, ElemKind::FloatTy, ElemKind::Int32ITy, |
17883 | ElemKind::Int32ITy); |
17884 | } |
17885 | |
17886 | TEST_P(OperatorTest, BatchSparseToDense_Float16) { |
17887 | CHECK_IF_ENABLED(); |
17888 | testBatchSparseToDense<float16_t, int64_t, int64_t>( |
17889 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int64ITy, |
17890 | ElemKind::Int64ITy); |
17891 | } |
17892 | |
17893 | TEST_P(OperatorTest, BatchSparseToDense_BFloat16) { |
17894 | CHECK_IF_ENABLED(); |
17895 | testBatchSparseToDense<bfloat16_t, int64_t, int64_t>( |
17896 | bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, ElemKind::Int64ITy, |
17897 | ElemKind::Int64ITy); |
17898 | } |
17899 | |
17900 | template <typename DataType, typename IndicatorType> |
17901 | static void testFillExamplesWithIndicator(glow::PlaceholderBindings &bindings, |
17902 | glow::Module &mod, glow::Function *F, |
17903 | glow::ExecutionEngine &EE, |
17904 | ElemKind DTy, ElemKind IndTy) { |
17905 | // Create and initialize inputs. Make input 3D to make sure |
17906 | // multidimensional values are handled properly. |
17907 | auto *indicator = mod.createPlaceholder(IndTy, {8}, "indicator" , false); |
17908 | auto *data = mod.createPlaceholder(DTy, {4, 3, 2}, "data" , false); |
17909 | |
17910 | auto IH = bindings.allocate(indicator)->getHandle<IndicatorType>(); |
17911 | auto DH = bindings.allocate(data)->getHandle<DataType>(); |
17912 | |
17913 | IH = {1, 0, 1, 0, 1, 1, 0, 0}; |
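// The indicator has four ones, at positions 0, 2, 4, and 5, so data slices
// 0..3 land in output rows 0, 2, 4, and 5; the remaining rows stay zero.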
17914 | |
17915 | auto *filled = F->createFillExamplesWithIndicator("filled" , data, indicator); |
17916 | auto *S = F->createSave("save" , filled); |
17917 | bindings.allocate(S->getPlaceholder()); |
17918 | |
17919 | EE.compile(CompilationMode::Infer); |
17920 | |
17921 | DH.randomize(-3.0, 3.0, mod.getPRNG()); |
17922 | EE.run(bindings); |
17923 | |
17924 | Tensor &result = *bindings.get(S->getPlaceholder()); |
17925 | |
17926 | // Compute expected output. |
17927 | Tensor expected(DTy, {8, 3, 2}); |
17928 | expected.zero(); |
17929 | auto EH = expected.getHandle<DataType>(); |
17930 | dim_t idx = 0; |
17931 | for (dim_t i = 0; i < 8; ++i) { |
17932 | if (IH.at(i) == 1) { |
17933 | for (dim_t j = 0; j < 3; ++j) { |
17934 | for (dim_t k = 0; k < 2; ++k) { |
17935 | EH.at({i, j, k}) = DH.at({idx, j, k}); |
17936 | } |
17937 | } |
17938 | idx++; |
17939 | } |
17940 | } |
17941 | EXPECT_TRUE(expected.isEqual(result)); |
17942 | } |
17943 | |
17944 | TEST_P(OperatorTest, FillExamplesWithIndicator_Float_Int64) { |
17945 | CHECK_IF_ENABLED(); |
17946 | testFillExamplesWithIndicator<float, int64_t>( |
17947 | bindings_, mod_, F_, EE_, ElemKind::FloatTy, ElemKind::Int64ITy); |
17948 | } |
17949 | |
17950 | TEST_P(OperatorTest, FillExamplesWithIndicator_Float16_Int32) { |
17951 | CHECK_IF_ENABLED(); |
17952 | testFillExamplesWithIndicator<float16_t, int32_t>( |
17953 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int32ITy); |
17954 | } |
17955 | |
17956 | TEST_P(OperatorTest, FillExamplesWithIndicator_Float16_Bool) { |
17957 | CHECK_IF_ENABLED(); |
17958 | testFillExamplesWithIndicator<float16_t, bool>( |
17959 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::BoolTy); |
17960 | } |
17961 | |
17962 | TEST_P(OperatorTest, FillExamplesWithIndicator_BFloat16_Int32) { |
17963 | CHECK_IF_ENABLED(); |
17964 | testFillExamplesWithIndicator<bfloat16_t, int32_t>( |
17965 | bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, ElemKind::Int32ITy); |
17966 | } |
17967 | |
17968 | TEST_P(OperatorTest, FillExamplesWithIndicator_Int32_Int32) { |
17969 | CHECK_IF_ENABLED(); |
17970 | testFillExamplesWithIndicator<int32_t, int32_t>( |
17971 | bindings_, mod_, F_, EE_, ElemKind::Int32ITy, ElemKind::Int32ITy); |
17972 | } |
17973 | |
17974 | TEST_P(OperatorTest, SparseToDenseMask1) { |
17975 | CHECK_IF_ENABLED(); |
17976 | |
17977 | /* |
17978 | INDICES = [4, 42, 13, 0, 100, 13] |
17979 | VALUES = [-5.5, 0.7, 11, 1e6, 2, 3.5] |
17980 | DEFAULTVALUE = 1.1 |
17981 | LENGTHS = [4, 2] |
17982 | MASK = [2, 1, 0, 13, 42, 43] |
17983 | OUTPUT = [[1.1, 1.1, 1e6, 11, 0.7, 1.1], [1.1, 1.1, 1.1, 3.5, 1.1, 1.1]] |
17984 | */ |
17985 | auto *indices = |
17986 | mod_.createPlaceholder(ElemKind::Int64ITy, {6}, "indices" , false); |
17987 | auto *values = |
17988 | mod_.createPlaceholder(ElemKind::FloatTy, {6}, "values" , false); |
17989 | auto *defaultValue = |
17990 | mod_.createPlaceholder(ElemKind::FloatTy, {}, "default_value" , false); |
17991 | auto *lengths = |
17992 | mod_.createPlaceholder(ElemKind::Int32ITy, {2}, "lengths" , false); |
17993 | std::vector<dim_t> mask{2, 1, 0, 13, 42, 43}; |
17994 | |
17995 | bindings_.allocate(indices)->getHandle<int64_t>() = {4, 42, 13, 0, 100, 13}; |
17996 | bindings_.allocate(values)->getHandle<float>() = {-5.5, 0.7, 11, 1e6, 2, 3.5}; |
17997 | bindings_.allocate(defaultValue)->getHandle<float>().raw(0) = 1.1; |
17998 | bindings_.allocate(lengths)->getHandle<int32_t>() = {4, 2}; |
17999 | |
18000 | auto *R = F_->createSparseToDenseMask("STDM" , indices, values, defaultValue, |
18001 | lengths, mask); |
18002 | auto *S = F_->createSave("save" , R); |
18003 | bindings_.allocate(S->getPlaceholder()); |
18004 | |
18005 | EE_.compile(CompilationMode::Infer); |
18006 | EE_.run(bindings_); |
18007 | |
18008 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
18009 | Tensor expected(ElemKind::FloatTy, {2, 6}); |
18010 | expected.getHandle<float>() = { |
18011 | 1.1, 1.1, 1e6, 11, 0.7, 1.1, 1.1, 1.1, 1.1, 3.5, 1.1, 1.1, |
18012 | }; |
18013 | |
18014 | EXPECT_TRUE(expected.isEqual(result)); |
18015 | } |
18016 | |
18017 | TEST_P(OperatorTest, SparseToDenseMask2) { |
18018 | CHECK_IF_ENABLED(); |
18019 | |
18020 | /* |
18021 | INDICES = [300, 100, 101, 299] |
18022 | VALUES = [[[-0.1, -0.2], [-0.3, -0.4]], [[2, -2], [2, 9]], |
18023 | [[15, 4.2], [10.3, 30.4]], [[0, 2], [3, 4.4]]] |
18024 | DEFAULTVALUE = [[0.1, 0.2], [0.3, 0.4]] |
LENGTHS = 4 (scalar)
18026 | MASK = [100, 300, 1] |
18027 | OUTPUT = [[[2, -2], [2, 9]], [[-0.1, -0.2], [-0.3, -0.4]], |
18028 | [[0.1, 0.2], [0.3, 0.4]]] |
18029 | */ |
18030 | auto *indices = |
18031 | mod_.createPlaceholder(ElemKind::Int64ITy, {4}, "indices" , false); |
18032 | auto *values = |
18033 | mod_.createPlaceholder(ElemKind::FloatTy, {4, 2, 2}, "values" , false); |
18034 | auto *defaultValue = |
18035 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2}, "default_value" , false); |
18036 | auto *lengths = |
18037 | mod_.createPlaceholder(ElemKind::Int32ITy, {}, "lengths" , false); |
18038 | std::vector<dim_t> mask{100, 300, 1}; |
18039 | |
18040 | bindings_.allocate(indices)->getHandle<int64_t>() = {300, 100, 101, 299}; |
18041 | bindings_.allocate(values)->getHandle<float>() = { |
18042 | -0.1, -0.2, -0.3, -0.4, 2, -2, 2, 9, 15, 4.2, 10.3, 30.4, 0, 2, 3, 4.4}; |
18043 | bindings_.allocate(defaultValue)->getHandle<float>() = {0.1, 0.2, 0.3, 0.4}; |
18044 | bindings_.allocate(lengths)->getHandle<int32_t>() = {4}; |
18045 | |
18046 | auto *R = F_->createSparseToDenseMask("STDM" , indices, values, defaultValue, |
18047 | lengths, mask); |
18048 | auto *S = F_->createSave("save" , R); |
18049 | bindings_.allocate(S->getPlaceholder()); |
18050 | |
18051 | EE_.compile(CompilationMode::Infer); |
18052 | EE_.run(bindings_); |
18053 | |
18054 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
18055 | Tensor expected(ElemKind::FloatTy, {3, 2, 2}); |
18056 | expected.getHandle<float>() = { |
18057 | 2, -2, 2, 9, -0.1, -0.2, -0.3, -0.4, 0.1, 0.2, 0.3, 0.4, |
18058 | }; |
18059 | |
18060 | EXPECT_TRUE(expected.isEqual(result)); |
18061 | } |
18062 | |
18063 | TEST_P(OperatorTest, FP16Reshape) { |
18064 | CHECK_IF_ENABLED(); |
18065 | |
18066 | auto *A = mod_.createPlaceholder(ElemKind::Float16Ty, {20, 13}, "A" , false); |
18067 | auto inputHandle = bindings_.allocate(A)->getHandle<float16_t>(); |
18068 | inputHandle.randomize(-3.0, 3.0, mod_.getPRNG()); |
18069 | |
18070 | auto *tr = F_->createReshape("tr" , A, {13, 20, 1}); |
18071 | auto *result = F_->createSave("saveTranspose" , tr); |
18072 | bindings_.allocate(result->getPlaceholder()); |
18073 | |
18074 | EE_.compile(CompilationMode::Infer); |
18075 | EE_.run(bindings_); |
18076 | |
18077 | auto outputHandle = |
18078 | bindings_.get(result->getPlaceholder())->getHandle<float16_t>(); |
18079 | ASSERT_EQ(outputHandle.size(), inputHandle.size()); |
18080 | for (size_t idx = 0, end = inputHandle.size(); idx != end; ++idx) { |
18081 | EXPECT_EQ(inputHandle.raw(idx), outputHandle.raw(idx)); |
18082 | } |
18083 | } |
18084 | |
18085 | TEST_P(OperatorTest, BoolReshape) { |
18086 | CHECK_IF_ENABLED(); |
18087 | |
18088 | auto *A = mod_.createPlaceholder(ElemKind::BoolTy, {4, 3}, "A" , false); |
18089 | bindings_.allocate(A)->getHandle<bool>() = {false, true, false, true, |
18090 | true, false, false, false, |
18091 | true, true, true, true}; |
18092 | auto *tr = F_->createReshape("tr" , A, {3, 4, 1}); |
18093 | auto *result = F_->createSave("saveTranspose" , tr); |
18094 | bindings_.allocate(result->getPlaceholder()); |
18095 | |
18096 | EE_.compile(CompilationMode::Infer); |
18097 | EE_.run(bindings_); |
18098 | |
18099 | auto outputHandle = |
18100 | bindings_.get(result->getPlaceholder())->getHandle<bool>(); |
18101 | auto inputBoolHandle = bindings_.get(A)->getHandle<bool>(); |
18102 | ASSERT_EQ(outputHandle.size(), inputBoolHandle.size()); |
18103 | for (size_t idx = 0, end = inputBoolHandle.size(); idx != end; ++idx) { |
18104 | EXPECT_EQ(inputBoolHandle.raw(idx), outputHandle.raw(idx)); |
18105 | } |
18106 | } |
18107 | |
18108 | TEST_P(OperatorTest, BFloat16Reshape) { |
18109 | CHECK_IF_ENABLED(); |
18110 | |
18111 | auto *A = mod_.createPlaceholder(ElemKind::BFloat16Ty, {20, 13}, "A" , false); |
18112 | auto inputHandle = bindings_.allocate(A)->getHandle<bfloat16_t>(); |
18113 | inputHandle.randomize(-3.0, 3.0, mod_.getPRNG()); |
18114 | |
18115 | auto *tr = F_->createReshape("tr" , A, {13, 20, 1}); |
18116 | auto *result = F_->createSave("saveTranspose" , tr); |
18117 | bindings_.allocate(result->getPlaceholder()); |
18118 | |
18119 | EE_.compile(CompilationMode::Infer); |
18120 | EE_.run(bindings_); |
18121 | |
18122 | auto outputHandle = |
18123 | bindings_.get(result->getPlaceholder())->getHandle<bfloat16_t>(); |
18124 | ASSERT_EQ(outputHandle.size(), inputHandle.size()); |
18125 | for (size_t idx = 0, end = inputHandle.size(); idx != end; ++idx) { |
18126 | EXPECT_EQ(inputHandle.raw(idx), outputHandle.raw(idx)); |
18127 | } |
18128 | } |
18129 | |
18130 | /// Verify that the Reshape operator works correctly. |
18131 | TEST_P(OperatorTest, Reshape) { |
18132 | CHECK_IF_ENABLED(); |
18133 | |
18134 | auto *A = mod_.createPlaceholder(ElemKind::FloatTy, {5, 7}, "A" , false); |
18135 | auto inputHandle = bindings_.allocate(A)->getHandle(); |
18136 | inputHandle.randomize(-3.0, 3.0, mod_.getPRNG()); |
18137 | |
18138 | auto *RN = F_->createReshape("reshape" , A, {7, 5, 1}); |
18139 | auto *result = F_->createSave("saveReshape" , RN); |
18140 | bindings_.allocate(result->getPlaceholder()); |
18141 | |
18142 | EE_.compile(CompilationMode::Infer); |
18143 | EE_.run(bindings_); |
18144 | |
18145 | auto outputHandle = bindings_.get(result->getPlaceholder())->getHandle(); |
18146 | ASSERT_EQ(outputHandle.size(), inputHandle.size()); |
18147 | ASSERT_EQ(outputHandle.dims().size(), 3); |
18148 | EXPECT_EQ(outputHandle.dims()[0], 7); |
18149 | EXPECT_EQ(outputHandle.dims()[1], 5); |
18150 | EXPECT_EQ(outputHandle.dims()[2], 1); |
18151 | |
18152 | // Check values are still in the same order. |
18153 | for (size_t idx = 0, end = inputHandle.size(); idx != end; ++idx) { |
18154 | EXPECT_EQ(inputHandle.raw(idx), outputHandle.raw(idx)); |
18155 | } |
18156 | } |
18157 | |
18158 | /// Verify that the Reshape operator works correctly with Int64ITy. |
18159 | TEST_P(OperatorTest, ReshapeInt) { |
18160 | CHECK_IF_ENABLED(); |
18161 | |
18162 | auto *A = mod_.createPlaceholder(ElemKind::Int64ITy, {5, 7}, "A" , false); |
18163 | auto inputHandle = bindings_.allocate(A)->getHandle<int64_t>(); |
18164 | inputHandle.randomize<int64_t>(0, 100, mod_.getPRNG()); |
18165 | |
18166 | auto *RN = F_->createReshape("reshape" , A, {7, 5, 1}); |
18167 | auto *result = F_->createSave("saveReshape" , RN); |
18168 | bindings_.allocate(result->getPlaceholder()); |
18169 | |
18170 | EE_.compile(CompilationMode::Infer); |
18171 | EE_.run(bindings_); |
18172 | |
18173 | auto outputHandle = |
18174 | bindings_.get(result->getPlaceholder())->getHandle<int64_t>(); |
18175 | ASSERT_EQ(outputHandle.size(), inputHandle.size()); |
18176 | ASSERT_EQ(outputHandle.dims().size(), 3); |
18177 | EXPECT_EQ(outputHandle.dims()[0], 7); |
18178 | EXPECT_EQ(outputHandle.dims()[1], 5); |
18179 | EXPECT_EQ(outputHandle.dims()[2], 1); |
18180 | |
18181 | // Check values are still in the same order. |
18182 | for (size_t idx = 0, end = inputHandle.size(); idx != end; ++idx) { |
18183 | EXPECT_EQ(inputHandle.raw(idx), outputHandle.raw(idx)); |
18184 | } |
18185 | } |
18186 | |
18187 | /// Verify that the NonZero operator works correctly. |
18188 | TEST_P(OperatorTest, NonZero) { |
18189 | CHECK_IF_ENABLED(); |
18190 | |
18191 | auto *Cond = mod_.createPlaceholder(ElemKind::BoolTy, {8}, "Cond" , false); |
18192 | bindings_.allocate(Cond)->getHandle<bool>() = {false, true, true, false, |
18193 | false, true, false, true}; |
18194 | |
18195 | auto *N = F_->createNonZero("nonZero" , Cond); |
18196 | auto *result = F_->createSave("saveNonZero" , N); |
18197 | bindings_.allocate(result->getPlaceholder()); |
18198 | |
18199 | EE_.compile(CompilationMode::Infer); |
18200 | EE_.run(bindings_); |
18201 | |
18202 | std::array<int, 4> expected{1, 2, 5, 7}; |
18203 | auto resH = bindings_.get(result->getPlaceholder())->getHandle<int32_t>(); |
18204 | |
18205 | for (dim_t i = 0; i < expected.size(); ++i) { |
18206 | EXPECT_EQ(resH.raw(i), expected[i]); |
18207 | } |
18208 | } |
18209 | |
18210 | /// Verify that the Select operator works correctly. |
18211 | TEST_P(OperatorTest, Select) { |
18212 | CHECK_IF_ENABLED(); |
18213 | |
18214 | auto *A = mod_.createPlaceholder(ElemKind::BoolTy, {5}, "A" , false); |
18215 | bindings_.allocate(A)->getHandle<bool>() = {false, true, true, false, false}; |
18216 | |
18217 | auto SNTy = mod_.uniqueType(ElemKind::FloatTy, {5}); |
18218 | SplatNode *SN10 = F_->createSplat("zero" , SNTy, 10.0); |
18219 | SplatNode *SN20 = F_->createSplat("zero" , SNTy, 20.0); |
18220 | |
18221 | auto *SN = F_->createSelect("select" , A, SN10, SN20); |
18222 | auto *result = F_->createSave("saveSelect" , SN); |
18223 | bindings_.allocate(result->getPlaceholder()); |
18224 | |
18225 | EE_.compile(CompilationMode::Infer); |
18226 | EE_.run(bindings_); |
18227 | |
18228 | auto resH = bindings_.get(result->getPlaceholder())->getHandle(); |
18229 | EXPECT_EQ(resH.at({0}), 20.0); |
18230 | EXPECT_EQ(resH.at({1}), 10.0); |
18231 | EXPECT_EQ(resH.at({2}), 10.0); |
18232 | EXPECT_EQ(resH.at({3}), 20.0); |
18233 | EXPECT_EQ(resH.at({4}), 20.0); |
18234 | } |
18235 | |
18236 | /// Verify that the CmpLTE operator works correctly. |
18237 | TEST_P(OperatorTest, CmpLTE) { |
18238 | CHECK_IF_ENABLED(); |
18239 | |
18240 | Placeholder *A = mod_.createPlaceholder(ElemKind::FloatTy, {5}, "A" , false); |
18241 | Placeholder *B = mod_.createPlaceholder(ElemKind::FloatTy, {5}, "B" , false); |
18242 | bindings_.allocate(A)->getHandle<float>() = {0.0, 1.0, 2.0, 3.0, 4.0}; |
18243 | bindings_.allocate(B)->getHandle<float>() = {0.0, 1.1, 1.5, 10.1, -1.0}; |
18244 | |
18245 | auto *CMPLTE = F_->createCmpLTE("select" , A, B); |
18246 | auto *result = F_->createSave("saveCMPLTE" , CMPLTE); |
18247 | Tensor *resultT = bindings_.allocate(result->getPlaceholder()); |
18248 | |
18249 | EE_.compile(CompilationMode::Infer); |
18250 | EE_.run(bindings_); |
18251 | |
18252 | auto resH = resultT->getHandle<bool>(); |
18253 | EXPECT_TRUE(resH.at({0})); |
18254 | EXPECT_TRUE(resH.at({1})); |
18255 | EXPECT_FALSE(resH.at({2})); |
18256 | EXPECT_TRUE(resH.at({3})); |
18257 | EXPECT_FALSE(resH.at({4})); |
18258 | } |
18259 | |
18260 | /// Helper to test SliceReshape using \p DTy. |
18261 | template <typename DataType> |
18262 | static void testSliceReshape(glow::PlaceholderBindings &bindings, |
18263 | glow::Module &mod, glow::Function *F, |
18264 | glow::ExecutionEngine &EE, ElemKind DTy) { |
18265 | auto *X = |
18266 | createPlaceholderConditionallyQuantized(mod, DTy, {3, 3}, "X" , false); |
18267 | |
18268 | auto XH = bindings.allocate(X)->getHandle<DataType>(); |
18269 | for (dim_t i = 0; i < 3; i++) { |
18270 | for (dim_t j = 0; j < 3; j++) { |
18271 | XH.at({i, j}) = i * 3 + j; |
18272 | } |
18273 | } |
18274 | |
18275 | // Do an assortment of slices/reshapes stacked on top of each other. |
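// SX is row 2 of X as a {1, 3} slice; RSX reshapes it to {3}; SSX picks
// element (2, 2) of X as a {1, 1} slice; RSSX reshapes that to {1}.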
18276 | auto *SX = F->createSlice("sliceX" , X, {2, 0}, {3, 3}); |
18277 | auto *RSX = F->createReshape("reshapeSX" , SX, {3}); |
18278 | auto *SSX = F->createSlice("sliceSliceX" , SX, {0, 2}, {1, 3}); |
auto *RSSX = F->createReshape("reshapeSliceSliceX", SSX, {1});
18280 | |
18281 | auto *resultSX = F->createSave("saveSX" , SX); |
18282 | auto *resultRSX = F->createSave("saveRSX" , RSX); |
18283 | auto *resultSSX = F->createSave("saveSSX" , SSX); |
auto *resultRSSX = F->createSave("saveRSSX", RSSX);
18285 | |
18286 | bindings.allocate(resultSX->getPlaceholder()); |
18287 | bindings.allocate(resultRSX->getPlaceholder()); |
18288 | bindings.allocate(resultSSX->getPlaceholder()); |
18289 | bindings.allocate(resultRSSX->getPlaceholder()); |
18290 | |
18291 | EE.compile(CompilationMode::Infer); |
18292 | |
18293 | EE.run(bindings); |
18294 | |
18295 | // Verify the slice has the same data as the original X. |
18296 | auto SXH = bindings.get(resultSX->getPlaceholder())->getHandle<DataType>(); |
18297 | for (dim_t i = 0; i < 3; i++) { |
18298 | EXPECT_NEAR(SXH.at({0, i}), XH.at({2, i}), 1E-5); |
18299 | } |
18300 | |
18301 | // Verify the reshaped slice has the same data as the slice. |
18302 | auto RSXH = bindings.get(resultRSX->getPlaceholder())->getHandle<DataType>(); |
18303 | for (dim_t i = 0; i < 3; i++) { |
18304 | EXPECT_NEAR(SXH.at({0, i}), RSXH.at({i}), 1E-5); |
18305 | } |
18306 | |
18307 | // Verify the slice of the slice has the same data as the slice. |
18308 | auto SSXH = bindings.get(resultSSX->getPlaceholder())->getHandle<DataType>(); |
18309 | EXPECT_NEAR(SXH.at({0, 2}), SSXH.at({0, 0}), 1E-5); |
18310 | |
18311 | // Verify the reshape of the slice of the slice has the same data as the |
18312 | // slice of the slice. |
auto RSSXH =
bindings.get(resultRSSX->getPlaceholder())->getHandle<DataType>();
18315 | EXPECT_NEAR(RSSXH.at({0}), SSXH.at({0, 0}), 1E-5); |
18316 | } |
18317 | |
18318 | /// Stack many slices/reshapes together. Some of these may be turned into |
18319 | /// tensor views stacked onto each other. Test in FloatTy. |
18320 | TEST_P(OperatorTest, sliceReshape_Float) { |
18321 | CHECK_IF_ENABLED(); |
18322 | |
18323 | testSliceReshape<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
18324 | } |
18325 | |
18326 | /// Stack many slices/reshapes together. Some of these may be turned into |
18327 | /// tensor views stacked onto each other. Test in Float16Ty. |
18328 | TEST_P(OperatorTest, sliceReshape_Float16) { |
18329 | CHECK_IF_ENABLED(); |
18330 | testSliceReshape<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
18331 | } |
18332 | |
18333 | /// Stack many slices/reshapes together. Some of these may be turned into |
18334 | /// tensor views stacked onto each other. Test in BFloat16Ty. |
18335 | TEST_P(OperatorTest, sliceReshape_BFloat16) { |
18336 | CHECK_IF_ENABLED(); |
18337 | testSliceReshape<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
18338 | } |
18339 | |
18340 | /// Stack many slices/reshapes together. Some of these may be turned into |
18341 | /// tensor views stacked onto each other. Test in Int8QTy. |
18342 | TEST_P(OperatorTest, sliceReshape_Int8) { |
18343 | CHECK_IF_ENABLED(); |
18344 | testSliceReshape<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
18345 | } |
18346 | |
18347 | /// Stack many slices/reshapes together. Some of these may be turned into |
18348 | /// tensor views stacked onto each other. Test in Int32QTy. |
18349 | TEST_P(OperatorTest, sliceReshape_Int32) { |
18350 | CHECK_IF_ENABLED(); |
18351 | testSliceReshape<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32QTy); |
18352 | } |
18353 | |
18354 | /// Helper to test Flatten using \p DTy. |
18355 | template <typename DataType> |
18356 | static void testFlatten(glow::PlaceholderBindings &bindings, glow::Module &mod, |
18357 | glow::Function *F, glow::ExecutionEngine &EE, |
18358 | ElemKind DTy) { |
18359 | auto *tensor4D = createPlaceholderConditionallyQuantized( |
18360 | mod, DTy, {3, 2, 4, 3}, "4D" , false, "NHWC" ); |
18361 | bindings.allocate(tensor4D)->getHandle<DataType>().randomize(0, 100, |
18362 | mod.getPRNG()); |
18363 | |
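// Flattening at axis k reshapes {d0, ..., d(n-1)} into the 2D shape
// {d0 * ... * d(k-1), dk * ... * d(n-1)}; e.g. axis 2 on {3, 2, 4, 3}
// yields {3 * 2, 4 * 3} = {6, 12}.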
18364 | NodeValue reshape4Dto2DAxis1 = F->createFlatten("flat4Dto2Da1" , tensor4D, 1); |
18365 | EXPECT_EQ(reshape4Dto2DAxis1.dims().size(), 2); |
18366 | EXPECT_EQ(reshape4Dto2DAxis1.dims()[0], 3); |
18367 | EXPECT_EQ(reshape4Dto2DAxis1.dims()[1], 24); |
18368 | |
18369 | NodeValue reshape4Dto2DAxis2 = F->createFlatten("flat4Dto2Da2" , tensor4D, 2); |
18370 | EXPECT_EQ(reshape4Dto2DAxis2.dims().size(), 2); |
18371 | EXPECT_EQ(reshape4Dto2DAxis2.dims()[0], 6); |
18372 | EXPECT_EQ(reshape4Dto2DAxis2.dims()[1], 12); |
18373 | |
18374 | NodeValue reshape4Dto2DAxis3 = F->createFlatten("flat4Dto2Da3" , tensor4D, 3); |
18375 | EXPECT_EQ(reshape4Dto2DAxis3.dims().size(), 2); |
18376 | EXPECT_EQ(reshape4Dto2DAxis3.dims()[0], 24); |
18377 | EXPECT_EQ(reshape4Dto2DAxis3.dims()[1], 3); |
18378 | |
// Now test the fifth possible axis (axis 4). This comes straight from
// caffe2: flattening is supported for every axis up to and including the
// rank of a tensor. The rank of this tensor is 4, so axis 4 is fine.
18383 | NodeValue reshape4Dto2DAxis4 = F->createFlatten("flat4Dto2Da4" , tensor4D, 4); |
18384 | EXPECT_EQ(reshape4Dto2DAxis4.dims().size(), 2); |
18385 | EXPECT_EQ(reshape4Dto2DAxis4.dims()[0], 72); |
18386 | EXPECT_EQ(reshape4Dto2DAxis4.dims()[1], 1); |
18387 | |
// This one is unusual because we flatten something that is already flat; but
// again, since flattening is supported for every axis up to and including
// the rank of a tensor, a 1D vector can still be flattened on axis 1.
18391 | auto *tensor1D = |
18392 | createPlaceholderConditionallyQuantized(mod, DTy, {15}, "1D" , false, "N" ); |
18393 | bindings.allocate(tensor1D)->getHandle<DataType>().randomize(0, 100, |
18394 | mod.getPRNG()); |
18395 | |
18396 | NodeValue reshape1Dto2DAxis1 = F->createFlatten("flat1Dto2D" , tensor1D, 1); |
18397 | EXPECT_EQ(reshape1Dto2DAxis1.dims().size(), 2); |
18398 | EXPECT_EQ(reshape1Dto2DAxis1.dims()[0], 15); |
18399 | EXPECT_EQ(reshape1Dto2DAxis1.dims()[1], 1); |
18400 | |
18401 | // Save all the reshapes so that the optimizations won't kill the network. |
18402 | auto *save1Dto2D = F->createSave("save1Dto2D" , reshape1Dto2DAxis1); |
18403 | auto *save4Dto2Da1 = F->createSave("save4Dto2Da1" , reshape4Dto2DAxis1); |
18404 | auto *save4Dto2Da2 = F->createSave("save4Dto2Da2" , reshape4Dto2DAxis2); |
18405 | auto *save4Dto2Da3 = F->createSave("save4Dto2Da3" , reshape4Dto2DAxis3); |
18406 | auto *save4Dto2Da4 = F->createSave("save4Dto2Da4" , reshape4Dto2DAxis4); |
18407 | |
18408 | bindings.allocate(save1Dto2D->getPlaceholder()); |
18409 | bindings.allocate(save4Dto2Da1->getPlaceholder()); |
18410 | bindings.allocate(save4Dto2Da2->getPlaceholder()); |
18411 | bindings.allocate(save4Dto2Da3->getPlaceholder()); |
18412 | bindings.allocate(save4Dto2Da4->getPlaceholder()); |
18413 | |
18414 | EE.compile(CompilationMode::Infer); |
18415 | |
18416 | EE.run(bindings); |
18417 | |
18418 | // Verify the reshapes have the same data as the original value. |
18419 | auto tensor4DH = bindings.get(tensor4D)->getHandle<DataType>(); |
18420 | auto save4Dto2Da1H = |
18421 | bindings.get(save4Dto2Da1->getPlaceholder())->getHandle<DataType>(); |
18422 | for (size_t i = 0; i < 72; i++) { |
18423 | EXPECT_NEAR(tensor4DH.raw(i), save4Dto2Da1H.raw(i), 1E-5); |
18424 | } |
18425 | |
18426 | auto save4Dto2Da2H = |
18427 | bindings.get(save4Dto2Da2->getPlaceholder())->getHandle<DataType>(); |
18428 | for (size_t i = 0; i < 72; i++) { |
18429 | EXPECT_NEAR(tensor4DH.raw(i), save4Dto2Da2H.raw(i), 1E-5); |
18430 | } |
18431 | |
18432 | auto save4Dto2Da3H = |
18433 | bindings.get(save4Dto2Da3->getPlaceholder())->getHandle<DataType>(); |
18434 | for (size_t i = 0; i < 72; i++) { |
18435 | EXPECT_NEAR(tensor4DH.raw(i), save4Dto2Da3H.raw(i), 1E-5); |
18436 | } |
18437 | |
18438 | auto save4Dto2Da4H = |
18439 | bindings.get(save4Dto2Da4->getPlaceholder())->getHandle<DataType>(); |
18440 | for (size_t i = 0; i < 72; i++) { |
18441 | EXPECT_NEAR(tensor4DH.raw(i), save4Dto2Da4H.raw(i), 1E-5); |
18442 | } |
18443 | |
18444 | auto tensor1DH = bindings.get(tensor1D)->getHandle<DataType>(); |
18445 | auto save1Dto2DH = |
18446 | bindings.get(save1Dto2D->getPlaceholder())->getHandle<DataType>(); |
18447 | for (size_t i = 0; i < 15; i++) { |
18448 | EXPECT_NEAR(tensor1DH.raw(i), save1Dto2DH.raw(i), 1E-5); |
18449 | } |
18450 | } |
18451 | |
18452 | /// Check that the flatten operator produces 2D tensors of the right |
18453 | /// dimensions, using FloatTy. |
18454 | TEST_P(OperatorTest, Flatten_FloatTy) { |
18455 | CHECK_IF_ENABLED(); |
18456 | testFlatten<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
18457 | } |
18458 | |
18459 | /// Check that the flatten operator produces 2D tensors of the right |
18460 | /// dimensions, using Float16Ty. |
18461 | TEST_P(OperatorTest, Flatten_Float16Ty) { |
18462 | CHECK_IF_ENABLED(); |
18463 | testFlatten<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
18464 | } |
18465 | |
18466 | /// Check that the flatten operator produces 2D tensors of the right |
18467 | /// dimensions, using BFloat16Ty. |
18468 | TEST_P(OperatorTest, Flatten_BFloat16Ty) { |
18469 | CHECK_IF_ENABLED(); |
18470 | testFlatten<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
18471 | } |
18472 | |
18473 | /// Check that the flatten operator produces 2D tensors of the right |
18474 | /// dimensions, using Int8QTy. |
18475 | TEST_P(OperatorTest, Flatten_Int8) { |
18476 | CHECK_IF_ENABLED(); |
18477 | testFlatten<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
18478 | } |
18479 | |
18480 | /// Check that div on Int64ITy/size_t works. |
18481 | TEST_P(OperatorTest, DivSizeT) { |
18482 | CHECK_IF_ENABLED(); |
18483 | |
18484 | auto *LHS = mod_.createPlaceholder(ElemKind::Int64ITy, {3, 2}, "LHS" , false); |
18485 | auto *RHS = mod_.createPlaceholder(ElemKind::Int64ITy, {3, 2}, "RHS" , false); |
18486 | auto LHSH = bindings_.allocate(LHS)->getHandle<int64_t>(); |
18487 | auto RHSH = bindings_.allocate(RHS)->getHandle<int64_t>(); |
18488 | |
18489 | LHSH = {10, 20, 30, 40, 50, 60}; |
18490 | RHSH = {2, 20, 100, 41, 3, 59}; |
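// Integer division truncates toward zero, so the expected results are
// {5, 1, 0, 0, 16, 1}; e.g. 30 / 100 == 0 and 50 / 3 == 16.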
18491 | |
18492 | auto *R = F_->createDiv("div" , LHS, RHS); |
18493 | |
18494 | auto *result = F_->createSave("save" , R); |
18495 | bindings_.allocate(result->getPlaceholder()); |
18496 | |
18497 | CompilationContext cctx; |
18498 | cctx.compMode = CompilationMode::Infer; |
18499 | // Disabling this so that division of Int64ITy/size_t can be tested. |
18500 | cctx.optimizationOpts.enableTypeDemotion = false; |
18501 | EE_.compile(cctx); |
18502 | EE_.run(bindings_); |
18503 | |
18504 | auto H = bindings_.get(result->getPlaceholder())->getHandle<int64_t>(); |
18505 | |
18506 | for (dim_t i = 0; i < 3; i++) { |
18507 | for (dim_t j = 0; j < 2; j++) { |
18508 | EXPECT_EQ(LHSH.at({i, j}) / RHSH.at({i, j}), H.at({i, j})); |
18509 | } |
18510 | } |
18511 | } |
18512 | |
18513 | TEST_P(OperatorTest, SigmoidCrossEntropyWithLogits) { |
18514 | CHECK_IF_ENABLED(); |
18515 | |
18516 | /* |
18517 | LOGITS = [ |
18518 | [ |
18519 | [1.0, 1.2, -0.5], |
18520 | [0.1, 0.6, 0.5], |
18521 | ], |
18522 | [ |
18523 | [-0.1, -2., 0.3], |
18524 | [1, 2, 3], |
18525 | ], |
18526 | ] |
18527 | TARGETS = [ |
18528 | [ |
18529 | [0.7, 0.7, 0.7], |
18530 | [-0.7, -0.99, 1.0], |
18531 | ], |
18532 | [ |
18533 | [0, 0, 0], |
18534 | [1, 2, 3], |
18535 | ], |
18536 | ] |
18537 | OUTPUT = [ |
18538 | [ 0.68687367, 0.97332054], |
18539 | [ 0.5418933, -2.50374103], |
18540 | ] |
18541 | */ |
18542 | auto *logits = |
18543 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 3}, "logits" , false); |
18544 | auto *targets = |
18545 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 3}, "targets" , false); |
18546 | |
18547 | bindings_.allocate(logits)->getHandle() = { |
18548 | 1.0f, 1.2f, -0.5f, 0.1f, 0.6f, 0.5f, -0.1f, -2.f, 0.3f, 1.f, 2.f, 3.f}; |
18549 | bindings_.allocate(targets)->getHandle() = { |
18550 | 0.7f, 0.7f, 0.7f, -0.7f, -0.99f, 1.0f, 0.f, 0.f, 0.f, 1.f, 2.f, 3.f}; |
18551 | |
18552 | auto *R = F_->createSigmoidCrossEntropyWithLogits("SCEL" , logits, targets); |
18553 | |
18554 | auto *result = F_->createSave("save" , R); |
18555 | bindings_.allocate(result->getPlaceholder()); |
18556 | |
18557 | EE_.compile(CompilationMode::Infer); |
18558 | EE_.run(bindings_); |
18559 | |
18560 | Tensor expected(ElemKind::FloatTy, {2, 2}); |
18561 | expected.getHandle() = { |
18562 | 0.68687367f, |
18563 | 0.97332054f, |
18564 | 0.5418933f, |
18565 | -2.50374103f, |
18566 | }; |
18567 | |
18568 | EXPECT_TRUE(expected.isEqual(*bindings_.get(result->getPlaceholder()))); |
18569 | } |
18570 | |
18571 | /// Test the InsertTensor node works correctly. |
18572 | TEST_P(OperatorTest, insertTensorTest) { |
18573 | CHECK_IF_ENABLED(); |
18574 | |
18575 | // 0 0 0 0 0 0 |
18576 | // 0 0 0 0 0 0 |
18577 | // 0 0 0 0 0 0 |
18578 | // 0 0 0 0 0 0 |
18579 | auto *SN0 = mod_.createPlaceholder(ElemKind::FloatTy, {4, 6}, "SN0" , false); |
18580 | bindings_.allocate(SN0)->init(Tensor::InitKind::Broadcast, 0, mod_.getPRNG()); |
18581 | |
18582 | // 1 1 |
18583 | // 1 1 |
18584 | auto *SN1 = mod_.createPlaceholder(ElemKind::FloatTy, {2, 2}, "SN1" , false); |
18585 | bindings_.allocate(SN1)->init(Tensor::InitKind::Broadcast, 1, mod_.getPRNG()); |
18586 | |
18587 | // 0 0 0 0 0 0 |
18588 | // 0 1 1 1 1 0 |
18589 | // 0 1 1 1 1 0 |
18590 | // 0 0 0 0 0 0 |
18591 | Node *IN = F_->createInsertTensor("insert" , SN0, SN1, /* start */ {1, 1}, |
18592 | /* count */ 2, /* axis */ 1); |
18593 | SaveNode *result = F_->createSave("result" , IN); |
18594 | bindings_.allocate(result->getPlaceholder()); |
18595 | |
18596 | EE_.compile(CompilationMode::Infer); |
18597 | |
18598 | EE_.run(bindings_); |
18599 | |
18600 | // Verify the output looks as expected (pictured above). |
18601 | auto resultH = bindings_.get(result->getPlaceholder())->getHandle<float>(); |
18602 | for (dim_t i = 0; i < 4; i++) { |
18603 | for (dim_t j = 0; j < 6; j++) { |
18604 | int64_t expected = 1; |
18605 | if (i == 0 || i == 3 || j == 0 || j == 5) |
18606 | expected = 0; |
18607 | EXPECT_EQ(resultH.at({i, j}), expected); |
18608 | } |
18609 | } |
18610 | } |
18611 | |
18612 | /// Test the InsertTensor node works correctly for 3 dimensions. |
18613 | TEST_P(OperatorTest, insertTensorTest3D) { |
18614 | CHECK_IF_ENABLED(); |
18615 | |
18616 | // 0 0 0 0 0 0 | 0 0 0 0 0 0 |
18617 | // 0 0 0 0 0 0 | 0 0 0 0 0 0 |
18618 | // 0 0 0 0 0 0 | 0 0 0 0 0 0 |
18619 | // 0 0 0 0 0 0 | 0 0 0 0 0 0 |
18620 | auto *SN0 = |
18621 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 4, 6}, "SN0" , false); |
18622 | bindings_.allocate(SN0)->init(Tensor::InitKind::Broadcast, 0, mod_.getPRNG()); |
18623 | |
18624 | // 1 1 | 1 1 |
18625 | // 1 1 | 1 1 |
18626 | auto *SN1 = |
18627 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 2, 2}, "SN1" , false); |
18628 | bindings_.allocate(SN1)->init(Tensor::InitKind::Broadcast, 1, mod_.getPRNG()); |
18629 | |
18630 | // 0 0 0 0 0 0 | 0 0 0 0 0 0 |
18631 | // 0 1 1 1 1 0 | 0 1 1 1 1 0 |
18632 | // 0 1 1 1 1 0 | 0 1 1 1 1 0 |
18633 | // 0 0 0 0 0 0 | 0 0 0 0 0 0 |
18634 | Node *IN = F_->createInsertTensor("insert" , SN0, SN1, /* start */ {0, 1, 1}, |
18635 | /* count */ 2, /* axis */ 2); |
18636 | SaveNode *result = F_->createSave("result" , IN); |
18637 | bindings_.allocate(result->getPlaceholder()); |
18638 | |
18639 | EE_.compile(CompilationMode::Infer); |
18640 | |
18641 | EE_.run(bindings_); |
18642 | |
18643 | // Verify the output looks as expected (pictured above). |
18644 | auto resultH = bindings_.get(result->getPlaceholder())->getHandle<float>(); |
18645 | for (dim_t i = 0; i < 2; i++) { |
18646 | for (dim_t j = 0; j < 4; j++) { |
18647 | for (dim_t k = 0; k < 6; k++) { |
18648 | int64_t expected = 1; |
18649 | if (j == 0 || j == 3 || k == 0 || k == 5) |
18650 | expected = 0; |
18651 | EXPECT_EQ(resultH.at({i, j, k}), expected); |
18652 | } |
18653 | } |
18654 | } |
18655 | } |
18656 | |
18657 | /// Test that the InsertTensor operator works correctly when crossing outer |
18658 | /// dimensions. |
18659 | TEST_P(OperatorTest, insertTensorCrossDimensions) { |
18660 | CHECK_IF_ENABLED(); |
18661 | |
18662 | // 0 0 0 0 0 |
18663 | // 0 0 0 0 0 |
18664 | // 0 0 0 0 0 |
18665 | // 0 0 0 0 0 |
18666 | // 0 0 0 0 0 |
18667 | // 0 0 0 0 0 |
18668 | auto *SN0 = |
18669 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 2, 5}, "SN0" , false); |
18670 | bindings_.allocate(SN0)->init(Tensor::InitKind::Broadcast, 0, mod_.getPRNG()); |
18671 | |
18672 | // 1 1 1 1 1 1 (T) |
18673 | auto *SN1 = |
18674 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 2, 1}, "SN1" , false); |
18675 | bindings_.allocate(SN1)->init(Tensor::InitKind::Broadcast, 1, mod_.getPRNG()); |
18676 | |
18677 | // 2 2 | 2 2 |
18678 | // 2 2 | 2 2 |
18679 | // 2 2 | 2 2 |
18680 | auto *SN2 = |
18681 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 2, 2}, "SN2" , false); |
18682 | bindings_.allocate(SN2)->init(Tensor::InitKind::Broadcast, 2, mod_.getPRNG()); |
18683 | |
18684 | // 1 0 2 2 0 |
18685 | // 1 0 2 2 0 |
18686 | // 1 0 2 2 0 |
18687 | // 1 0 2 2 0 |
18688 | // 1 0 2 2 0 |
18689 | // 1 0 2 2 0 |
18690 | Node *IN = F_->createInsertTensor("insert" , SN0, SN1, /* start */ {0, 0, 0}, |
18691 | /* count */ 1, /* axis */ 2); |
18692 | Node *IN2 = F_->createInsertTensor("insert" , IN, SN2, /* start */ {0, 0, 2}, |
18693 | /* count */ 1, /* axis */ 2); |
18694 | SaveNode *result = F_->createSave("result" , IN2); |
18695 | bindings_.allocate(result->getPlaceholder()); |
18696 | |
18697 | EE_.compile(CompilationMode::Infer); |
18698 | |
18699 | EE_.run(bindings_); |
18700 | |
18701 | // Verify the output looks as expected (pictured above). |
18702 | auto resultH = bindings_.get(result->getPlaceholder())->getHandle<float>(); |
18703 | for (dim_t i = 0; i < 3; i++) { |
18704 | for (dim_t j = 0; j < 2; j++) { |
18705 | for (dim_t k = 0; k < 5; k++) { |
18706 | int64_t expected = 0; |
18707 | if (k == 0) |
18708 | expected = 1; |
18709 | if (k == 2 || k == 3) |
18710 | expected = 2; |
18711 | EXPECT_EQ(resultH.at({i, j, k}), expected); |
18712 | } |
18713 | } |
18714 | } |
18715 | } |
18716 | |
18717 | /// Test that InsertTensor node works correctly for 6D tensors. |
18718 | TEST_P(OperatorTest, insertTensorTest6D) { |
18719 | CHECK_IF_ENABLED(); |
18720 | // 0 0 0 0 0 0 |
18721 | // 0 0 0 0 0 0 |
18722 | // 0 0 0 0 0 0 |
18723 | // 0 0 0 0 0 0 |
18724 | auto *SN0 = mod_.createPlaceholder(ElemKind::Int64ITy, {1, 1, 1, 1, 4, 6}, |
18725 | "SN0" , false); |
18726 | bindings_.allocate(SN0)->init(Tensor::InitKind::Broadcast, 0, mod_.getPRNG()); |
18727 | |
18728 | // 1 1 |
18729 | // 1 1 |
18730 | auto *SN1 = mod_.createPlaceholder(ElemKind::Int64ITy, {1, 1, 1, 1, 2, 2}, |
18731 | "SN1" , false); |
18732 | bindings_.allocate(SN1)->init(Tensor::InitKind::Broadcast, 1, mod_.getPRNG()); |
18733 | |
18734 | // 0 0 0 0 0 0 |
18735 | // 0 1 1 1 1 0 |
18736 | // 0 1 1 1 1 0 |
18737 | // 0 0 0 0 0 0 |
18738 | Node *IN = |
18739 | F_->createInsertTensor("insert" , SN0, SN1, /* start */ {0, 0, 0, 0, 1, 1}, |
18740 | /* count */ 2, /* axis */ 5); |
18741 | SaveNode *result = F_->createSave("result" , IN); |
18742 | bindings_.allocate(result->getPlaceholder()); |
18743 | |
18744 | EE_.compile(CompilationMode::Infer); |
18745 | |
18746 | EE_.run(bindings_); |
18747 | |
18748 | // Verify the output looks as expected (pictured above). |
18749 | auto resultH = bindings_.get(result->getPlaceholder())->getHandle<int64_t>(); |
18750 | for (dim_t i = 0; i < 4; i++) { |
18751 | for (dim_t j = 0; j < 6; j++) { |
18752 | int64_t expected = 1; |
18753 | if (i == 0 || i == 3 || j == 0 || j == 5) { |
18754 | expected = 0; |
18755 | } |
18756 | EXPECT_EQ(resultH.at({0, 0, 0, 0, i, j}), expected); |
18757 | } |
18758 | } |
18759 | } |
18760 | |
/// Test that the InsertTensor operator works correctly when inserting across
/// an outer dimension where the inserted tensors have different inner sizes.
18763 | TEST_P(OperatorTest, insertTensorPartialSliceInnerDim) { |
18764 | CHECK_IF_ENABLED(); |
18765 | |
18766 | // 0 0 0 0 0 |
18767 | // 0 0 0 0 0 |
18768 | // 0 0 0 0 0 |
18769 | // 0 0 0 0 0 |
18770 | // 0 0 0 0 0 |
18771 | // 0 0 0 0 0 |
18772 | // 0 0 0 0 0 |
18773 | // 0 0 0 0 0 |
18774 | // 0 0 0 0 0 |
18775 | auto *SN0 = |
18776 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 3, 5}, "SN0" , false); |
18777 | bindings_.allocate(SN0)->init(Tensor::InitKind::Broadcast, 0, mod_.getPRNG()); |
18778 | |
18779 | // 1 1 |
18780 | // 1 1 |
18781 | // 1 1 |
18782 | auto *SN1 = |
18783 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 1, 2}, "SN1" , false); |
18784 | bindings_.allocate(SN1)->init(Tensor::InitKind::Broadcast, 1, mod_.getPRNG()); |
18785 | |
18786 | // 2 2 2 |
18787 | // 2 2 2 |
18788 | // 2 2 2 |
18789 | auto *SN2 = |
18790 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 1, 3}, "SN2" , false); |
18791 | bindings_.allocate(SN2)->init(Tensor::InitKind::Broadcast, 2, mod_.getPRNG()); |
18792 | |
18793 | // 1 1 0 0 0 |
18794 | // 0 2 2 2 0 |
18795 | // 0 0 0 0 0 |
18796 | // 1 1 0 0 0 |
18797 | // 0 2 2 2 0 |
18798 | // 0 0 0 0 0 |
18799 | // 1 1 0 0 0 |
18800 | // 0 2 2 2 0 |
18801 | // 0 0 0 0 0 |
18802 | Node *IN = F_->createInsertTensor("insert" , SN0, SN1, /* start */ {0, 0, 0}, |
18803 | /* count */ 1, /* axis */ 2); |
18804 | Node *IN2 = F_->createInsertTensor("insert" , IN, SN2, /* start */ {0, 1, 1}, |
18805 | /* count */ 1, /* axis */ 2); |
18806 | SaveNode *result = F_->createSave("result" , IN2); |
18807 | bindings_.allocate(result->getPlaceholder()); |
18808 | |
18809 | EE_.compile(CompilationMode::Infer); |
18810 | |
18811 | EE_.run(bindings_); |
18812 | // Verify the output looks as expected (pictured above). |
18813 | auto resultH = bindings_.get(result->getPlaceholder())->getHandle<float>(); |
18814 | for (dim_t i = 0; i < 3; i++) { |
18815 | for (dim_t j = 0; j < 3; j++) { |
18816 | for (dim_t k = 0; k < 5; k++) { |
        float expected = 0;
18818 | if (j == 0 && k <= 1) |
18819 | expected = 1; |
18820 | if (j == 1 && k >= 1 && k <= 3) |
18821 | expected = 2; |
18822 | EXPECT_EQ(resultH.at({i, j, k}), expected); |
18823 | } |
18824 | } |
18825 | } |
18826 | } |
18827 | |
18828 | static FunctionTensorPair |
18829 | createAndInitBasicRowwiseFCTest(glow::PlaceholderBindings &bindings, |
18830 | glow::ExecutionEngine &EE) { |
18831 | auto &mod = EE.getModule(); |
18832 | Function *F = mod.createFunction("main" ); |
18833 | |
18834 | // In this test we subtract the outputs of a row-wise quantized FC and a |
18835 | // floating-point FC and ensure that the error is below some low value. |
18836 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {2, 100}, "in" , false); |
18837 | auto *fc = F->createFullyConnected(bindings, "FC" , input, 5); |
18838 | |
18839 | auto *weights = llvm::cast<Placeholder>(fc->getWeights()); |
18840 | auto *bias = llvm::cast<Placeholder>(fc->getBias()); |
18841 | |
18842 | bindings.allocate(input)->getHandle().randomize(-1.0, 1.0, mod.getPRNG()); |
18843 | bindings.get(bias)->getHandle().randomize(0, 0.1, mod.getPRNG()); |
18844 | bindings.get(weights)->getHandle().randomize(-1.1, 1.1, mod.getPRNG()); |
18845 | |
18846 | auto *res = F->createSave("save" , fc); |
18847 | ::glow::convertPlaceholdersToConstants(F, bindings, |
18848 | {input, res->getPlaceholder()}); |
18849 | auto *resultTensor = bindings.allocate(res->getPlaceholder()); |
18850 | |
18851 | return std::make_pair(F, resultTensor); |
18852 | } |
18853 | |
18854 | /// Test Int8 RowwiseQuantizedFullyConnected Node with Int8 bias. |
18855 | TEST_P(OperatorStatelessTest, rowwiseQuantizedFCTest_Int8_BiasInt8) { |
18856 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
18857 | compareAgainstInterpreter( |
18858 | getBackendName(), createAndInitBasicRowwiseFCTest, ElemKind::FloatTy, |
18859 | ElemKind::Int8QTy, 0.06f, parCloneCountOpt, |
18860 | /* convertToRowwiseQuantization */ true, quantization::Schema::Asymmetric, |
18861 | ElemKind::Int8QTy); |
18862 | } |
18863 | |
18864 | /// Test Int8 RowwiseQuantizedFullyConnected Node with Int32 bias. |
18865 | TEST_P(OperatorStatelessTest, rowwiseQuantizedFCTest_Int8_BiasInt32) { |
18866 | ENABLED_BACKENDS("Interpreter" , "CPU" ); |
18867 | compareAgainstInterpreter( |
18868 | getBackendName(), createAndInitBasicRowwiseFCTest, ElemKind::FloatTy, |
18869 | ElemKind::Int8QTy, 0.06f, parCloneCountOpt, |
18870 | /* convertToRowwiseQuantization */ true, quantization::Schema::Asymmetric, |
18871 | ElemKind::Int32QTy); |
18872 | } |
18873 | |
18874 | /// Test RowwiseQuantizedFullyConnected Node with Symmetric quantization. |
18875 | TEST_P(OperatorStatelessTest, rowwiseQuantizedFCTestSymmetric) { |
18876 | CHECK_IF_ENABLED(); |
18877 | compareAgainstInterpreter( |
18878 | getBackendName(), createAndInitBasicRowwiseFCTest, ElemKind::FloatTy, |
18879 | ElemKind::Int8QTy, 0.07f, parCloneCountOpt, |
18880 | /* convertToRowwiseQuantization */ true, quantization::Schema::Symmetric); |
18881 | } |
18882 | |
/// Test Int8 RowwiseQuantizedFullyConnected Node with Symmetric quantization
/// and the bias kept in Float32 (bias quantization skipped).
TEST_P(OperatorStatelessTest,
       rowwiseQuantizedFCTestSymmetric_Int8_BiasFloat32) {
18885 | CHECK_IF_ENABLED(); |
18886 | compareAgainstInterpreter( |
18887 | getBackendName(), createAndInitBasicRowwiseFCTest, ElemKind::FloatTy, |
18888 | ElemKind::Int8QTy, 0.07f, parCloneCountOpt, |
18889 | /* convertToRowwiseQuantization */ true, quantization::Schema::Symmetric, |
18890 | /*biasElemKind*/ ElemKind::Int32QTy, |
18891 | /*forceFP16AccumSLS*/ false, PrecisionConfiguration::Float16Format::None, |
18892 | /*convertToChannelwiseQuantization*/ false, |
18893 | /*skipQuantizeFCBias*/ true); |
18894 | } |
18895 | |
/// Test Int8 RowwiseQuantizedFullyConnected Node with Asymmetric quantization
/// and the bias kept in Float32 (bias quantization skipped).
TEST_P(OperatorStatelessTest,
       rowwiseQuantizedFCTestAsymmetric_Int8_BiasFloat32) {
18898 | CHECK_IF_ENABLED(); |
18899 | compareAgainstInterpreter( |
18900 | getBackendName(), createAndInitBasicRowwiseFCTest, ElemKind::FloatTy, |
18901 | ElemKind::Int8QTy, 0.06f, parCloneCountOpt, |
18902 | /* convertToRowwiseQuantization */ true, quantization::Schema::Asymmetric, |
18903 | /*biasElemKind*/ ElemKind::Int32QTy, |
18904 | /*forceFP16AccumSLS*/ false, PrecisionConfiguration::Float16Format::None, |
18905 | /*convertToChannelwiseQuantization*/ false, |
18906 | /*skipQuantizeFCBias*/ true); |
18907 | } |
18908 | |
18909 | static FunctionTensorPair |
18910 | createAndInitBasicSLWSTest(glow::PlaceholderBindings &bindings, |
18911 | glow::ExecutionEngine &EE) { |
18912 | auto &mod = EE.getModule(); |
18913 | Function *F = mod.createFunction("main" ); |
18914 | |
18915 | /* |
18916 | DATA = [2.0, -0.5, 13] |
18917 | WEIGHTS = [3, 1, 0, 0, 0, 0, 2, -0.5] |
18918 | INDICES = [1, 0, 2, 0, 1, 2, 2, 0] |
18919 | LENGTHS = [3, 0, 3, 2] |
18920 | OUTPUT = [0.5, 0, 0, 25] |
18921 | */ |
18922 | auto *data = mod.createPlaceholder(ElemKind::FloatTy, {3}, "data" , false); |
18923 | auto *weights = |
18924 | mod.createPlaceholder(ElemKind::FloatTy, {8}, "weights" , false); |
18925 | auto *indices = |
18926 | mod.createPlaceholder(ElemKind::Int64ITy, {8}, "indices" , false); |
18927 | auto *lengths = |
18928 | mod.createPlaceholder(ElemKind::Int32ITy, {4}, "lengths" , false); |
18929 | |
18930 | bindings.allocate(data)->getHandle() = { |
18931 | 2.0, |
18932 | -0.5, |
18933 | 13, |
18934 | }; |
18935 | bindings.allocate(weights)->getHandle() = { |
18936 | 3, 1, 0, 0, 0, 0, 2, -0.5, |
18937 | }; |
18938 | bindings.allocate(indices)->getHandle<int64_t>() = { |
18939 | 1, 0, 2, 0, 1, 2, 2, 0, |
18940 | }; |
18941 | bindings.allocate(lengths)->getHandle<int32_t>() = { |
18942 | 3, |
18943 | 0, |
18944 | 3, |
18945 | 2, |
18946 | }; |
18947 | |
18948 | auto *SLWS = F->createSparseLengthsWeightedSum("SLWS" , data, weights, indices, |
18949 | lengths); |
18950 | auto *res = F->createSave("save" , SLWS); |
18951 | ::glow::convertPlaceholdersToConstants( |
18952 | F, bindings, {indices, lengths, res->getPlaceholder()}); |
18953 | auto *resultTensor = bindings.allocate(res->getPlaceholder()); |
18954 | |
18955 | return std::make_pair(F, resultTensor); |
18956 | } |
18957 | |
18958 | /// Test RowwiseQuantizedSLWS Node. |
18959 | TEST_P(OperatorStatelessTest, rowwiseQuantizedSLWSTest) { |
18960 | CHECK_IF_ENABLED(); |
18961 | compareAgainstInterpreter(getBackendName(), createAndInitBasicSLWSTest, |
18962 | ElemKind::FloatTy, ElemKind::Int8QTy, 0.01f, |
18963 | parCloneCountOpt, |
18964 | /* convertToRowwiseQuantization */ true); |
18965 | } |
18966 | |
18967 | static SaveNode *setupBucketNode(Function *F, PlaceholderBindings &bindings, |
18968 | Placeholder *input, |
18969 | const std::string &suffix) { |
18970 | std::vector<float> boundaries = {0.1, 2.5}; |
18971 | |
18972 | auto *bucketize = |
18973 | F->createBucketizeNode("bucketize" + suffix, input, boundaries); |
18974 | auto *save = F->createSave("save" + suffix, bucketize); |
18975 | bindings.allocate(save->getPlaceholder()); |
18976 | return save; |
18977 | } |
18978 | |
18979 | /// Check the correctness of the bucketize operator. |
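/// With boundaries {0.1, 2.5}, each element maps to the count of boundaries
/// lying below it (for the inputs used here): e.g. 1.0 -> 1 and 4.0 -> 2.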
18980 | TEST_P(OperatorTest, Bucketize) { |
18981 | CHECK_IF_ENABLED(); |
18982 | |
18983 | auto *input1 = |
18984 | mod_.createPlaceholder(ElemKind::FloatTy, {3}, "input1" , false); |
18985 | bindings_.allocate(input1)->getHandle<float>() = {2.0, 4.0, 1.0}; |
18986 | auto *save1 = |
18987 | setupBucketNode(F_, bindings_, input1, /* suffix */ std::to_string(1)); |
18988 | |
18989 | auto *input2 = |
18990 | mod_.createPlaceholder(ElemKind::FloatTy, {3, 2}, "input2" , false); |
18991 | bindings_.allocate(input2)->getHandle<float>() = {2.0, 3.0, 4.0, |
18992 | 1.0, 2.0, 5.0}; |
18993 | auto *save2 = |
18994 | setupBucketNode(F_, bindings_, input2, /* suffix */ std::to_string(2)); |
18995 | |
18996 | EE_.compile(CompilationMode::Infer); |
18997 | EE_.run(bindings_); |
18998 | |
18999 | // Check the result of the first op: |
19000 | Tensor *result1 = bindings_.get(save1->getPlaceholder()); |
19001 | Tensor expected1(ElemKind::Int32ITy, {3}); |
19002 | expected1.getHandle<int32_t>() = {1, 2, 1}; |
19003 | EXPECT_TRUE(expected1.isEqual(*result1)); |
19004 | |
19005 | // Check the result of the second op: |
19006 | Tensor *result2 = bindings_.get(save2->getPlaceholder()); |
19007 | Tensor expected2(ElemKind::Int32ITy, {3, 2}); |
19008 | expected2.getHandle<int32_t>() = {1, 2, 2, 1, 1, 2}; |
19009 | EXPECT_TRUE(expected2.isEqual(*result2)); |
19010 | } |
19011 | |
19012 | /// Helper to test SoftPlus using \p DTy. |
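/// SoftPlus computes ln(1 + exp(x)) elementwise; for example,
/// softplus(0) = ln(2) ~= 0.693, matching the first expected value below.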
19013 | template <typename DataType> |
19014 | static void testSoftPlus(glow::PlaceholderBindings &bindings, glow::Module &mod, |
19015 | glow::Function *F, glow::ExecutionEngine &EE, |
19016 | ElemKind DTy) { |
19017 | auto *input = mod.createPlaceholder(DTy, {1, 6}, "input" , false); |
19018 | bindings.allocate(input)->getHandle<DataType>() = {0., -2., 2., 5., 2.5, 6.}; |
19019 | auto *softPlus = F->createSoftPlus("Softplus" , input); |
19020 | auto *S = F->createSave("save" , softPlus); |
19021 | bindings.allocate(S->getPlaceholder()); |
19022 | |
19023 | EE.compile(CompilationMode::Infer); |
19024 | EE.run(bindings); |
19025 | |
19026 | auto result = bindings.get(S->getPlaceholder()); |
19027 | |
19028 | Tensor out(DTy, {1, 6}); |
19029 | out.getHandle<DataType>() = {0.693f, 0.127f, 2.127f, 5.007f, 2.579f, 6.002f}; |
19030 | EXPECT_TRUE(out.isEqual(*result, 0.001)); |
19031 | } |
19032 | |
19033 | /// Verify that the SoftPlus operator works correctly for Float. |
19034 | TEST_P(OperatorTest, SoftPlus_Float) { |
19035 | CHECK_IF_ENABLED(); |
19036 | |
19037 | testSoftPlus<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
19038 | } |
19039 | |
19040 | /// Verify that the SoftPlus operator works correctly for Float16. |
19041 | TEST_P(OperatorTest, SoftPlus_Float16) { |
19042 | CHECK_IF_ENABLED(); |
19043 | testSoftPlus<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
19044 | } |
19045 | |
19046 | /// Verify that the SoftPlus operator works correctly for BFloat16. |
19047 | TEST_P(OperatorTest, SoftPlus_BFloat16) { |
19048 | CHECK_IF_ENABLED(); |
19049 | testSoftPlus<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
19050 | } |
19051 | |
19052 | /// Check the correctness of the SoftMax operator. |
/// The semantics of SoftMax are
19054 | /// res_i = exp(input_i) / (exp(input_0) + ... + exp(input_N)). |
19055 | TEST_P(OperatorTest, SoftMax) { |
19056 | CHECK_IF_ENABLED(); |
19057 | |
19058 | auto *input = |
19059 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 6}, "input" , false); |
19060 | bindings_.allocate(input)->getHandle<float>() = {1., 3., 2.5, 5., 4., 2.}; |
19061 | auto *selected = |
19062 | mod_.createPlaceholder(ElemKind::Int64ITy, {1, 1}, "expected" , false); |
19063 | auto *Pool = F_->createSoftMax("pool" , input, selected); |
19064 | auto *S = F_->createSave("save" , Pool); |
19065 | bindings_.allocate(S->getPlaceholder()); |
19066 | |
19067 | EE_.compile(CompilationMode::Infer); |
19068 | EE_.run(bindings_); |
19069 | |
19070 | auto result = bindings_.get(S->getPlaceholder()); |
19071 | Tensor out(ElemKind::FloatTy, {1, 6}); |
19072 | // Expected results are: |
19073 | // sum = exp(input_0) + ... + exp(input_N) = ~245.387 |
19074 | // res_0 = exp(1) / sum = ~0.011 |
19075 | // res_1 = exp(3) / sum = ~0.082 |
19076 | // And so on. |
19077 | out.getHandle<float>() = {0.011f, 0.082f, 0.05f, 0.605f, 0.222f, 0.03f}; |
19078 | EXPECT_TRUE(out.isEqual(*result, 0.001)); |
19079 | } |
19080 | |
19081 | /// Check that the softmax operator works properly with quantized input |
/// (int8_t). See the test that checks the SoftMax operator for more details.
19083 | TEST_P(OperatorTest, SoftMaxI8QTy) { |
19084 | CHECK_IF_ENABLED(); |
19085 | |
19086 | auto *inputTy = mod_.uniqueType(ElemKind::Int8QTy, {1, 10}, 0.129249, 3); |
19087 | auto *outputTy = mod_.uniqueType(ElemKind::Int8QTy, {1, 10}, 0.003922, -128); |
19088 | auto *input = mod_.createPlaceholder(inputTy, "input" , false); |
19089 | bindings_.allocate(input)->getHandle<int8_t>() = {68, -128, 99, -101, 127, |
19090 | -5, 104, -83, -111, 44}; |
19091 | auto *selected = |
19092 | mod_.createPlaceholder(ElemKind::Int64ITy, {1, 1}, "expected" , false); |
19093 | auto *Pool = F_->createSoftMax("pool" , input, selected, outputTy); |
19094 | auto *S = F_->createSave("save" , Pool); |
19095 | bindings_.allocate(S->getPlaceholder()); |
19096 | |
19097 | EE_.compile(CompilationMode::Infer); |
19098 | EE_.run(bindings_); |
19099 | |
19100 | auto result = bindings_.get(S->getPlaceholder()); |
19101 | Tensor out(ElemKind::Int8QTy, {1, 10}, 0.003922, -128); |
19102 | out.getHandle<int8_t>() = {-128, -128, -122, -128, 108, |
19103 | -128, -116, -128, -128, -128}; |
19104 | EXPECT_TRUE(out.isEqual(*result, 0)); |
19105 | } |
19106 | |
19107 | /// Check that the softmax operator works properly with FP16. |
/// See the test that checks the SoftMax operator for more details.
19109 | TEST_P(OperatorTest, FP16SoftMax) { |
19110 | CHECK_IF_ENABLED(); |
19111 | |
19112 | auto *input = |
19113 | mod_.createPlaceholder(ElemKind::Float16Ty, {1, 6}, "input" , false); |
19114 | bindings_.allocate(input)->getHandle<float16_t>() = {1., 3., 2.5, 5., 4., 2.}; |
19115 | auto *selected = |
19116 | mod_.createPlaceholder(ElemKind::Int64ITy, {1, 1}, "expected" , false); |
19117 | auto *Pool = F_->createSoftMax("pool" , input, selected); |
19118 | auto *S = F_->createSave("save" , Pool); |
19119 | bindings_.allocate(S->getPlaceholder()); |
19120 | |
19121 | EE_.compile(CompilationMode::Infer); |
19122 | EE_.run(bindings_); |
19123 | |
19124 | auto result = bindings_.get(S->getPlaceholder()); |
19125 | Tensor out(ElemKind::Float16Ty, {1, 6}); |
19126 | out.getHandle<float16_t>() = {0.011f, 0.082f, 0.05f, 0.605f, 0.222f, 0.03f}; |
19127 | EXPECT_TRUE(out.isEqual(*result, 0.001)); |
19128 | } |
19129 | |
19130 | /// Check that the softmax operator works properly with BFloat16. |
/// See the test that checks the SoftMax operator for more details.
19132 | TEST_P(OperatorTest, BFloat16SoftMax) { |
19133 | CHECK_IF_ENABLED(); |
19134 | |
19135 | auto *input = |
19136 | mod_.createPlaceholder(ElemKind::BFloat16Ty, {1, 6}, "input" , false); |
19137 | bindings_.allocate(input)->getHandle<bfloat16_t>() = {1., 3., 2.5, |
19138 | 5., 4., 2.}; |
19139 | auto *selected = |
19140 | mod_.createPlaceholder(ElemKind::Int64ITy, {1, 1}, "expected" , false); |
19141 | auto *Pool = F_->createSoftMax("pool" , input, selected); |
19142 | auto *S = F_->createSave("save" , Pool); |
19143 | bindings_.allocate(S->getPlaceholder()); |
19144 | |
19145 | EE_.compile(CompilationMode::Infer); |
19146 | EE_.run(bindings_); |
19147 | |
19148 | auto result = bindings_.get(S->getPlaceholder()); |
19149 | Tensor out(ElemKind::BFloat16Ty, {1, 6}); |
19150 | out.getHandle<bfloat16_t>() = {0.011f, 0.082f, 0.05f, 0.605f, 0.222f, 0.03f}; |
19151 | EXPECT_TRUE(out.isEqual(*result, 0.001)); |
19152 | } |
19153 | |
19154 | template <typename DataType, ElemKind DTy> |
19155 | static void testLogSoftMax(glow::PlaceholderBindings &bindings, |
19156 | glow::Module &mod, glow::Function *F, |
19157 | glow::ExecutionEngine &EE, double permittedError) { |
19158 | auto *input = mod.createPlaceholder(DTy, {1, 6}, "input" , false); |
19159 | bindings.allocate(input)->getHandle<DataType>() = {1., 3., 2.5, 5., 4., 2.}; |
19160 | auto *selected = |
19161 | mod.createPlaceholder(ElemKind::Int64ITy, {1, 1}, "expected" , false); |
19162 | auto *Pool = F->createLogSoftMax("pool" , input, selected); |
19163 | auto *S = F->createSave("save" , Pool); |
19164 | bindings.allocate(S->getPlaceholder()); |
19165 | |
19166 | EE.compile(CompilationMode::Infer); |
19167 | EE.run(bindings); |
19168 | |
19169 | auto result = bindings.get(S->getPlaceholder()); |
19170 | Tensor out(DTy, {1, 6}); |
19171 | // Expected results are: |
19172 | // sum = exp(input_0) + ... + exp(input_N) = ~245.387 |
19173 | // res_0 = ln(exp(1) / sum) = ln(~0.011) = -4.503 |
19174 | // res_1 = ln(exp(3) / sum) = ln(~0.082) = -2.503 |
19175 | // And so on. |
19176 | out.getHandle<DataType>() = {-4.503, -2.503, -3.003, -0.503, -1.503, -3.503}; |
19177 | |
19178 | EXPECT_TRUE(out.isEqual(*result, permittedError)); |
19179 | } |
19180 | |
19181 | /// Check the correctness of the LogSoftMax operator. |
/// The semantics of LogSoftMax are
19183 | /// res_i = log(exp(input_i) / (exp(input_0) + ... + exp(input_N))). |
19184 | TEST_P(OperatorTest, LogSoftMax_Float16) { |
19185 | CHECK_IF_ENABLED(); |
19186 | ENABLED_BACKENDS("Interpreter" , "NNPI" ); // CPU does not support fp16 |
19187 | |
19188 | testLogSoftMax<float16_t, ElemKind::Float16Ty>(bindings_, mod_, F_, EE_, |
19189 | 0.01); |
19190 | } |
19191 | |
19192 | /// Check the correctness of the LogSoftMax operator. |
/// The semantics of LogSoftMax are
19194 | /// res_i = log(exp(input_i) / (exp(input_0) + ... + exp(input_N))). |
19195 | TEST_P(OperatorTest, LogSoftMax_BFloat16) { |
19196 | CHECK_IF_ENABLED(); |
19197 | ENABLED_BACKENDS("Interpreter" ); // CPU & NNPI do not support bfloat16 |
19198 | |
  testLogSoftMax<bfloat16_t, ElemKind::BFloat16Ty>(
      bindings_, mod_, F_, EE_, 0.1); // bfloat16 has even lower precision
19201 | } |
19202 | |
19203 | /// Check the correctness of the LogSoftMax operator. |
/// The semantics of LogSoftMax are
19205 | /// res_i = log(exp(input_i) / (exp(input_0) + ... + exp(input_N))). |
19206 | TEST_P(OperatorTest, LogSoftMax_Float32) { |
19207 | CHECK_IF_ENABLED(); |
19208 | ENABLED_BACKENDS("Interpreter" , "CPU" ); // NNPI does not support fp32 for exp |
19209 | |
19210 | testLogSoftMax<float, ElemKind::FloatTy>(bindings_, mod_, F_, EE_, 0.001); |
19211 | } |
19212 | |
19213 | /// Verify that Quantize, Rescale, Dequantize work correctly together. |
19214 | static void quantizeSimpleTest(glow::PlaceholderBindings &bindings_, |
19215 | glow::Module &mod_, glow::Function *F_, |
19216 | glow::ExecutionEngine &EE_, ElemKind QTy) { |
19217 | auto *input = |
19218 | mod_.createPlaceholder(ElemKind::FloatTy, {1, 1}, "input" , true); |
19219 | bindings_.allocate(input)->init(Tensor::InitKind::Broadcast, 21, |
19220 | mod_.getPRNG()); |
19221 | |
19222 | auto *Q = |
19223 | F_->createQuantize("quant" , input, mod_.uniqueType(QTy, {1, 1}, 0.25, 4)); |
19224 | auto *RS = F_->createRescaleQuantized("rescale" , Q, |
19225 | mod_.uniqueType(QTy, {1, 1}, 0.5, 11)); |
19226 | auto *D = F_->createDequantize("dequantize" , RS, ElemKind::FloatTy); |
19227 | auto *save = F_->createSave("ret" , D); |
19228 | auto *result = bindings_.allocate(save->getPlaceholder()); |
19229 | |
19230 | EXPECT_EQ(F_->getNodes().size(), 4); |
19231 | EE_.compile(CompilationMode::Infer); |
19232 | |
19233 | EE_.run(bindings_); |
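  // After compilation the optimizer folds the Quantize -> Rescale ->
  // Dequantize chain away, leaving only the Save node.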
19234 | EXPECT_EQ(F_->getNodes().size(), 1); |
19235 | |
19236 | auto RH = result->getHandle(); |
19237 | EXPECT_NEAR(RH.at({0, 0}), 21.0, 0.001); |
19238 | } |
19239 | |
19240 | TEST_P(OperatorTest, QuantizeSimpleInt8) { |
19241 | CHECK_IF_ENABLED(); |
19242 | quantizeSimpleTest(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
19243 | } |
19244 | TEST_P(OperatorTest, QuantizeSimpleInt16) { |
19245 | CHECK_IF_ENABLED(); |
19246 | quantizeSimpleTest(bindings_, mod_, F_, EE_, ElemKind::Int16QTy); |
19247 | } |
19248 | TEST_P(OperatorTest, QuantizeSimpleInt32) { |
19249 | CHECK_IF_ENABLED(); |
19250 | quantizeSimpleTest(bindings_, mod_, F_, EE_, ElemKind::Int32QTy); |
19251 | } |
19252 | |
19253 | TEST_P(OperatorTest, LengthsToRanges) { |
19254 | CHECK_IF_ENABLED(); |
19255 | |
19256 | /* |
19257 | LENGTHS = [1, 3, 0, 2] |
19258 | OUTPUT = [[0, 1], [1, 3], [4, 0], [4, 2]] |
19259 | */ |
19260 | auto *lengths = |
19261 | mod_.createPlaceholder(ElemKind::Int32ITy, {4}, "lengths" , false); |
19262 | |
19263 | bindings_.allocate(lengths)->getHandle<int32_t>() = {1, 3, 0, 2}; |
19264 | |
19265 | auto *R = F_->createLengthsToRanges("LTR" , lengths); |
19266 | auto *S = F_->createSave("save" , R); |
19267 | bindings_.allocate(S->getPlaceholder()); |
19268 | |
19269 | EE_.compile(CompilationMode::Infer); |
19270 | EE_.run(bindings_); |
19271 | |
19272 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
19273 | Tensor expected(ElemKind::Int32ITy, {4, 2}); |
19274 | expected.getHandle<int32_t>() = { |
19275 | 0, 1, 1, 3, 4, 0, 4, 2, |
19276 | }; |
19277 | |
19278 | EXPECT_TRUE(expected.isEqual(result)); |
19279 | } |
19280 | |
19281 | /// Test that LengthsRangeFill works. |
19282 | TEST_P(OperatorTest, LengthsRangeFill) { |
19283 | CHECK_IF_ENABLED(); |
19284 | |
19285 | /* |
19286 | LENGTHS = [4, 3, 1] |
19287 | OUTPUT = [0, 1, 2, 3, 0, 1, 2, 0] |
19288 | */ |
19289 | auto *lengths = |
19290 | mod_.createPlaceholder(ElemKind::Int32ITy, {3}, "lengths" , false); |
19291 | |
19292 | bindings_.allocate(lengths)->getHandle<int32_t>() = {4, 3, 1}; |
19293 | |
19294 | auto *LRF = F_->createLengthsRangeFill("LRF" , lengths, /* maxOutputSize */ 8); |
19295 | auto *S = F_->createSave("save" , LRF); |
19296 | bindings_.allocate(S->getPlaceholder()); |
19297 | |
19298 | EE_.compile(CompilationMode::Infer); |
19299 | EE_.run(bindings_); |
19300 | |
19301 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
19302 | Tensor expected(ElemKind::Int32ITy, {8}); |
19303 | expected.getHandle<int32_t>() = {0, 1, 2, 3, 0, 1, 2, 0}; |
19304 | |
19305 | EXPECT_TRUE(expected.isEqual(result)); |
19306 | } |
19307 | |
19308 | /// Test GaussianFill |
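/// GaussianFill produces an output with the same shape as the input, filled
/// with samples from a Gaussian with the given mean and scale; the test only
/// checks the shape and that the sample mean lands within one scale of the
/// requested mean.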
19309 | TEST_P(OperatorTest, GaussianFill) { |
19310 | CHECK_IF_ENABLED(); |
19311 | auto mean = 0.; |
19312 | auto scale = 1.; |
19313 | auto seed = 5.; |
19314 | auto *input = |
19315 | mod_.createPlaceholder(ElemKind::Int64ITy, {2, 4, 5}, "input" , false); |
19316 | auto inputH = bindings_.allocate(input)->getHandle<int64_t>(); |
19317 | inputH.randomize(-10, 10, mod_.getPRNG()); |
19318 | |
19319 | auto *GF = F_->createGaussianFill("GF" , input, mean, scale, seed); |
19320 | auto *S = F_->createSave("save" , GF); |
19321 | bindings_.allocate(S->getPlaceholder()); |
19322 | |
19323 | EE_.compile(CompilationMode::Infer); |
19324 | EE_.run(bindings_); |
19325 | |
19326 | Tensor &result = *bindings_.get(S->getPlaceholder()); |
19327 | EXPECT_EQ(result.dims(), inputH.dims()); |
19328 | auto resultH = result.getHandle<float16_t>(); |
19329 | float16_t resultMean = 0; |
19330 | auto n = resultH.actualSize(); |
19331 | for (size_t i = 0; i < n; i++) { |
19332 | resultMean += resultH.raw(i); |
19333 | } |
19334 | resultMean /= n; |
19335 | EXPECT_NEAR(mean, resultMean, scale); |
19336 | } |
19337 | |
19338 | /// Helper for testing BatchOneHot with different \p DTy. |
19339 | template <typename DataType> |
19340 | void batchOneHotTest(glow::PlaceholderBindings &bindings, glow::Module &mod, |
19341 | glow::Function *F, glow::ExecutionEngine &EE, |
19342 | ElemKind DTy) { |
19343 | /* |
19344 | DATA = [[5, 0], [11, 3], [0, 5]] |
19345 | LENGTHS = [4, 2] |
19346 | VALUES = [5, 0, 11, 0, 5, 0] |
19347 | OUTPUT = [[1, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0], [0, 1, 0, 1, 1, 0]] |
19348 | */ |
19349 | auto *data = |
19350 | createPlaceholderConditionallyQuantized(mod, DTy, {3, 2}, "data" , false); |
19351 | auto *lengths = |
19352 | mod.createPlaceholder(ElemKind::Int32ITy, {2}, "lengths" , false, "N" ); |
19353 | auto *values = createPlaceholderConditionallyQuantized(mod, DTy, {6}, |
19354 | "values" , false, "N" ); |
19355 | |
19356 | bindings.allocate(data)->getHandle<DataType>() = {5, 0, 11, 3, 0, 5}; |
19357 | bindings.allocate(lengths)->getHandle<int32_t>() = {4, 2}; |
19358 | bindings.allocate(values)->getHandle<DataType>() = {5, 0, 11, 0, 5, 0}; |
19359 | |
19360 | auto *R = F->createBatchOneHot("BOH" , data, lengths, values); |
19361 | auto *S = F->createSave("save" , R); |
19362 | bindings.allocate(S->getPlaceholder()); |
19363 | |
19364 | EE.compile(CompilationMode::Infer); |
19365 | EE.run(bindings); |
19366 | |
19367 | Tensor &result = *bindings.get(S->getPlaceholder()); |
19368 | auto expected = createTensorConditionallyQuantized(DTy, {3, 6}); |
19369 | expected.getHandle<DataType>() = { |
19370 | 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, |
19371 | }; |
19372 | |
19373 | EXPECT_TRUE(expected.isEqual(result)); |
19374 | } |
19375 | |
19376 | /// Test BatchOneHot with Float data and Int32 Lengths. |
19377 | TEST_P(OperatorTest, BatchOneHotDataFloat) { |
19378 | CHECK_IF_ENABLED(); |
19379 | batchOneHotTest<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
19380 | } |
19381 | |
/// Test BatchOneHot with Float16 data and Int32 Lengths.
19383 | TEST_P(OperatorTest, BatchOneHotDataFloat16) { |
19384 | CHECK_IF_ENABLED(); |
19385 | batchOneHotTest<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
19386 | } |
19387 | |
/// Test BatchOneHot with BFloat16 data and Int32 Lengths.
19389 | TEST_P(OperatorTest, BatchOneHotDataBFloat16) { |
19390 | CHECK_IF_ENABLED(); |
19391 | batchOneHotTest<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
19392 | } |
19393 | |
19394 | /// Test BatchOneHot with Int64 data and Int32 Lengths. |
19395 | TEST_P(OperatorTest, BatchOneHotDataInt64) { |
19396 | CHECK_IF_ENABLED(); |
19397 | batchOneHotTest<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy); |
19398 | } |
19399 | |
19400 | /// Test BatchOneHot with Int32 data and Int32 Lengths. |
19401 | TEST_P(OperatorTest, BatchOneHotDataInt32) { |
19402 | CHECK_IF_ENABLED(); |
19403 | batchOneHotTest<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy); |
19404 | } |
19405 | |
19406 | /// Test BatchOneHot with Int8 data and Int32 Lengths. |
19407 | TEST_P(OperatorTest, BatchOneHotDataInt8) { |
19408 | CHECK_IF_ENABLED(); |
19409 | batchOneHotTest<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
19410 | } |
19411 | |
19412 | /// Modulo with Int64 Tensors with SignFollowDivisor off. |
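/// With signFollowDivisor off, the result keeps the sign of the dividend
/// (C-style remainder), e.g. -7 % 3 = -1; with it on, the result follows the
/// sign of the divisor (Python-style modulo), e.g. -7 mod 3 = 2.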
19413 | TEST_P(OperatorTest, ModuloInt64NoSignFollow) { |
19414 | CHECK_IF_ENABLED(); |
19415 | |
19416 | auto *src = mod_.createPlaceholder(ElemKind::Int64ITy, {3, 5}, "src" , false); |
19417 | auto srcH = bindings_.allocate(src)->getHandle<int64_t>(); |
19418 | |
19419 | srcH = {-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}; |
19420 | |
19421 | int64_t divisor = 3; |
19422 | bool signFollowDivisor = false; |
19423 | |
19424 | auto *modulo = F_->createModulo("mod" , src, divisor, signFollowDivisor); |
19425 | auto *result = F_->createSave("save" , modulo); |
19426 | bindings_.allocate(result->getPlaceholder()); |
19427 | |
19428 | EE_.compile(CompilationMode::Infer); |
19429 | EE_.run(bindings_); |
19430 | |
19431 | auto resultH = bindings_.get(result->getPlaceholder())->getHandle<int64_t>(); |
19432 | |
19433 | std::vector<int64_t> expectedResults = {-1, 0, -2, -1, 0, -2, -1, 0, |
19434 | 1, 2, 0, 1, 2, 0, 1}; |
19435 | ASSERT_EQ(expectedResults.size(), resultH.size()); |
19436 | |
19437 | for (size_t i = 0, end = expectedResults.size(); i < end; ++i) { |
19438 | EXPECT_EQ(resultH.raw(i), expectedResults.at(i)); |
19439 | } |
19440 | } |
19441 | |
19442 | /// Modulo with Int64 Tensors with SignFollowDivisor on. |
19443 | TEST_P(OperatorTest, ModuloInt64SignFollow) { |
19444 | CHECK_IF_ENABLED(); |
19445 | |
19446 | auto *src = mod_.createPlaceholder(ElemKind::Int64ITy, {3, 5}, "src" , false); |
19447 | auto srcH = bindings_.allocate(src)->getHandle<int64_t>(); |
19448 | |
19449 | srcH = {-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}; |
19450 | |
19451 | int64_t divisor = 3; |
19452 | bool signFollowDivisor = true; |
19453 | |
19454 | auto *modulo = F_->createModulo("mod" , src, divisor, signFollowDivisor); |
19455 | auto *result = F_->createSave("save" , modulo); |
19456 | bindings_.allocate(result->getPlaceholder()); |
19457 | |
19458 | EE_.compile(CompilationMode::Infer); |
19459 | EE_.run(bindings_); |
19460 | |
19461 | auto resultH = bindings_.get(result->getPlaceholder())->getHandle<int64_t>(); |
19462 | |
19463 | std::vector<int64_t> expectedResults = {2, 0, 1, 2, 0, 1, 2, 0, |
19464 | 1, 2, 0, 1, 2, 0, 1}; |
19465 | ASSERT_EQ(expectedResults.size(), resultH.size()); |
19466 | |
19467 | for (size_t i = 0, end = expectedResults.size(); i < end; ++i) { |
19468 | EXPECT_EQ(resultH.raw(i), expectedResults.at(i)); |
19469 | } |
19470 | } |
19471 | |
19472 | /// Modulo with Int32 Tensors with SignFollowDivisor off. |
19473 | TEST_P(OperatorTest, ModuloInt32NoSignFollow) { |
  CHECK_IF_ENABLED();

  auto *src = mod_.createPlaceholder(ElemKind::Int32ITy, {3, 5}, "src" , false);
19477 | auto srcH = bindings_.allocate(src)->getHandle<int32_t>(); |
19478 | |
19479 | srcH = {-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}; |
19480 | |
19481 | int64_t divisor = 3; |
19482 | bool signFollowDivisor = false; |
19483 | |
19484 | auto *modulo = F_->createModulo("mod" , src, divisor, signFollowDivisor); |
19485 | auto *result = F_->createSave("save" , modulo); |
19486 | bindings_.allocate(result->getPlaceholder()); |
19487 | |
19488 | EE_.compile(CompilationMode::Infer); |
19489 | EE_.run(bindings_); |
19490 | |
19491 | auto resultH = bindings_.get(result->getPlaceholder())->getHandle<int32_t>(); |
19492 | |
19493 | std::vector<int32_t> expectedResults = {-1, 0, -2, -1, 0, -2, -1, 0, |
19494 | 1, 2, 0, 1, 2, 0, 1}; |
19495 | ASSERT_EQ(expectedResults.size(), resultH.size()); |
19496 | |
19497 | for (size_t i = 0, end = expectedResults.size(); i < end; ++i) { |
19498 | EXPECT_EQ(resultH.raw(i), expectedResults.at(i)); |
19499 | } |
19500 | } |
19501 | |
/// Modulo with Int32 Tensors with SignFollowDivisor on.
19503 | TEST_P(OperatorTest, ModuloInt32SignFollow) { |
19504 | CHECK_IF_ENABLED(); |
19505 | |
19506 | auto *src = mod_.createPlaceholder(ElemKind::Int32ITy, {3, 5}, "src" , false); |
19507 | auto srcH = bindings_.allocate(src)->getHandle<int32_t>(); |
19508 | |
19509 | srcH = {-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}; |
19510 | |
19511 | int64_t divisor = 3; |
19512 | bool signFollowDivisor = true; |
19513 | |
19514 | auto *modulo = F_->createModulo("mod" , src, divisor, signFollowDivisor); |
19515 | auto *result = F_->createSave("save" , modulo); |
19516 | bindings_.allocate(result->getPlaceholder()); |
19517 | |
19518 | EE_.compile(CompilationMode::Infer); |
19519 | EE_.run(bindings_); |
19520 | |
19521 | auto resultH = bindings_.get(result->getPlaceholder())->getHandle<int32_t>(); |
19522 | |
19523 | std::vector<int32_t> expectedResults = {2, 0, 1, 2, 0, 1, 2, 0, |
19524 | 1, 2, 0, 1, 2, 0, 1}; |
19525 | ASSERT_EQ(expectedResults.size(), resultH.size()); |
19526 | |
19527 | for (size_t i = 0, end = expectedResults.size(); i < end; ++i) { |
19528 | EXPECT_EQ(resultH.raw(i), expectedResults.at(i)); |
19529 | } |
19530 | } |
19531 | |
19532 | /// Helper to test DotProduct1D using \p DTy. |
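/// Note: for 1D inputs the operator reduces to an elementwise product (no
/// final reduction), as reflected in the expected output computed below.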
19533 | template <typename DataType> |
19534 | static void testDotProduct1D(glow::PlaceholderBindings &bindings, |
19535 | glow::Module &mod, glow::Function *F, |
19536 | glow::ExecutionEngine &EE, ElemKind DTy) { |
19537 | // Input tensors. |
19538 | constexpr dim_t kDataSize = 10; |
19539 | auto *X = createPlaceholderConditionallyQuantized(mod, DTy, {kDataSize}, "X" , |
19540 | false, "N" ); |
19541 | auto *Y = createPlaceholderConditionallyQuantized(mod, DTy, {kDataSize}, "Y" , |
19542 | false, "N" ); |
19543 | auto XH = bindings.allocate(X)->getHandle<DataType>(); |
19544 | auto YH = bindings.allocate(Y)->getHandle<DataType>(); |
19545 | |
19546 | // Fill inputs with random values. |
19547 | XH.randomize(-10.0, 10.0, mod.getPRNG()); |
19548 | YH.randomize(-10.0, 10.0, mod.getPRNG()); |
19549 | |
19550 | // Compute expected output. |
19551 | auto expected = createTensorConditionallyQuantized(DTy, {kDataSize}); |
19552 | auto expectedH = expected.getHandle<DataType>(); |
19553 | |
19554 | for (dim_t i = 0; i < kDataSize; ++i) { |
19555 | expectedH.at({i}) = XH.at({i}) * YH.at({i}); |
19556 | } |
19557 | |
19558 | // Compile and run the model. |
19559 | auto *dotProduct = F->createDotProduct("prod" , X, Y); |
19560 | auto *result = F->createSave("save" , dotProduct); |
19561 | bindings.allocate(result->getPlaceholder()); |
19562 | |
19563 | EE.compile(CompilationMode::Infer); |
19564 | EE.run(bindings); |
19565 | |
19566 | auto actualH = bindings.get(result->getPlaceholder())->getHandle<DataType>(); |
19567 | |
19568 | // Check that the output tensor is the same as the expected output. |
19569 | EXPECT_EQ(actualH.size(), expectedH.size()); |
19570 | for (std::size_t i = 0; i < actualH.size(); ++i) { |
19571 | EXPECT_NEAR(actualH.raw(i), expectedH.raw(i), 0.00001); |
19572 | } |
19573 | } |
19574 | |
19575 | /// Test a DotProduct operator with 1D inputs, using FloatTy. |
19576 | TEST_P(OperatorTest, dotProduct1D_Float) { |
19577 | CHECK_IF_ENABLED(); |
19578 | testDotProduct1D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
19579 | } |
19580 | |
19581 | /// Test a DotProduct operator with 1D inputs, using Float16Ty. |
19582 | TEST_P(OperatorTest, dotProduct1D_Float16) { |
19583 | CHECK_IF_ENABLED(); |
19584 | testDotProduct1D<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
19585 | } |
19586 | |
/// Test a DotProduct operator with 1D inputs, using BFloat16Ty.
19588 | TEST_P(OperatorTest, dotProduct1D_BFloat16) { |
19589 | CHECK_IF_ENABLED(); |
19590 | testDotProduct1D<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
19591 | } |
19592 | |
/// Test a DotProduct operator with 1D inputs, using Int8QTy.
19594 | TEST_P(OperatorTest, dotProduct1D_Int8) { |
19595 | CHECK_IF_ENABLED(); |
19596 | testDotProduct1D<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
19597 | } |
19598 | |
19599 | // Test a BatchedPairwiseDotProduct operator. |
19600 | TEST_P(OperatorTest, batchedPairwiseDotProduct) { |
19601 | CHECK_IF_ENABLED(); |
19602 | |
19603 | // Input tensors. |
19604 | constexpr dim_t kBatchSize = 2; |
19605 | constexpr dim_t kVectorSize = 6; |
19606 | |
  auto *W = createPlaceholderConditionallyQuantized(
      mod_, ElemKind::FloatTy, {kBatchSize, kVectorSize}, "W" , false);
19609 | auto *X = createPlaceholderConditionallyQuantized( |
19610 | mod_, ElemKind::FloatTy, {kBatchSize, kVectorSize}, "X" , false); |
19611 | auto *Y = createPlaceholderConditionallyQuantized( |
19612 | mod_, ElemKind::FloatTy, {kBatchSize, kVectorSize}, "Y" , false); |
19613 | auto *Z = createPlaceholderConditionallyQuantized( |
19614 | mod_, ElemKind::FloatTy, {kBatchSize, kVectorSize}, "Z" , false); |
19615 | auto WH = bindings_.allocate(W)->getHandle(); |
19616 | auto XH = bindings_.allocate(X)->getHandle(); |
19617 | auto YH = bindings_.allocate(Y)->getHandle(); |
19618 | auto ZH = bindings_.allocate(Z)->getHandle(); |
19619 | |
  // Fill inputs with fixed, known values.
19621 | |
19622 | WH = {1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2}; |
19623 | XH = {2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3}; |
19624 | YH = {3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4}; |
19625 | ZH = {4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5}; |
19626 | |
19627 | // Compute expected output. |
19628 | auto expected = |
19629 | createTensorConditionallyQuantized(ElemKind::FloatTy, {kBatchSize, 6}); |
19630 | auto expectedH = expected.getHandle(); |
19631 | |
19632 | expectedH = {12, 18, 36, 24, 48, 72, 36, 48, 72, 60, 90, 120}; |
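  // The six outputs per batch are the pairwise dot products of the inputs,
  // each new input dotted against all earlier ones: dot(W,X), dot(W,Y),
  // dot(X,Y), dot(W,Z), dot(X,Z), dot(Y,Z). E.g. for batch 0,
  // dot(W, X) = 6 * (1 * 2) = 12.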
19633 | |
19634 | // Compile and run the model. |
19635 | auto *pairwiseDotProduct = |
19636 | F_->createBatchedPairwiseDotProduct("prod" , {W, X, Y, Z}); |
19637 | auto *result = F_->createSave("save" , pairwiseDotProduct); |
19638 | bindings_.allocate(result->getPlaceholder()); |
19639 | |
19640 | EE_.compile(CompilationMode::Infer); |
19641 | EE_.run(bindings_); |
19642 | |
19643 | auto actualH = bindings_.get(result->getPlaceholder())->getHandle(); |
19644 | |
19645 | // Check that the output tensor is the same as the expected output. |
19646 | EXPECT_TRUE(actualH.size() == expectedH.size()); |
19647 | EXPECT_TRUE(actualH.getType().isEqual(expectedH.getType())); |
19648 | for (std::size_t i = 0; i < actualH.size(); ++i) { |
19649 | EXPECT_NEAR(actualH.raw(i), expectedH.raw(i), 0.00001); |
19650 | } |
19651 | } |
19652 | |
19653 | // Test an ElementwiseLinear operator with both axis = 0 and axis = 1 |
19654 | // arguments. |
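// ElementwiseLinear computes X * w + b, broadcasting w and b over X along the
// given axis (the reference computation at the end of the test spells this
// out).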
19655 | TEST_P(OperatorTest, elementwiseLinear) { |
19656 | CHECK_IF_ENABLED(); |
19657 | |
19658 | constexpr dim_t kRows = 10; |
19659 | constexpr dim_t kCols = 20; |
19660 | |
19661 | // Create and allocate input placeholders. |
19662 | auto *X = |
19663 | mod_.createPlaceholder(ElemKind::FloatTy, {kCols, kRows}, "X" , false); |
19664 | auto *w = mod_.createPlaceholder(ElemKind::FloatTy, {kCols}, "w" , false); |
19665 | auto *b = mod_.createPlaceholder(ElemKind::FloatTy, {kCols}, "b" , false); |
19666 | |
19667 | auto XH = bindings_.allocate(X)->getHandle(); |
19668 | auto wH = bindings_.allocate(w)->getHandle(); |
19669 | auto bH = bindings_.allocate(b)->getHandle(); |
19670 | |
19671 | // Fill inputs with random values. |
19672 | XH.randomize(-3.0, 3.0, mod_.getPRNG()); |
19673 | wH.randomize(-3.0, 3.0, mod_.getPRNG()); |
19674 | bH.randomize(-3.0, 3.0, mod_.getPRNG()); |
19675 | |
  // Create two separate subgraphs to test behaviour when axis = 0 and axis =
  // 1. For the test with axis = 0, the 0th dimension of X, w, and b must
  // match.
19678 | auto *elementwiseLinearAxisZero = |
19679 | F_->createElementwiseLinear("elAxisZero" , X, w, b, /*axis=*/0); |
19680 | auto *resultAxisZero = |
19681 | F_->createSave("saveAxisZero" , elementwiseLinearAxisZero); |
19682 | bindings_.allocate(resultAxisZero->getPlaceholder()); |
19683 | |
  // For the test with axis = 1, the 1st dimension of X must match the 0th
  // dimension of w and b, so a transpose of X is needed.
19686 | auto *XT = F_->createTranspose("XT" , X, {1, 0}); |
19687 | auto *elementwiseLinearAxisOne = |
19688 | F_->createElementwiseLinear("elAxisOne" , XT, w, b, /*axis=*/1); |
19689 | auto *resultAxisOne = F_->createSave("saveAxisOne" , elementwiseLinearAxisOne); |
19690 | bindings_.allocate(resultAxisOne->getPlaceholder()); |
19691 | |
19692 | // Compile and run the model. |
19693 | EE_.compile(CompilationMode::Infer); |
19694 | EE_.run(bindings_); |
19695 | |
19696 | auto resAxisZeroH = |
19697 | bindings_.get(resultAxisZero->getPlaceholder())->getHandle(); |
19698 | auto resAxisOneH = |
19699 | bindings_.get(resultAxisOne->getPlaceholder())->getHandle(); |
19700 | |
19701 | // Results should be the same shape as X/XT. |
19702 | ASSERT_EQ(resAxisZeroH.size(), XH.size()); |
19703 | ASSERT_EQ(resAxisOneH.size(), (XT->getResult().getType())->size()); |
19704 | |
19705 | // Compute the expected output and check that the model outputs match. |
19706 | for (dim_t i = 0; i < resAxisZeroH.dims()[0]; ++i) { |
19707 | for (dim_t j = 0; j < resAxisZeroH.dims()[1]; ++j) { |
19708 | float expected = (XH.at({i, j}) * wH.at({i})) + bH.at({i}); |
19709 | EXPECT_NEAR(resAxisZeroH.at({i, j}), expected, 0.00001); |
19710 | EXPECT_NEAR(resAxisOneH.at({j, i}), expected, 0.00001); |
19711 | } |
19712 | } |
19713 | } |
19714 | |
19715 | /// Helper to test DotProduct2D using \p DTy. |
19716 | template <typename DataType> |
19717 | static void testDotProduct2D(glow::PlaceholderBindings &bindings, |
19718 | glow::Module &mod, glow::Function *F, |
19719 | glow::ExecutionEngine &EE, ElemKind DTy) { |
19720 | // Input tensors. |
19721 | constexpr dim_t kRows = 10; |
19722 | constexpr dim_t kCols = 14; |
19723 | auto *X = createPlaceholderConditionallyQuantized(mod, DTy, {kRows, kCols}, |
19724 | "X" , false); |
19725 | auto *Y = createPlaceholderConditionallyQuantized(mod, DTy, {kRows, kCols}, |
19726 | "Y" , false); |
19727 | auto XH = bindings.allocate(X)->getHandle<DataType>(); |
19728 | auto YH = bindings.allocate(Y)->getHandle<DataType>(); |
19729 | |
19730 | // Fill inputs with random values. |
19731 | XH.randomize(-3.0, 3.0, mod.getPRNG()); |
19732 | YH.randomize(-3.0, 3.0, mod.getPRNG()); |
19733 | |
19734 | // Compute expected output. |
19735 | auto expected = createTensorConditionallyQuantized(DTy, {kRows}); |
19736 | auto expectedH = expected.getHandle<DataType>(); |
19737 | |
19738 | for (dim_t i = 0; i < kRows; ++i) { |
19739 | DataType dotProduct = 0.0f; |
19740 | |
19741 | // Compute dot product of the i-th row of X and Y. |
19742 | for (dim_t j = 0; j < kCols; ++j) { |
19743 | dotProduct += (XH.at({i, j}) * YH.at({i, j})); |
19744 | } |
19745 | |
19746 | expectedH.at({i}) = dotProduct; |
19747 | } |
19748 | |
19749 | // Compile and run the model. |
19750 | auto *dotProduct = F->createDotProduct("prod" , X, Y); |
19751 | auto *result = F->createSave("save" , dotProduct); |
19752 | bindings.allocate(result->getPlaceholder()); |
19753 | |
19754 | EE.compile(CompilationMode::Infer); |
19755 | EE.run(bindings); |
19756 | |
19757 | auto actualH = bindings.get(result->getPlaceholder())->getHandle<DataType>(); |
19758 | |
19759 | // Check that the output tensor is the same as the expected output. |
19760 | EXPECT_EQ(actualH.size(), expectedH.size()); |
19761 | for (std::size_t i = 0; i < actualH.size(); ++i) { |
19762 | EXPECT_NEAR(actualH.raw(i), expectedH.raw(i), 0.00001); |
19763 | } |
19764 | } |
19765 | |
19766 | // Test a DotProduct operator with 2D inputs, using FloatTy. |
19767 | TEST_P(OperatorTest, dotProduct2D_Float) { |
19768 | CHECK_IF_ENABLED(); |
19769 | testDotProduct2D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
19770 | } |
19771 | |
19772 | // Test a DotProduct operator with 2D inputs, using Float16Ty. |
19773 | TEST_P(OperatorTest, dotProduct2D_Float16) { |
19774 | CHECK_IF_ENABLED(); |
19775 | testDotProduct2D<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
19776 | } |
19777 | |
19778 | // Test a DotProduct operator with 2D inputs, using BFloat16Ty. |
19779 | TEST_P(OperatorTest, dotProduct2D_BFloat16) { |
19780 | CHECK_IF_ENABLED(); |
19781 | testDotProduct2D<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty); |
19782 | } |
19783 | |
19784 | // Test a DotProduct operator with 2D inputs, using Int8QTy. |
19785 | TEST_P(OperatorTest, dotProduct2D_Int8) { |
19786 | CHECK_IF_ENABLED(); |
19787 | testDotProduct2D<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
19788 | } |
19789 | |
19790 | /// Helper to test BatchBoxCox using \p DTy. |
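/// The transform computes ((x + lambda2)^lambda1 - 1) / lambda1 when
/// lambda1 != 0, and ln(x + lambda2) otherwise, with the argument clipped
/// away from zero; see the reference computation below.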
19791 | template <typename DataType> |
19792 | static void testBatchBoxCox(glow::PlaceholderBindings &bindings, |
19793 | glow::Module &mod, glow::Function *F, |
19794 | glow::ExecutionEngine &EE, ElemKind DTy, |
19795 | float allowedError = 0.0001f, float maxRange = 5.0f, |
19796 | float maxLambda2 = 2.0f) { |
19797 | // Input tensors. |
19798 | const dim_t kRows = 10; |
19799 | const dim_t kCols = 5; |
19800 | auto *data = mod.createPlaceholder(DTy, {kRows, kCols}, "data" , |
19801 | /* isTrainable */ false); |
19802 | auto *lambda1 = mod.createPlaceholder(DTy, {kCols}, "lambda1" , |
19803 | /* isTrainable */ false); |
19804 | auto *lambda2 = mod.createPlaceholder(DTy, {kCols}, "lambda2" , |
19805 | /* isTrainable */ false); |
19806 | auto dataH = bindings.allocate(data)->getHandle<DataType>(); |
19807 | auto lambda1H = bindings.allocate(lambda1)->getHandle<DataType>(); |
19808 | auto lambda2H = bindings.allocate(lambda2)->getHandle<DataType>(); |
19809 | |
19810 | // Fill inputs with random values. |
19811 | dataH.randomize(0.0, maxRange, mod.getPRNG()); |
19812 | lambda1H.randomize(1.0, 2.0, mod.getPRNG()); |
19813 | lambda2H.randomize(1.0, maxLambda2, mod.getPRNG()); |
19814 | |
  // Zero out every other element of lambda1 to test that case of the
  // transform.
19817 | for (dim_t i = 0; i < kCols; i += 2) { |
19818 | lambda1H.at({i}) = 0; |
19819 | } |
19820 | |
19821 | const float epsilon = std::is_same<float, DataType>::value |
19822 | ? std::numeric_limits<float>::min() |
19823 | : 1e-6f; |
19824 | |
19825 | // Construct the graph for the backend to run. |
19826 | auto *BBC = F->createBatchBoxCox("bbc" , data, lambda1, lambda2, epsilon); |
19827 | auto *save = F->createSave("save" , BBC); |
19828 | auto resultH = |
19829 | bindings.allocate(save->getPlaceholder())->getHandle<DataType>(); |
19830 | |
19831 | // Compile and run the model, setting results in tensor backed by resultH. |
19832 | EE.compile(CompilationMode::Infer); |
19833 | EE.run(bindings); |
19834 | |
19835 | // Compute expected output here on the host to compare results. |
19836 | Tensor expected(DTy, {kRows, kCols}); |
19837 | auto expectedH = expected.getHandle<DataType>(); |
19838 | |
19839 | for (dim_t i = 0; i < kRows; ++i) { |
19840 | for (dim_t j = 0; j < kCols; ++j) { |
19841 | float d = dataH.at({i, j}); |
19842 | float l1 = lambda1H.at({j}); |
19843 | float l2 = lambda2H.at({j}); |
19844 | |
      // Compute the elementwise Box-Cox transform, clipping the argument to
      // log and pow at 1e-6 to avoid saturation.
      float tmp = std::max(d + l2, 1e-6f);
      if (l1 == 0) {
        expectedH.at({i, j}) = std::log(tmp);
      } else {
        expectedH.at({i, j}) = (std::pow(tmp, l1) - 1) / l1;
      }
19853 | } |
19854 | } |
19855 | |
19856 | // Check that the output tensor is the same as the expected output. |
19857 | for (size_t i = 0; i < resultH.size(); ++i) { |
19858 | EXPECT_NEAR(resultH.raw(i), expectedH.raw(i), allowedError); |
19859 | } |
19860 | } |
19861 | |
19862 | /// Test that the BatchBoxCox operator works as expected in FloatTy. |
19863 | TEST_P(OperatorTest, BatchBoxCox_Float) { |
19864 | CHECK_IF_ENABLED(); |
19865 | testBatchBoxCox<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, 0.001f); |
19866 | } |
19867 | |
19868 | /// Test that the BatchBoxCox operator works as expected in Float16Ty. |
19869 | TEST_P(OperatorTest, BatchBoxCox_Large_Float16) { |
19870 | CHECK_IF_ENABLED(); |
19871 | testBatchBoxCox<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
19872 | 0.032f, 5.0f); |
19873 | } |
19874 | TEST_P(OperatorTest, BatchBoxCox_Medium_Float16) { |
19875 | CHECK_IF_ENABLED(); |
19876 | testBatchBoxCox<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
19877 | 0.016f, 3.0f); |
19878 | } |
19879 | TEST_P(OperatorTest, BatchBoxCox_Small_Float16) { |
19880 | CHECK_IF_ENABLED(); |
19881 | testBatchBoxCox<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty, |
19882 | 0.003f, 1.0f, 1.001f); |
19883 | } |
19884 | |
19885 | /// Test that the BatchBoxCox operator works as expected in BFloat16Ty. |
19886 | TEST_P(OperatorTest, BatchBoxCox_Large_BFloat16) { |
19887 | CHECK_IF_ENABLED(); |
19888 | testBatchBoxCox<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
19889 | 0.32f, 5.0f); |
19890 | } |
19891 | TEST_P(OperatorTest, BatchBoxCox_Medium_BFloat16) { |
19892 | CHECK_IF_ENABLED(); |
19893 | testBatchBoxCox<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
19894 | 0.16f, 3.0f); |
19895 | } |
19896 | TEST_P(OperatorTest, BatchBoxCox_Small_BFloat16) { |
19897 | CHECK_IF_ENABLED(); |
19898 | testBatchBoxCox<bfloat16_t>(bindings_, mod_, F_, EE_, ElemKind::BFloat16Ty, |
19899 | 0.03f, 1.0f, 1.001f); |
19900 | } |
19901 | |
19902 | /// Test that Arithmetic ops work. |
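/// B is drawn from [0.01, 10] so that Div never divides by zero.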
19903 | #define TEST_ARITH_OP_FLOAT(OP_NAME_, OP_) \ |
19904 | TEST_P(OperatorTest, OP_NAME_##ArithFloatTest) { \ |
19905 | CHECK_IF_ENABLED(); \ |
19906 | constexpr dim_t size = 50; \ |
19907 | auto *A = mod_.createPlaceholder(ElemKind::FloatTy, {size}, "A", false); \ |
19908 | auto *B = mod_.createPlaceholder(ElemKind::FloatTy, {size}, "B", false); \ |
19909 | auto *AT = bindings_.allocate(A); \ |
19910 | auto *BT = bindings_.allocate(B); \ |
19911 | auto AH = AT->getHandle(); \ |
19912 | auto BH = BT->getHandle(); \ |
19913 | AH.randomize(-10.0f, 10.0f, mod_.getPRNG()); \ |
19914 | BH.randomize(0.01f, 10.0f, mod_.getPRNG()); \ |
19915 | \ |
19916 | auto *N = F_->create##OP_NAME_("op", A, B); \ |
19917 | auto *save = F_->createSave("save", N); \ |
19918 | auto resultH = bindings_.allocate(save->getPlaceholder())->getHandle(); \ |
19919 | \ |
19920 | EE_.compile(CompilationMode::Infer); \ |
19921 | EE_.run(bindings_); \ |
19922 | \ |
19923 | for (size_t i = 0; i < size; i++) { \ |
19924 | EXPECT_FLOAT_EQ(resultH.raw(i), OP_(AH.raw(i), BH.raw(i))); \ |
19925 | } \ |
19926 | } |
19927 | |
19928 | TEST_ARITH_OP_FLOAT(Add, [](float a, float b) { return a + b; }) |
19929 | TEST_ARITH_OP_FLOAT(Sub, [](float a, float b) { return a - b; }) |
19930 | TEST_ARITH_OP_FLOAT(Mul, [](float a, float b) { return a * b; }) |
19931 | TEST_ARITH_OP_FLOAT(Div, [](float a, float b) { return a / b; }) |
19932 | TEST_ARITH_OP_FLOAT(Min, [](float a, float b) { return std::min(a, b); }) |
19933 | TEST_ARITH_OP_FLOAT(Max, [](float a, float b) { return std::max(a, b); }) |
19934 | |
19935 | /// Helper to test ConvertTo casting from \p STy to \p DTy. |
19936 | template <typename SourceType, typename DestType> |
19937 | static void testConvertTo(glow::PlaceholderBindings &bindings_, |
19938 | glow::Module &mod_, glow::Function *F_, |
19939 | glow::ExecutionEngine &EE_, ElemKind STy, |
19940 | ElemKind DTy) { |
19941 | // Input tensor in source type. |
19942 | dim_t shape[] = {5, 3, 20}; |
19943 | auto *data = mod_.createPlaceholder(STy, shape, "data" , |
19944 | /* isTrainable */ false); |
19945 | auto dataH = bindings_.allocate(data)->getHandle<SourceType>(); |
19946 | if (STy == ElemKind::BoolTy) { |
19947 | for (dim_t i = 0; i < dataH.size(); i++) { |
19948 | dataH.raw(i) = static_cast<bool>(i % 2 == 0); |
19949 | } |
19950 | } else { |
19951 | dataH.randomize(-1000, 1000, mod_.getPRNG()); |
19952 | } |
19953 | |
19954 | // Construct the graph for the backend to run, converting to dest type. |
19955 | auto OT = mod_.uniqueType(DTy, shape); |
19956 | auto *convert = F_->createConvertTo("convert" , data, OT); |
19957 | auto *save = F_->createSave("save" , convert); |
19958 | auto resultH = |
19959 | bindings_.allocate(save->getPlaceholder())->getHandle<DestType>(); |
19960 | |
19961 | // Compile and run the model, setting results in tensor backed by resultH. |
19962 | EE_.compile(CompilationMode::Infer); |
19963 | EE_.run(bindings_); |
19964 | |
19965 | // Compute expected output here on the host to compare results. |
19966 | Tensor expected(DTy, shape); |
19967 | auto expectedH = expected.getHandle<DestType>(); |
19968 | for (size_t i = 0, e = expectedH.size(); i < e; ++i) { |
19969 | expectedH.raw(i) = static_cast<DestType>(dataH.raw(i)); |
19970 | } |
19971 | |
19972 | // Check that the output tensor is the same as the expected output. |
19973 | for (size_t i = 0, e = resultH.size(); i < e; i++) { |
19974 | const DestType exp = expectedH.raw(i); |
19975 | const DestType res = resultH.raw(i); |
19976 | if (DTy == ElemKind::FloatTy) { |
19977 | EXPECT_FLOAT_EQ(exp, res); |
19978 | } else { |
19979 | EXPECT_EQ(exp, res); |
19980 | } |
19981 | } |
19982 | } |
19983 | |
19984 | /// Test that ConvertTo operator casts correctly from one type to another. |
19985 | #define TEST_CONVERT_TO(T_FROM, T_TO, DTY_FROM, DTY_TO) \ |
19986 | TEST_P(OperatorTest, ConvertFrom_##DTY_FROM##_To_##DTY_TO) { \ |
19987 | CHECK_IF_ENABLED(); \ |
19988 | testConvertTo<T_FROM, T_TO>(bindings_, mod_, F_, EE_, ElemKind::DTY_FROM, \ |
19989 | ElemKind::DTY_TO); \ |
19990 | } |
19991 | TEST_CONVERT_TO(float, float, FloatTy, FloatTy) |
19992 | TEST_CONVERT_TO(float, float16_t, FloatTy, Float16Ty) |
19993 | TEST_CONVERT_TO(float, bfloat16_t, FloatTy, BFloat16Ty) |
19994 | TEST_CONVERT_TO(float, int32_t, FloatTy, Int32ITy) |
19995 | TEST_CONVERT_TO(float, int64_t, FloatTy, Int64ITy) |
19996 | TEST_CONVERT_TO(float, bool, FloatTy, BoolTy) |
19997 | TEST_CONVERT_TO(float16_t, float, Float16Ty, FloatTy) |
19998 | TEST_CONVERT_TO(float16_t, float16_t, Float16Ty, Float16Ty) |
19999 | TEST_CONVERT_TO(float16_t, bfloat16_t, Float16Ty, BFloat16Ty) |
20000 | TEST_CONVERT_TO(float16_t, int32_t, Float16Ty, Int32ITy) |
20001 | TEST_CONVERT_TO(float16_t, int64_t, Float16Ty, Int64ITy) |
20002 | TEST_CONVERT_TO(bfloat16_t, float, BFloat16Ty, FloatTy) |
20003 | TEST_CONVERT_TO(bfloat16_t, float16_t, BFloat16Ty, Float16Ty) |
20004 | TEST_CONVERT_TO(bfloat16_t, bfloat16_t, BFloat16Ty, BFloat16Ty) |
20005 | TEST_CONVERT_TO(bfloat16_t, int32_t, BFloat16Ty, Int32ITy) |
20006 | TEST_CONVERT_TO(bfloat16_t, int64_t, BFloat16Ty, Int64ITy) |
20007 | TEST_CONVERT_TO(int32_t, float, Int32ITy, FloatTy) |
20008 | TEST_CONVERT_TO(int32_t, float16_t, Int32ITy, Float16Ty) |
20009 | TEST_CONVERT_TO(int32_t, bfloat16_t, Int32ITy, BFloat16Ty) |
20010 | TEST_CONVERT_TO(int32_t, int32_t, Int32ITy, Int32ITy) |
20011 | TEST_CONVERT_TO(int32_t, int64_t, Int32ITy, Int64ITy) |
20012 | TEST_CONVERT_TO(int64_t, float, Int64ITy, FloatTy) |
20013 | TEST_CONVERT_TO(int64_t, float16_t, Int64ITy, Float16Ty) |
20014 | TEST_CONVERT_TO(int64_t, bfloat16_t, Int64ITy, BFloat16Ty) |
20015 | TEST_CONVERT_TO(int64_t, int32_t, Int64ITy, Int32ITy) |
20016 | TEST_CONVERT_TO(int64_t, int64_t, Int64ITy, Int64ITy) |
20017 | TEST_CONVERT_TO(bool, float, BoolTy, FloatTy) |
20018 | TEST_CONVERT_TO(bool, float16_t, BoolTy, Float16Ty) |
20019 | TEST_CONVERT_TO(bool, bfloat16_t, BoolTy, BFloat16Ty) |
20020 | TEST_CONVERT_TO(bool, int32_t, BoolTy, Int32ITy) |
20021 | |
20022 | #undef TEST_CONVERT_TO |
20023 | |
20024 | /// Helper to test ConvertTo casting from \p STy to \p DTy and back. |
20025 | template <typename SourceType, typename DestType> |
20026 | static void testConvertToAndBack(glow::PlaceholderBindings &bindings_, |
20027 | glow::Module &mod_, glow::Function *F_, |
20028 | glow::ExecutionEngine &EE_, ElemKind STy, |
20029 | ElemKind DTy, bool castIsNoOp) { |
20030 | // Input tensor in source type. |
20031 | dim_t shape[] = {5, 3, 20}; |
20032 | auto *data = mod_.createPlaceholder(STy, shape, "data" , |
20033 | /* isTrainable */ false); |
20034 | auto dataH = bindings_.allocate(data)->getHandle<SourceType>(); |
20035 | dataH.randomize(-1000, 1000, mod_.getPRNG()); |
20036 | |
20037 | // Construct the graph for the backend to run, converting to dest type and |
20038 | // back. |
20039 | auto IT = mod_.uniqueType(STy, shape); |
20040 | auto OT = mod_.uniqueType(DTy, shape); |
20041 | auto *convert = F_->createConvertTo("convert_forth" , data, OT); |
20042 | auto *convertBack = F_->createConvertTo("convert_back" , convert, IT); |
20043 | auto *save = F_->createSave("save" , convertBack); |
20044 | auto resultH = |
20045 | bindings_.allocate(save->getPlaceholder())->getHandle<SourceType>(); |
20046 | |
20047 | // Compile and run the model, setting results in tensor backed by resultH. |
20048 | EXPECT_EQ(F_->getNodes().size(), 3); |
20049 | EE_.compile(CompilationMode::Infer); |
20050 | EE_.run(bindings_); |
20051 | EXPECT_EQ(F_->getNodes().size(), size_t(castIsNoOp ? 1 : 3)); |
20052 | |
20053 | for (size_t i = 0, e = resultH.size(); i < e; i++) { |
20054 | const SourceType res = resultH.raw(i); |
20055 | const SourceType expected = |
20056 | static_cast<SourceType>(static_cast<DestType>(dataH.raw(i))); |
20057 | EXPECT_EQ(res, expected); |
20058 | } |
20059 | } |
20060 | |
/// Test that the ConvertTo operator casts correctly from one type to another
/// and back.
20062 | #define TEST_CAST_2WAYS(T_FROM, T_TO, DTY_FROM, DTY_TO, NOOP_CAST) \ |
20063 | TEST_P(OperatorTest, ConvertFrom_##DTY_FROM##_To_##DTY_TO##_AndBack) { \ |
20064 | CHECK_IF_ENABLED(); \ |
20065 | testConvertToAndBack<T_FROM, T_TO>(bindings_, mod_, F_, EE_, \ |
20066 | ElemKind::DTY_FROM, ElemKind::DTY_TO, \ |
20067 | NOOP_CAST); \ |
20068 | } |
20069 | TEST_CAST_2WAYS(float, float, FloatTy, FloatTy, /* castIsNoOp */ true) |
20070 | TEST_CAST_2WAYS(float, float16_t, FloatTy, Float16Ty, /* castIsNoOp */ false) |
20071 | // FIXME: Should this test succeed? |
20072 | TEST_CAST_2WAYS(float, bfloat16_t, FloatTy, BFloat16Ty, |
20073 | /* castIsNoOp */ false) |
20074 | TEST_CAST_2WAYS(float, int32_t, FloatTy, Int32ITy, /* castIsNoOp */ false) |
20075 | TEST_CAST_2WAYS(float, int64_t, FloatTy, Int64ITy, /* castIsNoOp */ false) |
20076 | TEST_CAST_2WAYS(float16_t, float, Float16Ty, FloatTy, /* castIsNoOp */ true) |
20077 | TEST_CAST_2WAYS(float16_t, float16_t, Float16Ty, Float16Ty, |
20078 | /* castIsNoOp */ true) |
20079 | TEST_CAST_2WAYS(float16_t, bfloat16_t, Float16Ty, BFloat16Ty, |
20080 | /* castIsNoOp */ false) |
20081 | TEST_CAST_2WAYS(float16_t, int32_t, Float16Ty, Int32ITy, |
20082 | /* castIsNoOp */ false) |
20083 | TEST_CAST_2WAYS(float16_t, int64_t, Float16Ty, Int64ITy, |
20084 | /* castIsNoOp */ false) |
20085 | TEST_CAST_2WAYS(bfloat16_t, float, BFloat16Ty, FloatTy, /* castIsNoOp */ true) |
20086 | TEST_CAST_2WAYS(bfloat16_t, float16_t, BFloat16Ty, Float16Ty, |
20087 | /* castIsNoOp */ true) |
20088 | TEST_CAST_2WAYS(bfloat16_t, bfloat16_t, BFloat16Ty, BFloat16Ty, |
20089 | /* castIsNoOp */ true) |
20090 | TEST_CAST_2WAYS(bfloat16_t, int32_t, BFloat16Ty, Int32ITy, |
20091 | /* castIsNoOp */ false) |
20092 | TEST_CAST_2WAYS(bfloat16_t, int64_t, BFloat16Ty, Int64ITy, |
20093 | /* castIsNoOp */ false) |
20094 | TEST_CAST_2WAYS(int32_t, float, Int32ITy, FloatTy, /* castIsNoOp */ false) |
20095 | TEST_CAST_2WAYS(int32_t, float16_t, Int32ITy, Float16Ty, |
20096 | /* castIsNoOp */ false) |
20097 | TEST_CAST_2WAYS(int32_t, bfloat16_t, Int32ITy, BFloat16Ty, |
20098 | /* castIsNoOp */ false) |
20099 | TEST_CAST_2WAYS(int32_t, int32_t, Int32ITy, Int32ITy, /* castIsNoOp */ true) |
20100 | TEST_CAST_2WAYS(int32_t, int64_t, Int32ITy, Int64ITy, /* castIsNoOp */ true) |
20101 | TEST_CAST_2WAYS(int64_t, float, Int64ITy, FloatTy, /* castIsNoOp */ false) |
20102 | TEST_CAST_2WAYS(int64_t, float16_t, Int64ITy, Float16Ty, |
20103 | /* castIsNoOp */ false) |
20104 | TEST_CAST_2WAYS(int64_t, bfloat16_t, Int64ITy, BFloat16Ty, |
20105 | /* castIsNoOp */ false) |
20106 | TEST_CAST_2WAYS(int64_t, int32_t, Int64ITy, Int32ITy, /* castIsNoOp */ false) |
20107 | TEST_CAST_2WAYS(int64_t, int64_t, Int64ITy, Int64ITy, /* castIsNoOp */ true) |
20108 | |
20109 | #undef TEST_CAST_2WAYS |
20110 | |
20111 | TEST_P(OperatorTest, ConvertFusedToFusedFP16) { |
20112 | CHECK_IF_ENABLED(); |
20113 | |
20114 | // First create float data. |
20115 | Tensor fData(ElemKind::FloatTy, {20, 30}); |
20116 | fData.getHandle().randomize(-10.0f, 10.0f, mod_.getPRNG()); |
20117 | |
20118 | // Convert the float data to RWQ, with float scale/offset. |
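  // In the fused rowwise layout each row stores its quantized bytes followed
  // by a per-row scale and offset, which is why the row width below is padded
  // by 2 * sizeof(float) (and by 2 * sizeof(float16_t) for the FP16 variant).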
20119 | Tensor rwqData(ElemKind::UInt8FusedQTy, {20, 30 + 2 * (dim_t)sizeof(float)}, |
20120 | 1.0, 0); |
20121 | quantization::tensorFusedRowwiseQuantization<float>(fData, rwqData); |
20122 | |
  // Create a graph that converts the data to use float16_t scale/offset.
20124 | Placeholder *rwqDataPH = |
20125 | mod_.createPlaceholder(mod_.uniqueType(rwqData.getType()), "lhs" , false); |
20126 | auto OT = mod_.uniqueType(ElemKind::UInt8FusedFP16QTy, |
20127 | {20, 30 + 2 * (dim_t)sizeof(float16_t)}, 1.0, 0); |
20128 | auto *convert = F_->createConvertTo("convert" , rwqDataPH, OT); |
20129 | auto *save = F_->createSave("save" , convert); |
20130 | auto *resultT = bindings_.allocate(save->getPlaceholder()); |
20131 | bindings_.insert(rwqDataPH, std::move(rwqData)); |
20132 | |
20133 | EE_.compile(CompilationMode::Infer); |
20134 | EE_.run(bindings_); |
20135 | |
20136 | // Dequantize the resulting RWQ w/ float16_t scale/offset, and compare to |
20137 | // the original float data we started with. |
20138 | Tensor dequantResult = |
20139 | quantization::dequantizeTensor(*resultT, ElemKind::FloatTy); |
20140 | EXPECT_TRUE(dequantResult.isEqual(fData, 0.05)); |
20141 | } |
20142 | |
20143 | template <typename DataType> |
20144 | glow::Handle<DataType> |
20145 | mulHelper(glow::PlaceholderBindings &bindings, glow::Module &mod, |
20146 | glow::Function *F, glow::ExecutionEngine &EE, ElemKind DTy, |
20147 | llvm::ArrayRef<DataType> lhsValues, |
20148 | llvm::ArrayRef<DataType> rhsValues, llvm::ArrayRef<dim_t> lhsDims, |
20149 | llvm::ArrayRef<dim_t> rhsDims) { |
20150 | auto *lhs = mod.createPlaceholder(DTy, lhsDims, "lhs" , false); |
20151 | auto *rhs = mod.createPlaceholder(DTy, rhsDims, "rhs" , false); |
20152 | bindings.allocate(lhs)->getHandle<DataType>() = lhsValues; |
20153 | bindings.allocate(rhs)->getHandle<DataType>() = rhsValues; |
20154 | |
20155 | auto *N = F->createMul("Mul" , lhs, rhs); |
20156 | auto *save = F->createSave("save" , N); |
20157 | auto *saveTensor = bindings.allocate(save->getPlaceholder()); |
20158 | |
20159 | EE.compile(CompilationMode::Infer); |
20160 | EE.run(bindings); |
20161 | |
20162 | return saveTensor->getHandle<DataType>(); |
20163 | } |
20164 | |
20165 | /// Check that the Mul operator behaves correctly with int32. |
20166 | TEST_P(OperatorTest, mul_int32) { |
20167 | CHECK_IF_ENABLED(); |
20168 | |
20169 | llvm::SmallVector<int32_t, 16> xValues = { |
20170 | 3, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20171 | |
20172 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20173 | |
20174 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20175 | |
20176 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1}; |
20177 | |
20178 | llvm::SmallVector<int32_t, 16> yValues = { |
20179 | 3, 4, 5, 7, 2, 5, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20180 | |
20181 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20182 | |
20183 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20184 | |
20185 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6}; |
20186 | |
20187 | llvm::SmallVector<dim_t, 4> xDims = {2, 2, 4, 4}; |
20188 | llvm::SmallVector<dim_t, 4> yDims = {2, 2, 4, 4}; |
20189 | |
20190 | Handle<int32_t> saveH = |
20191 | mulHelper<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy, xValues, |
20192 | yValues, xDims, yDims); |
20193 | |
20194 | int counter = 0; |
20195 | for (dim_t i = 0; i < saveH.dims()[0]; ++i) { |
20196 | for (dim_t j = 0; j < saveH.dims()[1]; ++j) { |
20197 | for (dim_t k = 0; k < saveH.dims()[2]; ++k) { |
20198 | for (dim_t f = 0; f < saveH.dims()[3]; ++f) { |
20199 | EXPECT_EQ(xValues[counter] * yValues[counter], |
20200 | saveH.at({i, j, k, f})); |
20201 | ++counter; |
20202 | } |
20203 | } |
20204 | } |
20205 | } |
20206 | } |
20207 | |
20208 | /// Check that the Mul operator behaves correctly with int64. |
20209 | TEST_P(OperatorTest, mul_int64) { |
20210 | CHECK_IF_ENABLED(); |
20211 | |
20212 | llvm::SmallVector<int64_t, 16> xValues = { |
20213 | 3, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20214 | |
20215 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20216 | |
20217 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20218 | |
20219 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1}; |
20220 | |
20221 | llvm::SmallVector<int64_t, 16> yValues = { |
20222 | 3, 4, 5, 7, 2, 5, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20223 | |
20224 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20225 | |
20226 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20227 | |
20228 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6}; |
20229 | |
20230 | llvm::SmallVector<dim_t, 4> xDims = {2, 2, 4, 4}; |
20231 | llvm::SmallVector<dim_t, 4> yDims = {2, 2, 4, 4}; |
20232 | |
20233 | Handle<int64_t> saveH = |
20234 | mulHelper<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy, xValues, |
20235 | yValues, xDims, yDims); |
20236 | |
20237 | int counter = 0; |
20238 | for (dim_t i = 0; i < saveH.dims()[0]; ++i) { |
20239 | for (dim_t j = 0; j < saveH.dims()[1]; ++j) { |
20240 | for (dim_t k = 0; k < saveH.dims()[2]; ++k) { |
20241 | for (dim_t f = 0; f < saveH.dims()[3]; ++f) { |
20242 | EXPECT_EQ(xValues[counter] * yValues[counter], |
20243 | saveH.at({i, j, k, f})); |
20244 | ++counter; |
20245 | } |
20246 | } |
20247 | } |
20248 | } |
20249 | } |

/// Check that the Mul operator behaves correctly with float.
20251 | TEST_P(OperatorTest, mul_float) { |
20252 | CHECK_IF_ENABLED(); |
20253 | |
20254 | llvm::SmallVector<float, 16> xValues = { |
20255 | 3, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20256 | |
20257 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20258 | |
20259 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20260 | |
20261 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1}; |
20262 | |
20263 | llvm::SmallVector<float, 16> yValues = { |
20264 | 3, 4, 5, 7, 2, 5, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20265 | |
20266 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20267 | |
20268 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20269 | |
20270 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6}; |
20271 | |
20272 | llvm::SmallVector<dim_t, 4> xDims = {2, 2, 4, 4}; |
20273 | llvm::SmallVector<dim_t, 4> yDims = {2, 2, 4, 4}; |
20274 | |
20275 | Handle<float> saveH = |
20276 | mulHelper<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, xValues, |
20277 | yValues, xDims, yDims); |
20278 | |
20279 | int counter = 0; |
20280 | for (dim_t i = 0; i < saveH.dims()[0]; ++i) { |
20281 | for (dim_t j = 0; j < saveH.dims()[1]; ++j) { |
20282 | for (dim_t k = 0; k < saveH.dims()[2]; ++k) { |
20283 | for (dim_t f = 0; f < saveH.dims()[3]; ++f) { |
20284 | EXPECT_FLOAT_EQ(xValues[counter] * yValues[counter], |
20285 | saveH.at({i, j, k, f})); |
20286 | ++counter; |
20287 | } |
20288 | } |
20289 | } |
20290 | } |
20291 | } |
20292 | |
20293 | template <typename DataType> |
20294 | glow::Handle<DataType> |
20295 | addHelper(glow::PlaceholderBindings &bindings, glow::Module &mod, |
20296 | glow::Function *F, glow::ExecutionEngine &EE, ElemKind DTy, |
20297 | llvm::ArrayRef<DataType> lhsValues, |
20298 | llvm::ArrayRef<DataType> rhsValues, llvm::ArrayRef<dim_t> lhsDims, |
20299 | llvm::ArrayRef<dim_t> rhsDims) { |
20300 | auto *lhs = mod.createPlaceholder(DTy, lhsDims, "lhs" , false); |
20301 | auto *rhs = mod.createPlaceholder(DTy, rhsDims, "rhs" , false); |
20302 | bindings.allocate(lhs)->getHandle<DataType>() = lhsValues; |
20303 | bindings.allocate(rhs)->getHandle<DataType>() = rhsValues; |
20304 | |
20305 | auto *N = F->createAdd("Add" , lhs, rhs); |
20306 | auto *save = F->createSave("save" , N); |
20307 | auto *saveTensor = bindings.allocate(save->getPlaceholder()); |
20308 | |
20309 | EE.compile(CompilationMode::Infer); |
20310 | EE.run(bindings); |
20311 | |
20312 | return saveTensor->getHandle<DataType>(); |
20313 | } |
20314 | |
/// Check that the Add operator behaves correctly with int32.
20316 | TEST_P(OperatorTest, add_int32) { |
20317 | CHECK_IF_ENABLED(); |
20318 | |
20319 | llvm::SmallVector<int32_t, 16> xValues = { |
20320 | 3, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20321 | |
20322 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20323 | |
20324 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20325 | |
20326 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1}; |
20327 | |
20328 | llvm::SmallVector<int32_t, 16> yValues = { |
20329 | 3, 4, 5, 7, 2, 5, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20330 | |
20331 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20332 | |
20333 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20334 | |
20335 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6}; |
20336 | |
20337 | llvm::SmallVector<dim_t, 4> xDims = {2, 2, 4, 4}; |
20338 | llvm::SmallVector<dim_t, 4> yDims = {2, 2, 4, 4}; |
20339 | |
20340 | Handle<int32_t> saveH = |
20341 | addHelper<int32_t>(bindings_, mod_, F_, EE_, ElemKind::Int32ITy, xValues, |
20342 | yValues, xDims, yDims); |
20343 | |
20344 | int counter = 0; |
20345 | for (dim_t i = 0; i < saveH.dims()[0]; ++i) { |
20346 | for (dim_t j = 0; j < saveH.dims()[1]; ++j) { |
20347 | for (dim_t k = 0; k < saveH.dims()[2]; ++k) { |
20348 | for (dim_t f = 0; f < saveH.dims()[3]; ++f) { |
20349 | EXPECT_EQ(xValues[counter] + yValues[counter], |
20350 | saveH.at({i, j, k, f})); |
20351 | ++counter; |
20352 | } |
20353 | } |
20354 | } |
20355 | } |
20356 | } |
20357 | |
/// Check that the Add operator behaves correctly with int64.
20359 | TEST_P(OperatorTest, add_int64) { |
20360 | CHECK_IF_ENABLED(); |
20361 | |
20362 | llvm::SmallVector<int64_t, 16> xValues = { |
20363 | 3, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20364 | |
20365 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20366 | |
20367 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20368 | |
20369 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1}; |
20370 | |
20371 | llvm::SmallVector<int64_t, 16> yValues = { |
20372 | 3, 4, 5, 7, 2, 5, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20373 | |
20374 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20375 | |
20376 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20377 | |
20378 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6}; |
20379 | |
20380 | llvm::SmallVector<dim_t, 4> xDims = {2, 2, 4, 4}; |
20381 | llvm::SmallVector<dim_t, 4> yDims = {2, 2, 4, 4}; |
20382 | |
20383 | Handle<int64_t> saveH = |
20384 | addHelper<int64_t>(bindings_, mod_, F_, EE_, ElemKind::Int64ITy, xValues, |
20385 | yValues, xDims, yDims); |
20386 | |
20387 | int counter = 0; |
20388 | for (dim_t i = 0; i < saveH.dims()[0]; ++i) { |
20389 | for (dim_t j = 0; j < saveH.dims()[1]; ++j) { |
20390 | for (dim_t k = 0; k < saveH.dims()[2]; ++k) { |
20391 | for (dim_t f = 0; f < saveH.dims()[3]; ++f) { |
20392 | EXPECT_EQ(xValues[counter] + yValues[counter], |
20393 | saveH.at({i, j, k, f})); |
20394 | ++counter; |
20395 | } |
20396 | } |
20397 | } |
20398 | } |
20399 | } |

/// Check that the Add operator behaves correctly with float.
20401 | TEST_P(OperatorTest, add_float) { |
20402 | CHECK_IF_ENABLED(); |
20403 | |
20404 | llvm::SmallVector<float, 16> xValues = { |
20405 | 3, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20406 | |
20407 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20408 | |
20409 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1, |
20410 | |
20411 | 1, 2, 3, 6, 4, 5, 6, 3, 7, 8, 9, 2, 3, 5, 7, 1}; |
20412 | |
20413 | llvm::SmallVector<float, 16> yValues = { |
20414 | 3, 4, 5, 7, 2, 5, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20415 | |
20416 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20417 | |
20418 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6, |
20419 | |
20420 | 3, 4, 5, 7, 2, 1, 0, 6, 4, 2, 1, 8, 5, 9, 2, 6}; |
20421 | |
20422 | llvm::SmallVector<dim_t, 4> xDims = {2, 2, 4, 4}; |
20423 | llvm::SmallVector<dim_t, 4> yDims = {2, 2, 4, 4}; |
20424 | |
20425 | Handle<float> saveH = |
20426 | addHelper<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy, xValues, |
20427 | yValues, xDims, yDims); |
20428 | |
20429 | int counter = 0; |
20430 | for (dim_t i = 0; i < saveH.dims()[0]; ++i) { |
20431 | for (dim_t j = 0; j < saveH.dims()[1]; ++j) { |
20432 | for (dim_t k = 0; k < saveH.dims()[2]; ++k) { |
20433 | for (dim_t f = 0; f < saveH.dims()[3]; ++f) { |
20434 | EXPECT_FLOAT_EQ(xValues[counter] + yValues[counter], |
20435 | saveH.at({i, j, k, f})); |
20436 | ++counter; |
20437 | } |
20438 | } |
20439 | } |
20440 | } |
20441 | } |

static FunctionTensorPair
20443 | createAndInitLayerNormStrongNormShapeTest(glow::PlaceholderBindings &bindings, |
20444 | glow::ExecutionEngine &EE) { |
20445 | auto &mod = EE.getModule(); |
20446 | Function *F = mod.createFunction("main" ); |
20447 | |
20448 | auto *input = |
20449 | mod.createPlaceholder(ElemKind::FloatTy, {1, 4, 5, 6}, "in" , false); |
20450 | |
20451 | Tensor scaleT(ElemKind::FloatTy, {5, 6}); |
20452 | scaleT.getHandle().randomize(0.0f, 1.0f, mod.getPRNG()); |
20453 | Constant *scaleC = mod.createConstant("scale" , std::move(scaleT)); |
20454 | Tensor biasT(ElemKind::FloatTy, {5, 6}); |
20455 | biasT.getHandle().randomize(0.0f, 1.0f, mod.getPRNG()); |
20456 | Constant *biasC = mod.createConstant("bias" , std::move(biasT)); |
20457 | |
20458 | LayerNormalizationNode *LNN = F->createLayerNormalization( |
20459 | "LN" , input->getType(), input, scaleC, biasC, 1e-5); |
20460 | |
20461 | bindings.allocate(input)->getHandle().randomize(0.0f, 1.0f, mod.getPRNG()); |
20462 | |
20463 | auto *res = F->createSave("save" , LNN); |
20464 | ::glow::convertPlaceholdersToConstants(F, bindings, |
20465 | {input, res->getPlaceholder()}); |
20466 | auto *resultTensor = bindings.allocate(res->getPlaceholder()); |
20467 | |
20468 | return std::make_pair(F, resultTensor); |
20469 | } |
20470 | |
/// Test LayerNorm with Float16Ty and a strong norm_shape (more than one
/// dimension, with non-identical dims).
20473 | TEST_P(OperatorStatelessTest, LayerNorm_Float16_StrongNormShape) { |
20474 | CHECK_IF_ENABLED(); |
20475 | compareAgainstInterpreter( |
20476 | getBackendName(), createAndInitLayerNormStrongNormShapeTest, |
20477 | ElemKind::FloatTy, ElemKind::Float16Ty, 0.05f, parCloneCountOpt); |
20478 | } |
20479 | |
20480 | static FunctionTensorPair |
20481 | createAndInitLayerNormTest(glow::PlaceholderBindings &bindings, |
20482 | glow::ExecutionEngine &EE) { |
20483 | auto &mod = EE.getModule(); |
20484 | Function *F = mod.createFunction("main" ); |
20485 | |
20486 | auto *input = |
20487 | mod.createPlaceholder(ElemKind::FloatTy, {1, 4, 5, 5}, "in" , false); |
20488 | |
20489 | Tensor scaleT(ElemKind::FloatTy, {5, 5}); |
20490 | scaleT.getHandle().randomize(0.0f, 1.0f, mod.getPRNG()); |
20491 | Constant *scaleC = mod.createConstant("scale" , std::move(scaleT)); |
20492 | Tensor biasT(ElemKind::FloatTy, {5, 5}); |
20493 | biasT.getHandle().randomize(0.0f, 1.0f, mod.getPRNG()); |
20494 | Constant *biasC = mod.createConstant("bias" , std::move(biasT)); |
20495 | |
20496 | LayerNormalizationNode *LNN = F->createLayerNormalization( |
20497 | "LN" , input->getType(), input, scaleC, biasC, 1e-5); |
20498 | |
20499 | bindings.allocate(input)->getHandle().randomize(0.0f, 1.0f, mod.getPRNG()); |
20500 | |
20501 | auto *res = F->createSave("save" , LNN); |
20502 | ::glow::convertPlaceholdersToConstants(F, bindings, |
20503 | {input, res->getPlaceholder()}); |
20504 | auto *resultTensor = bindings.allocate(res->getPlaceholder()); |
20505 | |
20506 | return std::make_pair(F, resultTensor); |
20507 | } |
20508 | |
20509 | /// Test LayerNorm with FloatTy. |
20510 | TEST_P(OperatorStatelessTest, LayerNorm_Float) { |
20511 | CHECK_IF_ENABLED(); |
20512 | compareAgainstInterpreter(getBackendName(), createAndInitLayerNormTest, |
20513 | ElemKind::FloatTy, ElemKind::FloatTy, 0.0001f, |
20514 | parCloneCountOpt); |
20515 | } |
20516 | |
20517 | /// Test LayerNorm with Float16Ty. |
20518 | TEST_P(OperatorStatelessTest, LayerNorm_Float16) { |
20519 | CHECK_IF_ENABLED(); |
20520 | compareAgainstInterpreter(getBackendName(), createAndInitLayerNormTest, |
20521 | ElemKind::FloatTy, ElemKind::Float16Ty, 0.01f, |
20522 | parCloneCountOpt); |
20523 | } |
20524 | |
/// Test LayerNorm with FloatTy against precomputed reference values.
20526 | TEST_P(OperatorStatelessTest, LayerNormMock_Float32) { |
20527 | CHECK_IF_ENABLED(); |
20528 | /* |
20529 | WEIGHT = [2.4180, 2.2070, 2.3184, 0.7378, 0.7734, 0.7520] |
20530 | BIAS = [0.1567, 0.0308, 0.0166, 0.2944, 0.2759, 0.5649] |
20531 | INPUT = [ |
20532 | 1.0, |
20533 | 2.0, |
20534 | 3.0, |
20535 | 4.0, |
20536 | 5.0, |
20537 | 6.0, |
20538 | ] |
20539 | TARGET = [ |
20540 | -3.382883310317993, |
20541 | -1.907626986503601, |
20542 | -0.662156879901886, |
20543 | 0.5104053020477295, |
20544 | 0.9551836252212524, |
20545 | 1.6657130718231201, |
20546 | ] |
20547 | */ |
20548 | ExecutionEngine EE{}; |
20549 | auto &mod = EE.getModule(); |
20550 | Function *F = mod.createFunction("main" ); |
20551 | |
20552 | auto *input = |
20553 | mod.createPlaceholder(ElemKind::FloatTy, {1, 6}, "input" , false); |
20554 | |
20555 | PlaceholderBindings bindings; |
20556 | |
20557 | bindings.allocate(input)->getHandle() = { |
20558 | 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, |
20559 | }; |
20560 | Tensor scaleT(ElemKind::FloatTy, {6}); |
20561 | scaleT.getHandle() = {2.4180f, 2.2070f, 2.3184f, 0.7378f, 0.7734f, 0.7520f}; |
20562 | Constant *scaleC = mod.createConstant("scale" , std::move(scaleT)); |
20563 | Tensor biasT(ElemKind::FloatTy, {6}); |
20564 | biasT.getHandle() = {0.1567f, 0.0308f, 0.0166f, 0.2944f, 0.2759f, 0.5649f}; |
20565 | Constant *biasC = mod.createConstant("bias" , std::move(biasT)); |
20566 | |
20567 | LayerNormalizationNode *LNN = F->createLayerNormalization( |
20568 | "LN" , input->getType(), input, scaleC, biasC, 1e-5); |
20569 | |
20570 | auto *res = F->createSave("save" , LNN); |
20571 | bindings.allocate(res->getPlaceholder()); |
20572 | |
20573 | EE.compile(CompilationMode::Infer); |
20574 | EE.run(bindings); |
20575 | |
20576 | Tensor expected(ElemKind::FloatTy, {1, 6}); |
20577 | expected.getHandle() = { |
20578 | -3.382883310317993f, -1.907626986503601f, -0.662156879901886f, |
20579 | 0.5104053020477295f, 0.9551836252212524f, 1.6657130718231201f, |
20580 | }; |
20581 | |
20582 | EXPECT_TRUE(expected.isEqual(*bindings.get(res->getPlaceholder()))); |
20583 | } |
20584 | |
20585 | /// Test LayerNorm with BFloat16Ty. |
20586 | TEST_P(OperatorStatelessTest, LayerNorm_BFloat16) { |
20587 | CHECK_IF_ENABLED(); |
20588 | compareAgainstInterpreter(getBackendName(), createAndInitLayerNormTest, |
20589 | ElemKind::FloatTy, ElemKind::BFloat16Ty, 0.01f, |
20590 | parCloneCountOpt); |
20591 | } |
20592 | |
20593 | template <typename DataType> |
20594 | static void QuantizedLayerNormTest(glow::PlaceholderBindings &bindings, |
20595 | glow::Module &mod, glow::Function *F, |
20596 | glow::ExecutionEngine &EE, ElemKind DTy) { |
20597 | auto *input = |
20598 | mod.createPlaceholder(ElemKind::Int8QTy, {1, 1, 2, 2}, /* scale */ 0.3, |
20599 | /* offset */ 0, "in" , false); |
20600 | bindings.allocate(input)->getHandle<int8_t>() = {-4, 3, -2, 3}; |
20601 | |
20602 | auto scaleT = createTensorConditionallyQuantized(DTy, {2, 2}); |
20603 | scaleT.getHandle<DataType>() = {1, 2, 1, 2}; |
20604 | Constant *scaleC = mod.createConstant("scale" , std::move(scaleT)); |
20605 | auto biasT = createTensorConditionallyQuantized(DTy, {2, 2}); |
20606 | biasT.getHandle<DataType>() = {1, -1, 0, 1}; |
20607 | Constant *biasC = mod.createConstant("bias" , std::move(biasT)); |
20608 | |
20609 | auto outTy = F->getParent()->uniqueType(ElemKind::Int8QTy, input->dims(), |
20610 | /* scale */ 0.1, /* offset */ 0); |
20611 | |
20612 | LayerNormalizationNode *LNN = F->createLayerNormalization( |
20613 | "LN" , outTy, input, scaleC, biasC, /* eps */ 1e-5); |
20614 | |
20615 | auto *result = F->createSave("save" , LNN); |
20616 | bindings.allocate(result->getPlaceholder()); |
20617 | |
20618 | EE.compile(CompilationMode::Infer); |
20619 | EE.run(bindings); |
20620 | |
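  // The result is int8 with scale 0.1 and offset 0, so each element
  // dequantizes as q * 0.1. The checks below compare that against the float
  // LayerNorm reference, allowing roughly one quantization step of error for
  // the entries whose exact result lands near a step boundary.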
20621 | auto resultH = bindings.get(result->getPlaceholder())->getHandle<int8_t>(); |
20622 | EXPECT_NEAR(resultH.at({0, 0, 0, 0}) * 0.1, -0.3, 1e-04); |
20623 | EXPECT_NEAR(resultH.at({0, 0, 0, 1}) * 0.1, 0.9, 1.1e-01); |
20624 | EXPECT_NEAR(resultH.at({0, 0, 1, 0}) * 0.1, -0.6, 1e-04); |
20625 | EXPECT_NEAR(resultH.at({0, 0, 1, 1}) * 0.1, 2.9, 1.1e-01); |
20626 | } |
20627 | |
20628 | TEST_P(OperatorTest, LayerNorm_Int8_With_Int8_Scale_Bias) { |
20629 | CHECK_IF_ENABLED(); |
20630 | |
20631 | QuantizedLayerNormTest<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
20632 | } |
20633 | |
20634 | TEST_P(OperatorTest, LayerNorm_Int8_With_Float16_Scale_Bias) { |
20635 | CHECK_IF_ENABLED(); |
20636 | |
20637 | QuantizedLayerNormTest<float16_t>(bindings_, mod_, F_, EE_, |
20638 | ElemKind::Float16Ty); |
20639 | } |
20640 | |
20641 | TEST_P(OperatorTest, LayerNorm_Int8_With_Float_Scale_Bias) { |
20642 | CHECK_IF_ENABLED(); |
20643 | |
20644 | QuantizedLayerNormTest<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
20645 | } |
20646 | |
20647 | static void testDequantizeFRWQ(glow::PlaceholderBindings &bindings, |
20648 | glow::Module &mod, glow::Function *F, |
20649 | glow::ExecutionEngine &EE, ElemKind destTy) { |
20650 | Tensor FT(ElemKind::FloatTy, {10, 20}); |
20651 | FT.getHandle().randomize(-0.5, 0.5, mod.getPRNG()); |
20652 | TypeRef RWQTy = mod.uniqueType(ElemKind::UInt8FusedQTy, |
20653 | {10, 20 + 2 * sizeof(float)}, 1.0, 0); |
20654 | Tensor RWQT(RWQTy); |
20655 | quantization::tensorFusedRowwiseQuantization<float>(FT, RWQT); |
20656 | |
20657 | auto *input = mod.createPlaceholder(RWQTy, "input" , false); |
20658 | bindings.insert(input, std::move(RWQT)); |
20659 | |
20660 | auto *D = F->createDequantize("dequantize" , input, destTy); |
20661 | auto *save = F->createSave("ret" , D); |
20662 | auto *result = bindings.allocate(save->getPlaceholder()); |
20663 | |
20664 | EE.compile(CompilationMode::Infer); |
20665 | EE.run(bindings); |
20666 | |
20667 | if (destTy == ElemKind::Float16Ty) { |
20668 | FT.convertToType(destTy); |
20669 | } |
20670 | EXPECT_TRUE(FT.isEqual(*result, 0.002f)); |
20671 | } |
20672 | |
20673 | TEST_P(OperatorTest, DequantizeFRWQ_Float) { |
20674 | CHECK_IF_ENABLED(); |
20675 | testDequantizeFRWQ(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
20676 | } |
20677 | TEST_P(OperatorTest, DequantizeFRWQ_Float16) { |
20678 | CHECK_IF_ENABLED(); |
20679 | testDequantizeFRWQ(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
20680 | } |
20681 | |
20682 | template <typename DataType> |
20683 | static void testUpsample3D(glow::PlaceholderBindings &bindings, |
20684 | glow::Module &mod, glow::Function *F, |
20685 | glow::ExecutionEngine &EE, ElemKind DTy) { |
20686 | constexpr std::array<dim_t, 5> size{1, 2, 3, 4, 3}; // NTHWC |
20687 | auto *input = |
20688 | createPlaceholderConditionallyQuantized(mod, DTy, size, "input" , false); |
20689 | bindings.allocate(input)->getHandle<DataType>().randomize(-10.0, 10.0, |
20690 | mod.getPRNG()); |
20691 | |
20692 | auto *output = F->createResizeNearest("Upsample" , input, {1, 4, 2, 3, 1}); |
20693 | auto *save = F->createSave("Save" , output); |
20694 | bindings.allocate(save->getPlaceholder()); |
20695 | |
20696 | EE.compile(CompilationMode::Infer); |
20697 | EE.run(bindings); |
20698 | |
20699 | auto resultH = bindings.get(save->getPlaceholder())->getHandle<DataType>(); |
20700 | auto inputH = bindings.get(input)->getHandle<DataType>(); |
20701 | |
20702 | EXPECT_EQ(resultH.dims()[0], inputH.dims()[0]); |
20703 | EXPECT_EQ(resultH.dims()[1], 4 * inputH.dims()[1]); |
20704 | EXPECT_EQ(resultH.dims()[2], 2 * inputH.dims()[2]); |
20705 | EXPECT_EQ(resultH.dims()[3], 3 * inputH.dims()[3]); |
20706 | EXPECT_EQ(resultH.dims()[4], inputH.dims()[4]); |
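  // Nearest-neighbor upsampling replicates values: the input element at
  // (m, i, j, k, n) fills the whole 4x2x3 output block starting at
  // (m, 4*i, 2*j, 3*k, n), which is what the loops below verify.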
20707 | for (dim_t m = 0; m < size[0]; m++) { |
20708 | for (dim_t i = 0; i < size[1]; i++) { |
20709 | for (dim_t j = 0; j < size[2]; j++) { |
20710 | for (dim_t k = 0; k < size[3]; k++) { |
20711 | for (dim_t n = 0; n < size[4]; n++) { |
20712 | for (dim_t i_delta = 0; i_delta < 4; i_delta++) { |
20713 | for (dim_t j_delta = 0; j_delta < 2; j_delta++) { |
20714 | for (dim_t k_delta = 0; k_delta < 3; k_delta++) { |
20715 | EXPECT_EQ(resultH.at({m, 4 * i + i_delta, 2 * j + j_delta, |
20716 | 3 * k + k_delta, n}), |
20717 | static_cast<DataType>(inputH.at({m, i, j, k, n}))); |
20718 | } |
20719 | } |
20720 | } |
20721 | } |
20722 | } |
20723 | } |
20724 | } |
20725 | } |
20726 | } |
20727 | |
20728 | template <typename DataType> |
20729 | static void testUpsample2D(glow::PlaceholderBindings &bindings, |
20730 | glow::Module &mod, glow::Function *F, |
20731 | glow::ExecutionEngine &EE, ElemKind DTy) { |
20732 | constexpr std::array<dim_t, 4> size{1, 2, 3, 4}; // NHWC |
20733 | auto *input = |
20734 | createPlaceholderConditionallyQuantized(mod, DTy, size, "input" , false); |
20735 | bindings.allocate(input)->getHandle<DataType>().randomize(-10.0, 10.0, |
20736 | mod.getPRNG()); |
20737 | |
20738 | auto *output = F->createResizeNearest("Upsample" , input, {1, 2, 3, 1}); |
20739 | auto *save = F->createSave("Save" , output); |
20740 | bindings.allocate(save->getPlaceholder()); |
20741 | |
20742 | EE.compile(CompilationMode::Infer); |
20743 | EE.run(bindings); |
20744 | |
20745 | auto resultH = bindings.get(save->getPlaceholder())->getHandle<DataType>(); |
20746 | auto inputH = bindings.get(input)->getHandle<DataType>(); |
20747 | |
20748 | EXPECT_EQ(resultH.dims()[0], inputH.dims()[0]); |
20749 | EXPECT_EQ(resultH.dims()[1], 2 * inputH.dims()[1]); |
20750 | EXPECT_EQ(resultH.dims()[2], 3 * inputH.dims()[2]); |
20751 | EXPECT_EQ(resultH.dims()[3], inputH.dims()[3]); |
20752 | for (dim_t m = 0; m < size[0]; m++) { |
20753 | for (dim_t i = 0; i < size[1]; i++) { |
20754 | for (dim_t j = 0; j < size[2]; j++) { |
20755 | for (dim_t n = 0; n < size[3]; n++) { |
20756 | for (dim_t i_delta = 0; i_delta < 2; i_delta++) { |
20757 | for (dim_t j_delta = 0; j_delta < 3; j_delta++) { |
20758 | EXPECT_EQ(resultH.at({m, 2 * i + i_delta, 3 * j + j_delta, n}), |
20759 | static_cast<DataType>(inputH.at({m, i, j, n}))); |
20760 | } |
20761 | } |
20762 | } |
20763 | } |
20764 | } |
20765 | } |
20766 | } |
20767 | |
20768 | template <typename DataType> |
20769 | static void testUpsample1D(glow::PlaceholderBindings &bindings, |
20770 | glow::Module &mod, glow::Function *F, |
20771 | glow::ExecutionEngine &EE, ElemKind DTy) { |
20772 | constexpr std::array<dim_t, 3> size{2, 3, 4}; // NHC |
20773 | auto *input = |
20774 | createPlaceholderConditionallyQuantized(mod, DTy, size, "input" , false); |
20775 | bindings.allocate(input)->getHandle<DataType>().randomize(-10.0, 10.0, |
20776 | mod.getPRNG()); |
20777 | |
20778 | auto *output = F->createResizeNearest("Upsample" , input, {1, 2, 1}); |
20779 | auto *save = F->createSave("Save" , output); |
20780 | bindings.allocate(save->getPlaceholder()); |
20781 | |
20782 | EE.compile(CompilationMode::Infer); |
20783 | EE.run(bindings); |
20784 | |
20785 | auto resultH = bindings.get(save->getPlaceholder())->getHandle<DataType>(); |
20786 | auto inputH = bindings.get(input)->getHandle<DataType>(); |
20787 | |
20788 | EXPECT_EQ(resultH.dims()[0], inputH.dims()[0]); |
20789 | EXPECT_EQ(resultH.dims()[1], 2 * inputH.dims()[1]); |
20790 | EXPECT_EQ(resultH.dims()[2], inputH.dims()[2]); |
20791 | for (dim_t m = 0; m < size[0]; m++) { |
20792 | for (dim_t i = 0; i < size[1]; i++) { |
20793 | for (dim_t n = 0; n < size[2]; n++) { |
20794 | EXPECT_EQ(resultH.at({m, 2 * i + 0, n}), |
20795 | static_cast<DataType>(inputH.at({m, i, n}))); |
20796 | EXPECT_EQ(resultH.at({m, 2 * i + 1, n}), |
20797 | static_cast<DataType>(inputH.at({m, i, n}))); |
20798 | } |
20799 | } |
20800 | } |
20801 | } |
20802 | |
20803 | TEST_P(OperatorTest, Upsample_Nearest3D_Float) { |
20804 | CHECK_IF_ENABLED(); |
20805 | testUpsample3D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
20806 | } |
20807 | |
20808 | TEST_P(OperatorTest, Upsample_Nearest3D_Float16) { |
20809 | CHECK_IF_ENABLED(); |
20810 | testUpsample3D<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
20811 | } |
20812 | |
20813 | TEST_P(OperatorTest, Upsample_Nearest3D_Int8) { |
20814 | CHECK_IF_ENABLED(); |
20815 | testUpsample3D<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
20816 | } |
20817 | |
20818 | TEST_P(OperatorTest, Upsample_Nearest2D_Float) { |
20819 | CHECK_IF_ENABLED(); |
20820 | testUpsample2D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
20821 | } |
20822 | |
20823 | TEST_P(OperatorTest, Upsample_Nearest2D_Float16) { |
20824 | CHECK_IF_ENABLED(); |
20825 | testUpsample2D<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
20826 | } |
20827 | |
20828 | TEST_P(OperatorTest, Upsample_Nearest2D_Int8) { |
20829 | CHECK_IF_ENABLED(); |
20830 | testUpsample2D<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
20831 | } |
20832 | |
20833 | TEST_P(OperatorTest, Upsample_Nearest1D_Float) { |
20834 | CHECK_IF_ENABLED(); |
20835 | testUpsample1D<float>(bindings_, mod_, F_, EE_, ElemKind::FloatTy); |
20836 | } |
20837 | |
20838 | TEST_P(OperatorTest, Upsample_Nearest1D_Float16) { |
20839 | CHECK_IF_ENABLED(); |
20840 | testUpsample1D<float16_t>(bindings_, mod_, F_, EE_, ElemKind::Float16Ty); |
20841 | } |
20842 | |
20843 | TEST_P(OperatorTest, Upsample_Nearest1D_Int8) { |
20844 | CHECK_IF_ENABLED(); |
20845 | testUpsample1D<int8_t>(bindings_, mod_, F_, EE_, ElemKind::Int8QTy); |
20846 | } |
20847 | |
20848 | TEST_P(OperatorTest, RMSNorm) { |
20849 | CHECK_IF_ENABLED(); |
20850 | const std::vector<dim_t> XShape{3, 4}; |
20851 | auto *X = mod_.createPlaceholder(ElemKind::FloatTy, XShape, "X" , false); |
20852 | auto *gamma = mod_.createPlaceholder(ElemKind::FloatTy, 4, "gamma" , false); |
20853 | auto *beta = mod_.createPlaceholder(ElemKind::FloatTy, 4, "beta" , false); |
20854 | float epsilon = 1.0f; |
20855 | bindings_.allocate(X)->getHandle<float>() = {1, 2, 3, 4, 5, 6, |
20856 | 7, 8, 9, 10, 11, 12}; |
20857 | bindings_.allocate(gamma)->getHandle<float>() = {1, 2, 3, 4}; |
20858 | bindings_.allocate(beta)->getHandle<float>() = {1, 2, 3, 4}; |
20859 | auto rmsNorm = F_->createRMSNorm("rmsnorm" , X, gamma, beta, epsilon); |
20860 | auto *save0 = F_->createSave("save" , rmsNorm[0]); |
20861 | auto *save1 = F_->createSave("save" , rmsNorm[1]); |
20862 | auto *resultY = bindings_.allocate(save0->getPlaceholder()); |
20863 | auto *resultRrms = bindings_.allocate(save1->getPlaceholder()); |
20864 | EE_.compile(CompilationMode::Infer); |
20865 | EE_.run(bindings_); |
20866 | |
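  // RMSNorm computes y = x * rrms * gamma + beta with
  // rrms = 1 / sqrt(mean(x^2) + eps) per row. For the first row {1, 2, 3, 4}:
  // mean(x^2) = 7.5, rrms = 1 / sqrt(8.5) ~= 0.343, and
  // y[0][0] = 1 * 0.343 * 1 + 1 ~= 1.343, matching the values below.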
20867 | const std::vector<dim_t> expectedYShape{XShape}; |
20868 | const std::vector<std::vector<float>> expectedY{ |
20869 | {1.3429972, 3.3719888, 6.0869746, 9.487955}, |
20870 | {1.7495317, 3.798876, 6.148033, 8.797003}, |
20871 | {1.8485281, 3.8856182, 6.11127, 8.525484}, |
20872 | }; |
20873 | EXPECT_EQ(expectedYShape, resultY->dims().vec()); |
20874 | auto hY = resultY->getHandle<float>(); |
20875 | for (dim_t i = 0; i < expectedYShape[0]; ++i) { |
20876 | for (dim_t j = 0; j < expectedYShape[1]; ++j) { |
20877 | EXPECT_NEAR(expectedY[i][j], hY.at({i, j}), 1e-5) |
20878 | << "at pos (" << i << "," << j << ")" ; |
20879 | } |
20880 | } |
20881 | |
20882 | const std::vector<dim_t> expectedRrmsShape{XShape[0]}; |
20883 | const std::vector<float> expectedRrms{0.3429972, 0.14990634, 0.09428091}; |
20884 | EXPECT_EQ(expectedRrmsShape, resultRrms->dims().vec()); |
20885 | auto hRrms = resultRrms->getHandle<float>(); |
20886 | for (dim_t i = 0; i < expectedRrmsShape[0]; ++i) { |
20887 | EXPECT_NEAR(expectedRrms[i], hRrms.at({i}), 1e-5) << "at pos " << i; |
20888 | } |
20889 | } |
20890 | |
20891 | TEST_P(OperatorTest, InstanceNormalization_FloatTy) { |
20892 | CHECK_IF_ENABLED(); |
20893 | auto *inp = |
20894 | mod_.createPlaceholder(ElemKind::FloatTy, {2, 3, 2, 2}, "inp" , false); |
  // Initialize the input.
20896 | auto inpH = bindings_.allocate(inp)->getHandle<float>(); |
20897 | inpH = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, |
20898 | 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, |
20899 | 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0}; |
  // Set the scale and bias constants.
20901 | auto *scale = mod_.createConstant(ElemKind::FloatTy, {3}, "scale" ); |
20902 | scale->getHandle() = {1.0, 1.5, 2.0}; |
20903 | auto *bias = mod_.createConstant(ElemKind::FloatTy, {3}, "bias" ); |
20904 | bias->getHandle() = {0.0, 1.0, 2.0}; |
20905 | |
20906 | auto *node = |
20907 | F_->createInstanceNormalization("instNorm" , inp, bias, scale, 1, 1e-5); |
20908 | auto *save = F_->createSave("save" , node); |
20909 | auto *outT = bindings_.allocate(save->getPlaceholder()); |
20910 | EE_.compile(CompilationMode::Infer); |
20911 | EE_.run(bindings_); |
20912 | auto outH = outT->getHandle<float>(); |
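  // Instance norm computes (x - mean) / sqrt(var + eps) * scale + bias per
  // (batch, channel), which is equivalent to x * mergedScale + mergedBias
  // with mergedScale = scale / sqrt(var + eps) and
  // mergedBias = bias - mean * mergedScale. E.g. batch 0, channel 0 holds
  // {0, 1, 2, 3}: mean = 1.5, var = 1.25, so mergedScale ~= 0.8944 and
  // mergedBias ~= -1.3416.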
20913 | std::vector<float> mergedScale = {0.89442361, 1.34163542, 1.78884723, |
20914 | 0.89442361, 1.34163542, 1.78884723}; |
20915 | std::vector<float> mergedBias = {-1.34163542, -6.37899481, -14.99404865, |
20916 | -12.07471878, -22.47861985, -36.46021537}; |
20917 | |
20918 | EXPECT_EQ(outH.size(), 24); |
20919 | for (dim_t i = 0; i < 2; i++) { |
20920 | for (dim_t j = 0; j < 3; j++) { |
20921 | for (dim_t k = 0; k < 2; k++) { |
20922 | for (dim_t l = 0; l < 2; l++) { |
20923 | EXPECT_NEAR(outH.at({i, j, k, l}), |
20924 | inpH.at({i, j, k, l}) * mergedScale.at(i * 3 + j) + |
20925 | mergedBias.at(i * 3 + j), |
20926 | 1e-5); |
20927 | } |
20928 | } |
20929 | } |
20930 | } |
20931 | } |
20932 | |
20933 | TEST_P(OperatorTest, SparseLabelSplit) { |
20934 | CHECK_IF_ENABLED(); |
20935 | |
20936 | constexpr auto numLengths = 4U; |
20937 | constexpr auto numIndices = 8U; |
20938 | auto lengths = mod_.createPlaceholder(ElemKind::Int32ITy, {numLengths}, |
20939 | "lengths" , false); |
20940 | auto indices = mod_.createPlaceholder(ElemKind::Int64ITy, {numIndices}, |
20941 | "indices" , false); |
20942 | auto values = |
20943 | mod_.createPlaceholder(ElemKind::FloatTy, {numIndices}, "values" , false); |
20944 | constexpr auto numLabels = 4U; |
20945 | |
20946 | bindings_.allocate(lengths)->getHandle<int32_t>() = {1, 3, 2, 2}; |
20947 | bindings_.allocate(indices)->getHandle<int64_t>() = {3, 1, 2, 0, 0, 2, 1, 3}; |
20948 | bindings_.allocate(values)->getHandle<float>() = {1.2, 2.3, 3.1, 6.7, |
20949 | 8.3, 9.0, 3.7, 8.8}; |
20950 | |
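  // lengths partitions the eight (index, value) pairs into four examples:
  // example 0 -> {(3, 1.2)}, example 1 -> {(1, 2.3), (2, 3.1), (0, 6.7)},
  // example 2 -> {(0, 8.3), (2, 9.0)}, example 3 -> {(1, 3.7), (3, 8.8)}.
  // SparseLabelSplit groups the values by label id (the index) and records
  // which example each value came from.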
20951 | auto output = F_->createSparseLabelSplit("sparselabelsplit" , lengths, indices, |
20952 | values, numLabels); |
20953 | |
20954 | auto labelValues = F_->createSave("labelValues" , output->getLabelValues()); |
20955 | auto exampleIds = F_->createSave("exampleIds" , output->getExampleIds()); |
20956 | auto gradientOffsetMap = |
20957 | F_->createSave("gradientOffsetMap" , output->getGradientOffsetMap()); |
20958 | |
20959 | Tensor *labelValuesT = bindings_.allocate(labelValues->getPlaceholder()); |
20960 | Tensor *exampleIdsT = bindings_.allocate(exampleIds->getPlaceholder()); |
20961 | Tensor *gradientOffsetMapT = |
20962 | bindings_.allocate(gradientOffsetMap->getPlaceholder()); |
20963 | |
20964 | EE_.compile(CompilationMode::Infer); |
20965 | EE_.run(bindings_); |
20966 | |
20967 | const std::vector<dim_t> expectedOutputShape{numLabels, |
20968 | numIndices / numLabels}; |
20969 | EXPECT_EQ(expectedOutputShape, labelValuesT->dims().vec()); |
20970 | EXPECT_EQ(expectedOutputShape, exampleIdsT->dims().vec()); |
20971 | |
20972 | const std::vector<dim_t> expectedGradientOffsetMapShape{numIndices}; |
20973 | EXPECT_EQ(expectedGradientOffsetMapShape, gradientOffsetMapT->dims().vec()); |
20974 | |
20975 | const std::vector<std::vector<float>> expectedLabelValues{ |
20976 | {6.7, 8.3}, {2.3, 3.7}, {3.1, 9.0}, {1.2, 8.8}}; |
20977 | const std::vector<std::vector<int32_t>> expectedExampleIds{ |
20978 | {1, 2}, {1, 3}, {1, 2}, {0, 3}}; |
20979 | for (dim_t d1 = 0; d1 < numLabels; ++d1) { |
20980 | for (dim_t d2 = 0; d2 < numIndices / numLabels; ++d2) { |
20981 | EXPECT_NEAR(expectedLabelValues[d1][d2], |
20982 | labelValuesT->getHandle().at({d1, d2}), 1e-3); |
20983 | EXPECT_EQ(expectedExampleIds[d1][d2], |
20984 | exampleIdsT->getHandle<int32_t>().at({d1, d2})); |
20985 | } |
20986 | } |
20987 | |
20988 | const std::vector<int32_t> expectedGradientOffsetMap{0, 0, 0, 0, 1, 1, 1, 1}; |
20989 | for (dim_t d = 0; d < numIndices; ++d) { |
20990 | EXPECT_EQ(expectedGradientOffsetMap[d], |
20991 | gradientOffsetMapT->getHandle<int32_t>().at(d)); |
20992 | } |
20993 | } |
20994 | |
/// Helper to test BatchedUnaryEmbeddingsBags.
20996 | template <typename DataTy> |
20997 | static void testBatchedUnaryEmbeddingsBags(glow::PlaceholderBindings &bindings, |
20998 | glow::Module &mod, glow::Function *F, |
20999 | glow::ExecutionEngine &EE, |
21000 | ElemKind DTy, float allowedError) { |
21001 | ShapeVector idims = {1, 38, 1}; |
21002 | ShapeVector odims = {1, 1, 3}; |
21003 | |
21004 | Tensor weightsTensorReal(DTy, idims); |
21005 | Tensor indicesTensorReal(ElemKind::Int32ITy, {9}); |
21006 | Tensor offsetsTensorReal(ElemKind::Int32ITy, {4}); |
21007 | Tensor tableOffsetsTensorReal(ElemKind::Int32ITy, {4}); |
21008 | |
21009 | weightsTensorReal.getHandle<DataTy>() = { |
21010 | 0.4705, 0.0634, 0.8867, 0.3685, 0.0328, 0.1191, 0.1907, 0.9518, |
21011 | 0.3688, 0.5838, 0.0315, 0.3067, 0.0160, 0.3304, 0.2706, 0.4694, |
21012 | 0.0182, 0.9961, 0.5213, 0.4605, 0.6342, 0.5052, 0.9236, 0.2747, |
21013 | 0.3745, 0.9434, 0.5810, 0.5646, 0.5182, 0.9379, 0.0866, 0.0854, |
21014 | 0.1088, 0.4771, 0.0636, 0.5778, 0.5571, 0.3586}; |
21015 | indicesTensorReal.getHandle<int32_t>() = {1, 1, 3, 13, 14, 4, 15, 11, 16}; |
21016 | offsetsTensorReal.getHandle<int32_t>() = {0, 3, 5, 9}; |
21017 | tableOffsetsTensorReal.getHandle<int32_t>() = {0, 4, 21, 38}; |
21018 | |
21019 | auto weights = mod.createPlaceholder(DTy, idims, "weights" , false); |
21020 | auto indices = |
21021 | mod.createPlaceholder(ElemKind::Int32ITy, {9}, "indices" , false); |
21022 | auto offsets = |
21023 | mod.createPlaceholder(ElemKind::Int32ITy, {4}, "offsets" , false); |
21024 | auto tableOffsets = |
21025 | mod.createPlaceholder(ElemKind::Int32ITy, {4}, "tableOffsets" , false); |
21026 | |
21027 | bindings.insert(weights, std::move(weightsTensorReal)); |
21028 | bindings.insert(indices, std::move(indicesTensorReal)); |
21029 | bindings.insert(offsets, std::move(offsetsTensorReal)); |
21030 | bindings.insert(tableOffsets, std::move(tableOffsetsTensorReal)); |
21031 | |
21032 | auto *R = F->createBatchedUnaryEmbeddingsBags( |
21033 | "BatchedUnaryEmbeddingsBags" , weights, tableOffsets, indices, offsets); |
21034 | auto *S = F->createSave("save" , R); |
21035 | bindings.allocate(S->getPlaceholder()); |
21036 | |
21037 | EE.compile(CompilationMode::Infer); |
21038 | EE.run(bindings); |
21039 | |
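  // Each output element sums weights[tableOffsets[t] + idx] over the indices
  // in bag t. E.g. bag 0 covers indices {1, 1, 3} against table offset 0:
  // 0.0634 + 0.0634 + 0.3685 = 0.4953, the first expected value below.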
21040 | Tensor &result = *bindings.get(S->getPlaceholder()); |
21041 | Tensor expected(DTy, odims); |
21042 | expected.getHandle<DataTy>() = { |
21043 | 0.4953, |
21044 | 1.5174, |
21045 | 1.9679, |
21046 | }; |
21047 | |
21048 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
21049 | } |
21050 | |
21051 | /// Test that BatchedUnaryEmbeddingsBags is correctly supported in FloatTy. |
21052 | TEST_P(OperatorTest, BatchedUnaryEmbeddingsBags_Float) { |
21053 | CHECK_IF_ENABLED(); |
21054 | testBatchedUnaryEmbeddingsBags<float>(bindings_, mod_, F_, EE_, |
21055 | ElemKind::FloatTy, 0.0001); |
21056 | } |
21057 | |
21058 | /// Test that BatchedUnaryEmbeddingsBags is correctly supported in Float16Ty. |
21059 | TEST_P(OperatorTest, BatchedUnaryEmbeddingsBags_Float16) { |
21060 | CHECK_IF_ENABLED(); |
21061 | testBatchedUnaryEmbeddingsBags<float16_t>(bindings_, mod_, F_, EE_, |
21062 | ElemKind::Float16Ty, 0.005); |
21063 | } |
21064 | |
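/// Helper to test IntNBitSplitEmbeddingBags with a single table.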
21065 | template <typename WeightTy, typename IndexTy, typename OutputTy> |
21066 | static void testIntNBitSplitEmbeddingBagsSingle( |
21067 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
21068 | glow::ExecutionEngine &EE, SplitEmbeddingSparseType DTy, ElemKind IdxTy, |
21069 | Tensor expected, SplitEmbeddingPoolingMode poolingMode, |
21070 | SplitEmbeddingSparseType outputDType, float allowedError) { |
21071 | Tensor devWeightsTensorReal(ElemKind::UInt8ITy, {128}); |
21072 | Tensor indicesTensorReal(IdxTy, {10}); |
21073 | Tensor offsetsTensorReal(IdxTy, {3}); |
21074 | Tensor dimOffsetsTensorReal(ElemKind::Int32ITy, {2}); |
21075 | Tensor weightsPlacementReal(ElemKind::Int32ITy, {1}); |
21076 | Tensor weightsTysTensorReal(ElemKind::UInt8ITy, {1}); |
21077 | |
  // Depending on the sparse type, the leading bytes encode:
  //   single float: 1.724026083946228 (remainder is padding zeros)
  //   half float:   -0.07635 (plus padding bytes and zeros)
  //   int8:         1.965 (plus padding bytes and zeros)
  //   int4:         1.965 (plus padding bytes and zeros)
21082 | devWeightsTensorReal.getHandle<uint8_t>() = { |
21083 | 227, 172, 220, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21084 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21085 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21086 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21087 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21088 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21089 | }; |
21090 | Tensor uvmWeightsTensorReal = devWeightsTensorReal.clone(); |
21091 | indicesTensorReal.getHandle<IndexTy>() = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; |
21092 | Tensor weightsOffsetsReal(ElemKind::Int32ITy, {1}); |
21093 | weightsOffsetsReal.getHandle<int32_t>() = {0}; |
21094 | offsetsTensorReal.getHandle<IndexTy>() = {0, 4, 10}; |
21095 | dimOffsetsTensorReal.getHandle<int32_t>() = {0, 1}; |
21096 | weightsPlacementReal.getHandle<int32_t>() = {3}; |
21097 | weightsTysTensorReal.getHandle<uint8_t>() = {static_cast<uint8_t>(DTy)}; |
21098 | |
21099 | auto devWeights = mod.createPlaceholder(ElemKind::UInt8ITy, |
21100 | devWeightsTensorReal.getSizeInBytes(), |
21101 | "devWeights" , false); |
21102 | auto indices = mod.createPlaceholder(IdxTy, {10}, "indices" , false); |
21103 | auto offsets = mod.createPlaceholder(IdxTy, {3}, "offsets" , false); |
21104 | auto weightsOffsets = |
21105 | mod.createPlaceholder(ElemKind::Int32ITy, {1}, "weightsOffsets" , false); |
21106 | auto dimOffsets = |
21107 | mod.createPlaceholder(ElemKind::Int32ITy, {2}, "dimOffsets" , false); |
21108 | auto uvmWeights = mod.createPlaceholder(ElemKind::UInt8ITy, |
21109 | uvmWeightsTensorReal.getSizeInBytes(), |
21110 | "uvmWeights" , false); |
21111 | auto weightsPlacement = |
21112 | mod.createPlaceholder(ElemKind::Int32ITy, {1}, "weightsPlacement" , false); |
21113 | auto weightsTys = |
21114 | mod.createPlaceholder(ElemKind::UInt8ITy, {1}, "weightsTys" , false); |
21115 | auto indiceWeights = NodeValue(); |
21116 | |
21117 | bindings.insert(devWeights, std::move(devWeightsTensorReal)); |
21118 | bindings.insert(uvmWeights, std::move(uvmWeightsTensorReal)); |
21119 | bindings.insert(indices, std::move(indicesTensorReal)); |
21120 | bindings.insert(offsets, std::move(offsetsTensorReal)); |
21121 | bindings.insert(weightsOffsets, std::move(weightsOffsetsReal)); |
21122 | bindings.insert(dimOffsets, std::move(dimOffsetsTensorReal)); |
21123 | bindings.insert(weightsPlacement, std::move(weightsPlacementReal)); |
21124 | bindings.insert(weightsTys, std::move(weightsTysTensorReal)); |
21125 | |
21126 | auto *R = F->createIntNBitSplitEmbeddingBags( |
21127 | "IntNBitSplitEmbeddingBags" , devWeights, uvmWeights, weightsPlacement, |
21128 | weightsOffsets, weightsTys, dimOffsets, 1, indices, offsets, poolingMode, |
21129 | outputDType); |
21130 | auto *S = F->createSave("save" , R); |
21131 | bindings.allocate(S->getPlaceholder()); |
21132 | |
21133 | EE.compile(CompilationMode::Infer); |
21134 | EE.run(bindings); |
21135 | |
21136 | Tensor &result = *bindings.get(S->getPlaceholder()); |
21137 | |
21138 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
21139 | } |
21140 | |
/// Helper to test IntNBitSplitEmbeddingBags.
21142 | template <typename WeightTy, typename IndexTy, typename OutputTy> |
21143 | static void testIntNBitSplitEmbeddingBags( |
21144 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
21145 | glow::ExecutionEngine &EE, ElemKind DTy, ElemKind IdxTy, Tensor Weights, |
21146 | Tensor WeightsOffsets, Tensor expected, |
21147 | SplitEmbeddingPoolingMode poolingMode, SplitEmbeddingSparseType outputDType, |
21148 | float allowedError) { |
21149 | Tensor devWeightsTensorReal = Weights.clone(); |
21150 | Tensor uvmWeightsTensorReal = Weights.clone(); |
21151 | Tensor indicesTensorReal(IdxTy, {157}); |
21152 | Tensor offsetsTensorReal(IdxTy, {9}); |
21153 | Tensor weightsOffsetsTensorReal = std::move(WeightsOffsets); |
21154 | Tensor dimOffsetsTensorReal(ElemKind::Int32ITy, {5}); |
21155 | Tensor weightsPlacementReal(ElemKind::Int32ITy, {4}); |
21156 | Tensor weightsTysTensorReal(ElemKind::UInt8ITy, {4}); |
21157 | |
21158 | indicesTensorReal.getHandle<IndexTy>() = { |
21159 | 5, 3, 6, 0, 0, 5, 6, 6, 5, 7, 1, 1, 7, 6, 3, 1, 4, 1, 3, 3, 6, 1, 1, |
21160 | 6, 7, 2, 5, 4, 6, 7, 1, 4, 1, 4, 4, 5, 4, 2, 3, 6, 4, 0, 4, 2, 6, 7, |
21161 | 5, 0, 1, 3, 1, 2, 1, 5, 9, 3, 8, 4, 1, 4, 10, 4, 1, 1, 1, 7, 4, 7, 2, |
21162 | 2, 4, 3, 4, 9, 8, 8, 5, 5, 5, 2, 6, 7, 4, 7, 6, 6, 10, 0, 3, 10, 5, 4, |
21163 | 3, 3, 3, 4, 4, 9, 9, 7, 2, 1, 7, 4, 2, 9, 6, 6, 10, 5, 1, 0, 6, 3, 6, |
21164 | 2, 9, 3, 9, 3, 1, 3, 2, 3, 1, 3, 7, 2, 3, 3, 8, 7, 4, 7, 8, 9, 2, 3, |
21165 | 3, 4, 4, 8, 3, 4, 1, 9, 2, 1, 9, 2, 6, 8, 3, 3, 4, 2, 9, |
21166 | }; |
21167 | offsetsTensorReal.getHandle<IndexTy>() = {0, 0, 1, 2, 3, 51, 92, 123, 157}; |
21168 | dimOffsetsTensorReal.getHandle<int32_t>() = {0, 8, 16, 20, 26}; |
21169 | weightsPlacementReal.getHandle<int32_t>() = {3, 1, 2, 3}; |
21170 | if (std::is_same<WeightTy, float>::value) { |
21171 | weightsTysTensorReal.getHandle<uint8_t>() = {0, 0, 0, 0}; |
21172 | } else { |
21173 | weightsTysTensorReal.getHandle<uint8_t>() = {1, 1, 1, 1}; |
21174 | } |
21175 | |
21176 | auto devWeights = mod.createPlaceholder(ElemKind::UInt8ITy, |
21177 | devWeightsTensorReal.getSizeInBytes(), |
21178 | "devWeights" , false); |
21179 | auto indices = mod.createPlaceholder(IdxTy, {157}, "indices" , false); |
21180 | auto offsets = mod.createPlaceholder(IdxTy, {9}, "offsets" , false); |
21181 | auto weightsOffsets = |
21182 | mod.createPlaceholder(ElemKind::Int32ITy, {4}, "weightsOffsets" , false); |
21183 | auto dimOffsets = |
21184 | mod.createPlaceholder(ElemKind::Int32ITy, {5}, "dimOffsets" , false); |
21185 | auto uvmWeights = mod.createPlaceholder(ElemKind::UInt8ITy, |
21186 | uvmWeightsTensorReal.getSizeInBytes(), |
21187 | "uvmWeights" , false); |
21188 | auto weightsPlacement = |
21189 | mod.createPlaceholder(ElemKind::Int32ITy, {4}, "weightsPlacement" , false); |
21190 | auto weightsTys = |
21191 | mod.createPlaceholder(ElemKind::UInt8ITy, {4}, "weightsTys" , false); |
21192 | auto indiceWeights = NodeValue(); |
21193 | |
21194 | bindings.insert(devWeights, std::move(devWeightsTensorReal)); |
21195 | bindings.insert(uvmWeights, std::move(uvmWeightsTensorReal)); |
21196 | bindings.insert(indices, std::move(indicesTensorReal)); |
21197 | bindings.insert(offsets, std::move(offsetsTensorReal)); |
21198 | bindings.insert(weightsOffsets, std::move(weightsOffsetsTensorReal)); |
21199 | bindings.insert(dimOffsets, std::move(dimOffsetsTensorReal)); |
21200 | bindings.insert(weightsPlacement, std::move(weightsPlacementReal)); |
21201 | bindings.insert(weightsTys, std::move(weightsTysTensorReal)); |
21202 | |
21203 | auto *R = F->createIntNBitSplitEmbeddingBags( |
21204 | "IntNBitSplitEmbeddingBags" , devWeights, uvmWeights, weightsPlacement, |
21205 | weightsOffsets, weightsTys, dimOffsets, 26, indices, offsets, poolingMode, |
21206 | outputDType); |
21207 | auto *S = F->createSave("save" , R); |
21208 | bindings.allocate(S->getPlaceholder()); |
21209 | |
21210 | EE.compile(CompilationMode::Infer); |
21211 | EE.run(bindings); |
21212 | |
21213 | Tensor &result = *bindings.get(S->getPlaceholder()); |
21214 | |
21215 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
21216 | } |
21217 | |
21218 | static Tensor getIntNBitSplitEmbeddingBagsWeightsFloat() { |
21219 | Tensor weights(ElemKind::UInt8ITy, 1024); |
21220 | weights.getHandle<uint8_t>() = { |
21221 | 122, 49, 217, 65, 98, 154, 111, 224, 171, 33, 179, 80, 219, 246, 62, |
21222 | 36, 94, 80, 108, 33, 71, 149, 148, 130, 238, 154, 108, 236, 242, 144, |
21223 | 72, 152, 103, 0, 226, 107, 40, 232, 196, 50, 21, 205, 140, 139, 178, |
21224 | 100, 44, 74, 10, 116, 203, 228, 153, 102, 220, 160, 184, 187, 168, 201, |
21225 | 145, 43, 139, 36, 184, 195, 212, 245, 196, 128, 133, 219, 5, 211, 31, |
21226 | 144, 211, 63, 5, 104, 169, 237, 245, 52, 171, 136, 32, 191, 101, 174, |
21227 | 162, 10, 84, 4, 92, 183, 28, 171, 23, 65, 115, 28, 209, 250, 32, |
21228 | 31, 104, 0, 48, 37, 148, 163, 208, 94, 16, 182, 39, 225, 204, 211, |
21229 | 78, 234, 148, 37, 52, 140, 131, 85, 209, 45, 203, 183, 55, 201, 20, |
21230 | 75, 103, 132, 101, 2, 121, 82, 251, 110, 4, 3, 62, 237, 27, 106, |
21231 | 46, 86, 230, 49, 76, 85, 117, 18, 244, 176, 183, 8, 102, 32, 97, |
21232 | 138, 52, 64, 79, 73, 2, 159, 245, 48, 145, 72, 7, 180, 120, 108, |
21233 | 58, 114, 145, 150, 1, 81, 182, 133, 197, 75, 80, 34, 70, 30, 33, |
21234 | 44, 124, 227, 177, 0, 27, 52, 117, 73, 37, 103, 194, 179, 134, 188, |
21235 | 156, 10, 12, 167, 86, 103, 89, 224, 128, 87, 110, 183, 23, 208, 249, |
21236 | 224, 4, 63, 85, 135, 242, 206, 46, 68, 9, 127, 71, 24, 17, 181, |
21237 | 39, 156, 171, 92, 92, 131, 244, 95, 233, 226, 183, 22, 71, 38, 66, |
21238 | 93, 45, 85, 241, 9, 240, 48, 12, 41, 252, 190, 202, 191, 248, 170, |
21239 | 174, 157, 73, 67, 5, 247, 56, 21, 224, 150, 47, 65, 125, 143, 21, |
21240 | 194, 194, 142, 211, 36, 29, 196, 110, 77, 179, 235, 173, 237, 152, 18, |
21241 | 254, 2, 120, 254, 192, 18, 251, 113, 139, 105, 69, 83, 25, 230, 65, |
21242 | 116, 107, 107, 230, 105, 245, 133, 28, 88, 97, 47, 209, 6, 101, 157, |
21243 | 198, 153, 108, 197, 116, 183, 139, 32, 115, 237, 229, 146, 13, 30, 205, |
21244 | 234, 66, 108, 52, 83, 91, 81, 240, 4, 21, 131, 224, 142, 250, 6, |
21245 | 150, 159, 36, 220, 130, 178, 133, 187, 19, 221, 187, 198, 63, 236, 67, |
21246 | 166, 248, 151, 204, 148, 228, 176, 181, 128, 174, 118, 186, 214, 220, 199, |
21247 | 56, 160, 90, 173, 236, 124, 41, 109, 98, 231, 83, 160, 1, 92, 210, |
21248 | 110, 102, 68, 198, 84, 56, 187, 20, 68, 58, 147, 0, 0, 0, 0, |
21249 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21250 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21251 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21252 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21253 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21254 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21255 | 0, 0, 108, 52, 136, 212, 17, 233, 188, 145, 127, 218, 92, 119, 131, |
21256 | 107, 11, 179, 73, 94, 231, 175, 54, 178, 116, 127, 212, 38, 175, 196, |
21257 | 145, 207, 204, 68, 135, 229, 94, 16, 167, 20, 189, 61, 252, 21, 21, |
21258 | 196, 245, 72, 230, 14, 94, 146, 3, 37, 213, 199, 19, 8, 244, 122, |
21259 | 174, 185, 2, 11, 42, 23, 7, 177, 185, 222, 90, 44, 220, 41, 53, |
21260 | 135, 46, 227, 41, 85, 183, 166, 250, 156, 10, 50, 100, 103, 120, 95, |
21261 | 105, 10, 6, 222, 233, 76, 147, 254, 251, 139, 36, 23, 71, 169, 228, |
21262 | 161, 174, 244, 71, 85, 118, 163, 126, 152, 9, 224, 98, 49, 66, 146, |
21263 | 77, 186, 218, 130, 112, 48, 20, 211, 141, 175, 145, 254, 217, 102, 22, |
21264 | 184, 213, 179, 204, 206, 151, 29, 209, 125, 236, 142, 217, 157, 243, 162, |
21265 | 163, 95, 227, 223, 0, 0, 0, 0, 0, 0, 0, 0, 187, 236, 246, |
21266 | 127, 92, 248, 2, 247, 7, 41, 108, 161, 66, 133, 134, 159, 55, 85, |
21267 | 161, 73, 108, 95, 106, 145, 0, 0, 0, 0, 0, 0, 0, 0, 221, |
21268 | 149, 198, 30, 206, 45, 150, 80, 172, 118, 29, 250, 196, 93, 39, 252, |
21269 | 118, 112, 62, 169, 170, 59, 99, 106, 0, 0, 0, 0, 0, 0, 0, |
21270 | 0, 238, 12, 93, 71, 43, 47, 86, 230, 138, 103, 125, 129, 129, 251, |
21271 | 207, 26, 249, 232, 50, 168, 156, 63, 238, 159, 0, 0, 0, 0, 0, |
21272 | 0, 0, 0, 68, 74, 234, 112, 127, 94, 155, 155, 178, 19, 153, 186, |
21273 | 137, 218, 13, 57, 243, 106, 99, 219, 153, 218, 174, 21, 0, 0, 0, |
21274 | 0, 0, 0, 0, 0, 12, 125, 32, 194, 118, 244, 208, 234, 21, 220, |
21275 | 101, 34, 150, 182, 102, 196, 161, 155, 235, 148, 120, 153, 161, 239, 0, |
21276 | 0, 0, 0, 0, 0, 0, 0, 175, 148, 182, 141, 251, 25, 127, 0, |
21277 | 33, 67, 236, 173, 100, 89, 67, 195, 11, 151, 96, 143, 136, 34, 61, |
21278 | 196, 0, 0, 0, 0, 0, 0, 0, 0, 193, 251, 231, 23, 174, 240, |
21279 | 116, 161, 6, 46, 49, 198, 175, 197, 131, 1, 233, 160, 33, 78, 254, |
21280 | 215, 103, 171, 0, 0, 0, 0, 0, 0, 0, 0, 6, 251, 197, 145, |
21281 | 64, 128, 52, 72, 55, 60, 21, 242, 33, 12, 252, 133, 165, 15, 65, |
21282 | 93, 237, 166, 204, 87, 0, 0, 0, 0, 0, 0, 0, 0, 202, 13, |
21283 | 100, 124, 139, 43, 180, 40, 165, 244, 228, 109, 9, 226, 55, 154, 88, |
21284 | 4, 122, 102, 185, 113, 165, 39, 0, 0, 0, 0, 0, 0, 0, 0, |
21285 | 212, 2, 67, 190, 101, 23, 80, 96, 123, 7, 43, 179, 114, 167, 149, |
21286 | 199, 5, 204, 192, 145, 236, 246, 143, 98, 0, 0, 0, 0, 0, 0, |
21287 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21288 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21289 | 0, 0, 0, 0, |
21290 | }; |
21291 | return weights; |
21292 | } |
21293 | |
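/// Returns the fused raw-byte payload of the FP16 embedding tables used by
/// the Float16 IntNBitSplitEmbeddingBags tests below.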
21294 | static Tensor getIntNBitSplitEmbeddingBagsWeightsFloat16() { |
  Tensor weights(ElemKind::UInt8ITy, {768});
21296 | weights.getHandle<uint8_t>() = { |
21297 | 194, 31, 91, 184, 11, 126, 172, 207, 244, 59, 98, 70, 26, 85, 30, |
21298 | 218, 102, 157, 91, 59, 133, 49, 3, 145, 241, 208, 68, 54, 13, 84, |
21299 | 249, 182, 16, 70, 152, 11, 184, 203, 220, 46, 119, 32, 168, 206, 63, |
21300 | 3, 108, 180, 73, 26, 10, 254, 201, 92, 243, 246, 143, 99, 186, 216, |
21301 | 51, 208, 131, 193, 182, 25, 242, 197, 170, 16, 60, 237, 170, 193, 87, |
21302 | 37, 140, 103, 55, 145, 242, 15, 118, 9, 33, 37, 103, 97, 241, 220, |
21303 | 99, 7, 69, 156, 185, 47, 94, 194, 135, 44, 54, 224, 135, 217, 160, |
21304 | 69, 253, 57, 58, 228, 53, 65, 201, 106, 105, 66, 157, 185, 19, 170, |
21305 | 141, 55, 55, 12, 114, 4, 113, 252, 32, 166, 127, 17, 228, 236, 23, |
21306 | 30, 17, 242, 236, 39, 215, 107, 187, 224, 246, 53, 26, 243, 120, 19, |
21307 | 26, 26, 22, 175, 181, 104, 194, 161, 192, 25, 176, 51, 100, 207, 137, |
21308 | 250, 125, 115, 231, 249, 14, 223, 31, 161, 194, 140, 226, 102, 210, 43, |
21309 | 146, 254, 251, 212, 4, 211, 138, 198, 62, 198, 174, 207, 0, 0, 0, |
21310 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21311 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21312 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21313 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21314 | 0, 11, 241, 92, 203, 12, 128, 247, 140, 60, 47, 141, 226, 212, 234, |
21315 | 32, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21316 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21317 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21318 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21319 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21320 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21321 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21322 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 117, 222, 214, 8, 99, 240, |
21323 | 40, 218, 0, 0, 0, 0, 0, 0, 0, 0, 204, 204, 221, 108, 194, |
21324 | 66, 233, 230, 0, 0, 0, 0, 0, 0, 0, 0, 164, 91, 127, 89, |
21325 | 51, 43, 87, 126, 0, 0, 0, 0, 0, 0, 0, 0, 103, 80, 249, |
21326 | 187, 217, 152, 133, 65, 0, 0, 0, 0, 0, 0, 0, 0, 164, 131, |
21327 | 158, 92, 37, 246, 208, 134, 0, 0, 0, 0, 0, 0, 0, 0, 28, |
21328 | 4, 211, 193, 186, 39, 7, 188, 0, 0, 0, 0, 0, 0, 0, 0, |
21329 | 64, 42, 121, 216, 118, 47, 242, 44, 0, 0, 0, 0, 0, 0, 0, |
21330 | 0, 248, 161, 215, 33, 60, 113, 206, 210, 0, 0, 0, 0, 0, 0, |
21331 | 0, 0, 205, 192, 81, 45, 132, 157, 201, 106, 183, 156, 198, 187, 0, |
21332 | 0, 0, 0, 254, 183, 131, 85, 139, 7, 245, 30, 230, 44, 243, 11, |
21333 | 0, 0, 0, 0, 24, 116, 161, 118, 240, 113, 161, 172, 26, 24, 150, |
21334 | 251, 0, 0, 0, 0, 43, 30, 81, 239, 141, 159, 216, 59, 77, 230, |
21335 | 199, 213, 0, 0, 0, 0, 36, 145, 99, 116, 147, 140, 90, 179, 237, |
21336 | 161, 251, 186, 0, 0, 0, 0, 13, 58, 154, 71, 196, 79, 206, 176, |
21337 | 140, 189, 1, 95, 0, 0, 0, 0, 48, 157, 37, 77, 93, 54, 180, |
21338 | 147, 237, 28, 11, 156, 0, 0, 0, 0, 203, 222, 181, 241, 46, 21, |
21339 | 120, 155, 207, 96, 100, 175, 0, 0, 0, 0, 41, 84, 184, 64, 8, |
21340 | 104, 228, 137, 228, 60, 147, 147, 0, 0, 0, 0, 88, 90, 35, 217, |
21341 | 195, 107, 117, 238, 122, 50, 107, 13, 0, 0, 0, 0, 21, 200, 244, |
21342 | 182, 240, 207, 181, 113, 105, 98, 30, 101, 0, 0, 0, 0, 0, 0, |
21343 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21344 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21345 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21346 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21347 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21348 | 0, 0, 0, |
21349 | }; |
21350 | return weights; |
21351 | } |
21352 | |
/// Test that IntNBitSplitEmbeddingBags is correctly supported in
/// single-feature mode with sum pooling.
21355 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBagsSingle_Float) { |
21356 | CHECK_IF_ENABLED(); |
21357 | Tensor expected(ElemKind::UInt8ITy, {2, 4}); |
21358 | // 2 floats: |
21359 | // 6.896104335784912 = 1.724026083946228 * 4, |
21360 | // 10.344156265258789 = 1.724026083946228 * 6 |
21361 | expected.getHandle<uint8_t>() = { |
21362 | 227, 172, 220, 64, 170, 129, 37, 65, |
21363 | }; |
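  // For reference: when the output sparse type is EST_FLOAT, the raw
  // "expected" bytes in these tests are the little-endian float32 encodings
  // of the pooled outputs, e.g. here {227, 172, 220, 64} is 0x40DCACE3 ==
  // 6.896104... and {170, 129, 37, 65} is 0x412581AA == 10.344156...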
21364 | |
21365 | testIntNBitSplitEmbeddingBagsSingle<float, int32_t, float>( |
21366 | bindings_, mod_, F_, EE_, SplitEmbeddingSparseType::EST_FLOAT, |
21367 | ElemKind::Int32ITy, std::move(expected), |
21368 | SplitEmbeddingPoolingMode::EP_SUM, SplitEmbeddingSparseType::EST_FLOAT, |
21369 | 0.0001); |
21370 | } |
21371 | |
/// Test that IntNBitSplitEmbeddingBags is correctly supported in
/// single-feature mode with mean pooling.
21374 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBagsSingle_Float_MeanPooling) { |
21375 | CHECK_IF_ENABLED(); |
21376 | Tensor expected(ElemKind::UInt8ITy, {2, 4}); |
21377 | |
21378 | // 2 floats: 1.724026083946228, 1.724026083946228 |
21379 | expected.getHandle<uint8_t>() = { |
21380 | 227, 172, 220, 63, 227, 172, 220, 63, |
21381 | }; |
21382 | |
21383 | testIntNBitSplitEmbeddingBagsSingle<float, int32_t, float>( |
21384 | bindings_, mod_, F_, EE_, SplitEmbeddingSparseType::EST_FLOAT, |
21385 | ElemKind::Int32ITy, std::move(expected), |
21386 | SplitEmbeddingPoolingMode::EP_MEAN, SplitEmbeddingSparseType::EST_FLOAT, |
21387 | 0.0001); |
21388 | } |
21389 | |
/// Test that IntNBitSplitEmbeddingBags is correctly supported in
/// single-feature mode with sum pooling on half-float data.
21392 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBagsSingle_Float16_SumPooling) { |
21393 | CHECK_IF_ENABLED(); |
21394 | Tensor expected(ElemKind::UInt8ITy, {2, 4}); |
21395 | |
21396 | // 2 floats: |
21397 | // -0.305419921875 = -0.07635498 * 4 |
21398 | // -0.4581298828125 = -0.07635498 * 6 |
21399 | expected.getHandle<uint8_t>() = { |
21400 | 0, 96, 156, 190, 0, 144, 234, 190, |
21401 | }; |
21402 | |
21403 | testIntNBitSplitEmbeddingBagsSingle<float16_t, int32_t, float>( |
21404 | bindings_, mod_, F_, EE_, SplitEmbeddingSparseType::EST_FLOAT16, |
21405 | ElemKind::Int32ITy, std::move(expected), |
21406 | SplitEmbeddingPoolingMode::EP_SUM, SplitEmbeddingSparseType::EST_FLOAT, |
21407 | 0.0001); |
21408 | } |
21409 | |
/// Test that IntNBitSplitEmbeddingBags is correctly supported in
/// single-feature mode with mean pooling on half-float data.
21412 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBagsSingle_Float16_MeanPooling) { |
21413 | CHECK_IF_ENABLED(); |
21414 | Tensor expected(ElemKind::UInt8ITy, {2, 4}); |
21415 | |
21416 | // 2 floats: -0.07635498, -0.07635498 |
21417 | expected.getHandle<uint8_t>() = { |
21418 | 0, 96, 156, 189, 0, 96, 156, 189, |
21419 | }; |
21420 | |
21421 | testIntNBitSplitEmbeddingBagsSingle<float16_t, int32_t, float>( |
21422 | bindings_, mod_, F_, EE_, SplitEmbeddingSparseType::EST_FLOAT16, |
21423 | ElemKind::Int32ITy, std::move(expected), |
21424 | SplitEmbeddingPoolingMode::EP_MEAN, SplitEmbeddingSparseType::EST_FLOAT, |
21425 | 0.0001); |
21426 | } |
21427 | |
/// Test that IntNBitSplitEmbeddingBags is correctly supported in
/// single-feature mode with sum pooling on int8 data.
21430 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBagsSingle_Int8_SumPooling) { |
21431 | CHECK_IF_ENABLED(); |
21432 | Tensor expected(ElemKind::UInt8ITy, {2, 4}); |
21433 | |
21434 | // 2 floats: |
  // 7.859375 = 1.96484375 * 4
  // 11.7890625 = 1.96484375 * 6
21437 | expected.getHandle<uint8_t>() = { |
21438 | 0, 128, 251, 64, 0, 160, 60, 65, |
21439 | }; |
21440 | |
21441 | testIntNBitSplitEmbeddingBagsSingle<uint8_t, int32_t, float>( |
21442 | bindings_, mod_, F_, EE_, SplitEmbeddingSparseType::EST_INT8, |
21443 | ElemKind::Int32ITy, std::move(expected), |
21444 | SplitEmbeddingPoolingMode::EP_SUM, SplitEmbeddingSparseType::EST_FLOAT, |
21445 | 0.0001); |
21446 | } |
21447 | |
/// Test that IntNBitSplitEmbeddingBags is correctly supported in
/// single-feature mode with mean pooling on int8 data.
21450 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBagsSingle_Int8_MeanPooling) { |
21451 | CHECK_IF_ENABLED(); |
21452 | Tensor expected(ElemKind::UInt8ITy, {2, 4}); |
21453 | |
21454 | // 2 floats: 1.96484375, 1.96484375 |
21455 | expected.getHandle<uint8_t>() = { |
21456 | 0, 128, 251, 63, 0, 128, 251, 63, |
21457 | }; |
21458 | |
21459 | testIntNBitSplitEmbeddingBagsSingle<uint8_t, int32_t, float>( |
21460 | bindings_, mod_, F_, EE_, SplitEmbeddingSparseType::EST_INT8, |
21461 | ElemKind::Int32ITy, std::move(expected), |
21462 | SplitEmbeddingPoolingMode::EP_MEAN, SplitEmbeddingSparseType::EST_FLOAT, |
21463 | 0.0001); |
21464 | } |
21465 | |
/// Test that IntNBitSplitEmbeddingBags is correctly supported in
/// single-feature mode with sum pooling on int4 data.
21468 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBagsSingle_Int4_SumPooling) { |
21469 | CHECK_IF_ENABLED(); |
21470 | Tensor expected(ElemKind::UInt8ITy, {2, 4}); |
21471 | |
21472 | // 2 floats: |
  // 7.859375 = 1.96484375 * 4
  // 11.7890625 = 1.96484375 * 6
21475 | expected.getHandle<uint8_t>() = { |
21476 | 0, 128, 251, 64, 0, 160, 60, 65, |
21477 | }; |
21478 | |
21479 | testIntNBitSplitEmbeddingBagsSingle<uint8_t, int32_t, float>( |
21480 | bindings_, mod_, F_, EE_, SplitEmbeddingSparseType::EST_INT4, |
21481 | ElemKind::Int32ITy, std::move(expected), |
21482 | SplitEmbeddingPoolingMode::EP_SUM, SplitEmbeddingSparseType::EST_FLOAT, |
21483 | 0.0001); |
21484 | } |
21485 | |
/// Test that IntNBitSplitEmbeddingBags is correctly supported in
/// single-feature mode with mean pooling on int4 data.
21488 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBagsSingle_Int4_MeanPooling) { |
21489 | CHECK_IF_ENABLED(); |
21490 | Tensor expected(ElemKind::UInt8ITy, {2, 4}); |
21491 | |
21492 | // 2 floats: 1.96484375, 1.96484375 |
21493 | expected.getHandle<uint8_t>() = { |
21494 | 0, 128, 251, 63, 0, 128, 251, 63, |
21495 | }; |
21496 | |
21497 | testIntNBitSplitEmbeddingBagsSingle<uint8_t, int32_t, float>( |
21498 | bindings_, mod_, F_, EE_, SplitEmbeddingSparseType::EST_INT4, |
21499 | ElemKind::Int32ITy, std::move(expected), |
21500 | SplitEmbeddingPoolingMode::EP_MEAN, SplitEmbeddingSparseType::EST_FLOAT, |
21501 | 0.0001); |
21502 | } |
21503 | |
21504 | /// Test that IntNBitSplitEmbeddingBags is correctly supported in FloatTy. |
21505 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBags_Float) { |
21506 | CHECK_IF_ENABLED(); |
21507 | Tensor expected(ElemKind::UInt8ITy, {2, 104}); |
21508 | expected.getHandle<uint8_t>() = { |
21509 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21510 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21511 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21512 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21513 | 0, 0, 0, 0, 12, 177, 57, 224, 0, 0, 128, 127, 127, 218, 92, |
21514 | 120, 0, 0, 128, 255, 187, 236, 246, 127, 138, 116, 196, 247, 172, 118, |
21515 | 29, 251, 196, 93, 39, 253, 182, 66, 156, 103, 219, 39, 161, 239, 183, |
21516 | 8, 102, 32, 97, 138, 52, 64, 79, 73, 2, 159, 245, 48, 145, 72, |
21517 | 7, 180, 120, 108, 58, 114, 145, 150, 1, 81, 182, 133, 197, 75, 80, |
21518 | 34, 7, 177, 185, 222, 90, 44, 220, 41, 53, 135, 46, 227, 41, 85, |
21519 | 183, 166, 250, 156, 10, 50, 100, 103, 120, 95, 105, 10, 6, 222, 233, |
21520 | 76, 147, 254, 187, 236, 246, 127, 0, 0, 128, 127, 127, 218, 92, 119, |
21521 | 0, 0, 128, 255, 187, 236, 246, 127, 138, 116, 196, 247, 236, 212, 68, |
21522 | 251, 53, 53, 81, 253, 131, 4, 122, 103, 74, 5, 142, 107, |
21523 | }; |
21524 | |
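  // Byte offsets of each of the 4 embedding tables within the fused uint8
  // weights payload.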
21525 | Tensor weightsOffsets(ElemKind::Int32ITy, {4}); |
21526 | weightsOffsets.getHandle<int32_t>() = {0, 384, 512, 640}; |
21527 | |
21528 | testIntNBitSplitEmbeddingBags<float, int32_t, float>( |
21529 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int32ITy, |
21530 | getIntNBitSplitEmbeddingBagsWeightsFloat(), std::move(weightsOffsets), |
21531 | std::move(expected), SplitEmbeddingPoolingMode::EP_SUM, |
21532 | SplitEmbeddingSparseType::EST_FLOAT, 0.0001); |
21533 | } |
21534 | |
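/// Test that IntNBitSplitEmbeddingBags is correctly supported in FloatTy
/// with mean pooling.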
21535 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBags_Float_MeanPooling) { |
21536 | CHECK_IF_ENABLED(); |
21537 | Tensor expected(ElemKind::UInt8ITy, {2, 104}); |
21538 | expected.getHandle<uint8_t>() = { |
21539 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21540 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21541 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21542 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21543 | 0, 0, 0, 0, 187, 150, 119, 221, 0, 0, 128, 127, 85, 60, 147, |
21544 | 117, 0, 0, 128, 255, 187, 236, 246, 127, 225, 202, 74, 245, 4, 139, |
21545 | 162, 248, 227, 195, 172, 250, 31, 77, 33, 101, 177, 90, 38, 237, 183, |
21546 | 8, 102, 32, 97, 138, 52, 64, 79, 73, 2, 159, 245, 48, 145, 72, |
21547 | 7, 180, 120, 108, 58, 114, 145, 150, 1, 81, 182, 133, 197, 75, 80, |
21548 | 34, 7, 177, 185, 222, 90, 44, 220, 41, 53, 135, 46, 227, 41, 85, |
21549 | 183, 166, 250, 156, 10, 50, 100, 103, 120, 95, 105, 10, 6, 222, 233, |
21550 | 76, 147, 254, 187, 236, 246, 127, 0, 0, 128, 127, 155, 95, 172, 116, |
21551 | 0, 0, 128, 255, 187, 236, 246, 127, 40, 230, 56, 245, 222, 64, 185, |
21552 | 248, 200, 230, 196, 250, 138, 79, 235, 100, 160, 170, 5, 105, |
21553 | }; |
21554 | |
21555 | Tensor weightsOffsets(ElemKind::Int32ITy, {4}); |
21556 | weightsOffsets.getHandle<int32_t>() = {0, 384, 512, 640}; |
21557 | testIntNBitSplitEmbeddingBags<float, int32_t, float>( |
21558 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int32ITy, |
21559 | getIntNBitSplitEmbeddingBagsWeightsFloat(), std::move(weightsOffsets), |
21560 | std::move(expected), SplitEmbeddingPoolingMode::EP_MEAN, |
21561 | SplitEmbeddingSparseType::EST_FLOAT, 0.0001); |
21562 | } |
21563 | |
21564 | /// Test that IntNBitSplitEmbeddingBags is correctly supported in Float16Ty. |
21565 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBags_Float16) { |
21566 | CHECK_IF_ENABLED(); |
21567 | |
21568 | Tensor expected(ElemKind::UInt8ITy, {2, 52}); |
21569 | expected.getHandle<uint8_t>() = { |
21570 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21571 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21572 | 0, 0, 76, 227, 74, 122, 0, 252, 0, 126, 0, 124, 0, 124, 0, |
21573 | 124, 94, 244, 131, 240, 0, 252, 242, 15, 118, 9, 33, 37, 103, 97, |
21574 | 241, 220, 99, 7, 69, 156, 185, 47, 0, 0, 0, 0, 0, 0, 0, |
21575 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, 122, 0, 124, 0, 252, |
21576 | 0, 126, 0, 124, 0, 124, 0, 124, 116, 246, 48, 242, 0, 252, |
21577 | }; |
21578 | |
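  // Byte offsets of each table within the fused FP16 weights payload (the
  // tables are packed more tightly than in the FP32 tests above).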
21579 | Tensor weightsOffsets(ElemKind::Int32ITy, {4}); |
21580 | weightsOffsets.getHandle<int32_t>() = {0, 256, 384, 512}; |
21581 | |
21582 | testIntNBitSplitEmbeddingBags<float16_t, int32_t, float16_t>( |
21583 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int32ITy, |
21584 | getIntNBitSplitEmbeddingBagsWeightsFloat16(), std::move(weightsOffsets), |
21585 | std::move(expected), SplitEmbeddingPoolingMode::EP_SUM, |
21586 | SplitEmbeddingSparseType::EST_FLOAT16, 0.005); |
21587 | } |
21588 | |
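/// Test that IntNBitSplitEmbeddingBags is correctly supported in Float16Ty
/// with mean pooling.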
21589 | TEST_P(OperatorTest, IntNBitSplitEmbeddingBags_Float16_MeanPooling) { |
21590 | CHECK_IF_ENABLED(); |
21591 | Tensor expected(ElemKind::UInt8ITy, {2, 52}); |
21592 | expected.getHandle<uint8_t>() = { |
21593 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21594 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21595 | 0, 0, 221, 204, 49, 100, 0, 252, 0, 126, 0, 124, 0, 124, 0, |
21596 | 124, 130, 224, 168, 220, 0, 252, 242, 15, 118, 9, 33, 37, 103, 97, |
21597 | 241, 220, 99, 7, 69, 156, 185, 47, 0, 0, 0, 0, 0, 0, 0, |
21598 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 217, 100, 0, 124, 0, 252, |
21599 | 0, 126, 0, 124, 0, 124, 0, 124, 19, 226, 211, 221, 0, 252, |
21600 | }; |
21601 | Tensor weightsOffsets(ElemKind::Int32ITy, {4}); |
21602 | weightsOffsets.getHandle<int32_t>() = {0, 256, 384, 512}; |
21603 | |
21604 | testIntNBitSplitEmbeddingBags<float16_t, int32_t, float16_t>( |
21605 | bindings_, mod_, F_, EE_, ElemKind::Float16Ty, ElemKind::Int32ITy, |
21606 | getIntNBitSplitEmbeddingBagsWeightsFloat16(), std::move(weightsOffsets), |
21607 | std::move(expected), SplitEmbeddingPoolingMode::EP_MEAN, |
21608 | SplitEmbeddingSparseType::EST_FLOAT16, 0.005); |
21609 | } |
21610 | |
/// Helper to test IntNBitSplitEmbeddingWeightedBags: builds the graph, runs
/// it, and compares the result against \p expected.
21612 | template <typename WeightTy, typename IndexTy, typename OutputTy> |
21613 | static void testIntNBitSplitEmbeddingWeightedBags( |
21614 | glow::PlaceholderBindings &bindings, glow::Module &mod, glow::Function *F, |
21615 | glow::ExecutionEngine &EE, ElemKind DTy, ElemKind IdxTy, Tensor Weights, |
21616 | Tensor WeightsOffsets, Tensor expected, |
21617 | SplitEmbeddingSparseType outputDType, float allowedError) { |
21618 | Tensor devWeightsTensorReal = Weights.clone(); |
21619 | Tensor uvmWeightsTensorReal = Weights.clone(); |
21620 | Tensor indicesTensorReal(IdxTy, {157}); |
21621 | Tensor offsetsTensorReal(IdxTy, {9}); |
21622 | Tensor weightsOffsetsTensorReal = std::move(WeightsOffsets); |
21623 | Tensor dimOffsetsTensorReal(ElemKind::Int32ITy, {5}); |
21624 | Tensor weightsPlacementReal(ElemKind::Int32ITy, {4}); |
21625 | Tensor weightsTysTensorReal(ElemKind::UInt8ITy, {4}); |
21626 | Tensor indiceWeightsTensorReal(ElemKind::FloatTy, {157}); |
21627 | |
21628 | indicesTensorReal.getHandle<IndexTy>() = { |
21629 | 5, 3, 6, 0, 0, 5, 6, 6, 5, 7, 1, 1, 7, 6, 3, 1, 4, 1, 3, 3, 6, 1, 1, |
21630 | 6, 7, 2, 5, 4, 6, 7, 1, 4, 1, 4, 4, 5, 4, 2, 3, 6, 4, 0, 4, 2, 6, 7, |
21631 | 5, 0, 1, 3, 1, 2, 1, 5, 9, 3, 8, 4, 1, 4, 10, 4, 1, 1, 1, 7, 4, 7, 2, |
21632 | 2, 4, 3, 4, 9, 8, 8, 5, 5, 5, 2, 6, 7, 4, 7, 6, 6, 10, 0, 3, 10, 5, 4, |
21633 | 3, 3, 3, 4, 4, 9, 9, 7, 2, 1, 7, 4, 2, 9, 6, 6, 10, 5, 1, 0, 6, 3, 6, |
21634 | 2, 9, 3, 9, 3, 1, 3, 2, 3, 1, 3, 7, 2, 3, 3, 8, 7, 4, 7, 8, 9, 2, 3, |
21635 | 3, 4, 4, 8, 3, 4, 1, 9, 2, 1, 9, 2, 6, 8, 3, 3, 4, 2, 9, |
21636 | }; |
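  // Bag boundaries into the indices array above: 9 offsets delimit 8 bags
  // (2 output rows x 4 tables).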
21637 | offsetsTensorReal.getHandle<IndexTy>() = {0, 0, 1, 2, 3, 51, 92, 123, 157}; |
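  // Cumulative per-table output dimensions (8, 8, 4, and 6 columns), for 26
  // output columns in total, matching the 26 passed to the node below.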
21638 | dimOffsetsTensorReal.getHandle<int32_t>() = {0, 8, 16, 20, 26}; |
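  // Per-table placement codes selecting which weights buffer each table is
  // read from; the exact numeric meanings are assumed here to mirror
  // FBGEMM's EmbeddingLocation enum.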
21639 | weightsPlacementReal.getHandle<int32_t>() = {3, 1, 2, 3}; |
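  // Per-table storage types for the embedding rows; the 0/1 codes are
  // assumed to correspond to float32 and float16 rows respectively.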
21640 | if (std::is_same<WeightTy, float>::value) { |
21641 | weightsTysTensorReal.getHandle<uint8_t>() = {0, 0, 0, 0}; |
21642 | } else { |
21643 | weightsTysTensorReal.getHandle<uint8_t>() = {1, 1, 1, 1}; |
21644 | } |
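  // Per-lookup scaling weights: each looked-up row is scaled by its matching
  // weight before pooling, which is what distinguishes this weighted op from
  // plain IntNBitSplitEmbeddingBags.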
21645 | indiceWeightsTensorReal.getHandle<float>() = { |
21646 | 0.73059422, 0.09048918, 0.554031, 0.13158787, 0.23915586, 0.3018666, |
21647 | 0.6207229, 0.57457829, 0.33964851, 0.02707603, 0.92231585, 0.64477818, |
21648 | 0.67243994, 0.58562965, 0.30533718, 0.54483425, 0.53211636, 0.12025826, |
21649 | 0.34783277, 0.92105082, 0.93280413, 0.30841353, 0.67953576, 0.16194991, |
21650 | 0.2832056, 0.48093265, 0.18568761, 0.20272573, 0.71193419, 0.38782271, |
21651 | 0.10420006, 0.18165586, 0.73882372, 0.19265882, 0.62022048, 0.97320652, |
21652 | 0.21687336, 0.61186471, 0.84622618, 0.2533562, 0.37639096, 0.29951705, |
21653 | 0.61392843, 0.93121569, 0.26724258, 0.80117613, 0.37856814, 0.20297429, |
21654 | 0.90158672, 0.8148197, 0.11421551, 0.80284475, 0.38450052, 0.84448327, |
21655 | 0.61715987, 0.36695459, 0.02693524, 0.44444055, 0.33264582, 0.08956304, |
21656 | 0.7469183, 0.40471427, 0.15962103, 0.46057547, 0.8303796, 0.48742954, |
21657 | 0.22941005, 0.82710538, 0.51743851, 0.63713515, 0.27360268, 0.28787691, |
21658 | 0.68998699, 0.91243272, 0.92242145, 0.00800144, 0.44966546, 0.23878438, |
21659 | 0.33966388, 0.26723466, 0.68331924, 0.06104597, 0.28901017, 0.3702946, |
21660 | 0.91717632, 0.01063433, 0.01559091, 0.68447562, 0.26032356, 0.82551908, |
21661 | 0.71921533, 0.3309967, 0.34700732, 0.46732838, 0.98460019, 0.97221335, |
21662 | 0.30027433, 0.890952, 0.30998982, 0.19929673, 0.16206062, 0.3012844, |
21663 | 0.15551239, 0.09132537, 0.69239636, 0.61895815, 0.7683584, 0.85551139, |
21664 | 0.56375194, 0.6407271, 0.37005971, 0.48018709, 0.91556693, 0.52716185, |
21665 | 0.3401635, 0.51378091, 0.34776683, 0.91414342, 0.08269295, 0.3923621, |
21666 | 0.89205286, 0.62261808, 0.93271026, 0.65990633, 0.00268492, 0.02678248, |
21667 | 0.61158337, 0.8550025, 0.16225994, 0.43618449, 0.86736373, 0.55277513, |
21668 | 0.68459586, 0.84311144, 0.53162982, 0.28421924, 0.33122102, 0.47700932, |
21669 | 0.92586793, 0.13141876, 0.96422898, 0.10795071, 0.10225672, 0.03472978, |
21670 | 0.83985586, 0.87896339, 0.91911041, 0.83078131, 0.7561809, 0.13999207, |
21671 | 0.44837753, 0.81394738, 0.74990102, 0.9324097, 0.64143043, 0.55022502, |
21672 | 0.97235411, |
21673 | }; |
21674 | |
  auto devWeights = mod.createPlaceholder(
      ElemKind::UInt8ITy, {devWeightsTensorReal.getSizeInBytes()},
      "devWeights", false);
  auto indices = mod.createPlaceholder(IdxTy, {157}, "indices", false);
  auto offsets = mod.createPlaceholder(IdxTy, {9}, "offsets", false);
  auto weightsOffsets =
      mod.createPlaceholder(ElemKind::Int32ITy, {4}, "weightsOffsets", false);
  auto dimOffsets =
      mod.createPlaceholder(ElemKind::Int32ITy, {5}, "dimOffsets", false);
  auto uvmWeights = mod.createPlaceholder(
      ElemKind::UInt8ITy, {uvmWeightsTensorReal.getSizeInBytes()},
      "uvmWeights", false);
  auto weightsPlacement =
      mod.createPlaceholder(ElemKind::Int32ITy, {4}, "weightsPlacement", false);
  auto weightsTys =
      mod.createPlaceholder(ElemKind::UInt8ITy, {4}, "weightsTys", false);
  auto indiceWeights =
      mod.createPlaceholder(DTy, {157}, "indiceWeights", false);
21693 | |
21694 | bindings.insert(devWeights, std::move(devWeightsTensorReal)); |
21695 | bindings.insert(uvmWeights, std::move(uvmWeightsTensorReal)); |
21696 | bindings.insert(indices, std::move(indicesTensorReal)); |
21697 | bindings.insert(offsets, std::move(offsetsTensorReal)); |
21698 | bindings.insert(weightsOffsets, std::move(weightsOffsetsTensorReal)); |
21699 | bindings.insert(dimOffsets, std::move(dimOffsetsTensorReal)); |
21700 | bindings.insert(weightsPlacement, std::move(weightsPlacementReal)); |
21701 | bindings.insert(indiceWeights, std::move(indiceWeightsTensorReal)); |
21702 | bindings.insert(weightsTys, std::move(weightsTysTensorReal)); |
21703 | |
  auto *R = F->createIntNBitSplitEmbeddingWeightedBags(
      "IntNBitSplitEmbeddingWeightedBags", devWeights, uvmWeights,
      weightsPlacement, weightsOffsets, weightsTys, dimOffsets, 26, indices,
      offsets, SplitEmbeddingPoolingMode::EP_SUM, outputDType, indiceWeights);
  auto *S = F->createSave("save", R);
21709 | bindings.allocate(S->getPlaceholder()); |
21710 | |
21711 | EE.compile(CompilationMode::Infer); |
21712 | EE.run(bindings); |
21713 | |
21714 | Tensor &result = *bindings.get(S->getPlaceholder()); |
21715 | |
21716 | EXPECT_TRUE(expected.isEqual(result, allowedError)); |
21717 | } |
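
/// A minimal illustrative sketch (hypothetical helper, not used by the tests
/// in this file): when the output sparse type is EST_FLOAT, the raw-byte
/// "expected" tensors above are just the little-endian float32 encodings of
/// the pooled outputs, and could be produced from float values like this.
LLVM_ATTRIBUTE_UNUSED
static Tensor packFloatsAsBytes(llvm::ArrayRef<float> vals) {
  // One UInt8 element per byte of the float payload.
  Tensor bytes(ElemKind::UInt8ITy, {vals.size() * sizeof(float)});
  auto H = bytes.getHandle<uint8_t>();
  // Copy the float payload byte-for-byte into the uint8 tensor.
  const auto *src = reinterpret_cast<const uint8_t *>(vals.data());
  for (size_t i = 0, e = vals.size() * sizeof(float); i < e; i++) {
    H.raw(i) = src[i];
  }
  return bytes;
}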
21718 | |
/// Test that IntNBitSplitEmbeddingWeightedBags is correctly supported in
/// FloatTy.
21720 | TEST_P(OperatorTest, IntNBitSplitEmbeddingWeightedBags_Float) { |
21721 | CHECK_IF_ENABLED(); |
21722 | Tensor expected(ElemKind::UInt8ITy, {2, 104}); |
21723 | expected.getHandle<uint8_t>() = { |
21724 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21725 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21726 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21727 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21728 | 0, 0, 0, 0, 245, 82, 136, 223, 0, 0, 128, 127, 102, 219, 64, |
21729 | 119, 110, 123, 32, 255, 187, 236, 246, 127, 35, 194, 76, 247, 98, 40, |
21730 | 181, 250, 241, 140, 192, 252, 49, 168, 12, 103, 94, 146, 78, 239, 193, |
21731 | 15, 40, 32, 225, 230, 3, 64, 120, 95, 190, 158, 218, 38, 84, 72, |
21732 | 126, 179, 53, 108, 57, 134, 84, 150, 7, 51, 133, 133, 10, 46, 24, |
21733 | 34, 250, 193, 77, 222, 51, 247, 115, 41, 85, 99, 193, 226, 218, 36, |
21734 | 75, 166, 142, 151, 153, 49, 154, 159, 9, 95, 127, 134, 148, 221, 208, |
21735 | 55, 35, 254, 187, 236, 246, 127, 0, 0, 128, 127, 57, 43, 23, 119, |
21736 | 54, 227, 62, 255, 187, 236, 246, 127, 190, 39, 91, 247, 241, 25, 220, |
21737 | 250, 195, 240, 233, 252, 185, 190, 52, 103, 119, 207, 30, 107, |
21738 | }; |
21739 | |
21740 | Tensor weightsOffsets(ElemKind::Int32ITy, {4}); |
21741 | weightsOffsets.getHandle<int32_t>() = {0, 384, 512, 640}; |
21742 | testIntNBitSplitEmbeddingWeightedBags<float, int32_t, float>( |
21743 | bindings_, mod_, F_, EE_, ElemKind::FloatTy, ElemKind::Int32ITy, |
21744 | getIntNBitSplitEmbeddingBagsWeightsFloat(), std::move(weightsOffsets), |
21745 | std::move(expected), SplitEmbeddingSparseType::EST_FLOAT, 0.0001); |
21746 | } |
21747 | |
/// Test that IntNBitSplitEmbeddingWeightedBags is correctly supported in
/// Float16Ty.
21749 | TEST_P(OperatorTest, IntNBitSplitEmbeddingWeightedBags_Float16) { |
21750 | CHECK_IF_ENABLED(); |
21751 | Tensor expected(ElemKind::UInt8ITy, {2, 104}); |
21752 | expected.getHandle<uint8_t>() = { |
21753 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21754 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21755 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21756 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21757 | 0, 0, 0, 0, 233, 171, 22, 67, 217, 126, 202, 70, 20, 86, 100, |
21758 | 199, 0, 224, 202, 127, 212, 222, 23, 71, 26, 97, 70, 71, 147, 61, |
21759 | 16, 71, 201, 211, 206, 197, 84, 56, 193, 197, 220, 5, 11, 200, 235, |
21760 | 192, 185, 57, 186, 87, 255, 56, 75, 209, 111, 60, 91, 154, 252, 67, |
21761 | 232, 12, 103, 195, 184, 177, 172, 56, 135, 162, 71, 187, 80, 140, 180, |
21762 | 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21763 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21764 | 0, 0, 0, 134, 249, 209, 70, 188, 229, 86, 71, 208, 250, 17, 199, |
21765 | 0, 224, 202, 127, 189, 151, 54, 71, 140, 36, 117, 71, 17, 100, 68, |
21766 | 71, 212, 88, 149, 198, 30, 185, 186, 197, 163, 15, 42, 200, |
21767 | }; |
21768 | |
21769 | Tensor weightsOffsets(ElemKind::Int32ITy, {4}); |
21770 | weightsOffsets.getHandle<int32_t>() = {0, 256, 384, 512}; |
21771 | testIntNBitSplitEmbeddingWeightedBags<float16_t, int32_t, float16_t>( |
21772 | bindings_, mod_, F_, EE_, ElemKind::FloatTy, ElemKind::Int32ITy, |
21773 | getIntNBitSplitEmbeddingBagsWeightsFloat16(), std::move(weightsOffsets), |
21774 | std::move(expected), SplitEmbeddingSparseType::EST_FLOAT, 0.005); |
21775 | } |
21776 | |
21777 | INSTANTIATE_BACKEND_TEST(OperatorStatelessTest); |
21778 | INSTANTIATE_BACKEND_TEST(OperatorTest); |
21779 | |