/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "BackendTestUtils.h"

#include "glow/Converter/TypeAToTypeBFunctionConverter.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Graph/Graph.h"
#include "glow/IR/IR.h"
#include "glow/IR/IRBuilder.h"
#include "glow/IR/Instrs.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
#include "glow/Quantization/Quantization.h"

#include "gtest/gtest.h"

#include "llvm/Support/CommandLine.h"

#include <future>

namespace glow {

llvm::cl::OptionCategory backendTestUtilsCat("BackendTestUtils Category");

unsigned parCloneCountOpt;
llvm::cl::opt<unsigned, /* ExternalStorage */ true> parCloneCountI(
    "parallel-clone-count",
    llvm::cl::desc(
        "Number of times to clone a graph in parallel. Intended to stress "
        "test different backends. Not all unit tests use this option; check "
        "each test to see whether it does."),
    llvm::cl::location(parCloneCountOpt), llvm::cl::Optional, llvm::cl::init(1),
    llvm::cl::cat(backendTestUtilsCat));

bool runDisabledTests;
llvm::cl::opt<bool, /* ExternalStorage */ true> runDisabledTestsI(
    "run-disabled-tests",
    llvm::cl::desc("If set, disabled tests will not be skipped."),
    llvm::cl::location(runDisabledTests), llvm::cl::Optional,
    llvm::cl::init(false), llvm::cl::cat(backendTestUtilsCat));

using llvm::cast;

namespace {

static Placeholder *createQuantizedPlaceholder(Module &mod,
                                               PlaceholderBindings &bindings,
                                               Tensor *tensor, float scale,
                                               int32_t offset,
                                               llvm::StringRef name) {
  auto *P = mod.createPlaceholder(tensor->getElementType(), tensor->dims(),
                                  scale, offset, name, false);
  auto *PTensor = bindings.allocate(P);
  PTensor->assign(tensor);

  return P;
}

/// Create and initialize a function using the argument
/// \p createAndInitFunction, then run the function in profiling mode to get
/// the profiling parameters. \p count is the number of times to clone the
/// Function inside itself before profiling. \returns the profiling parameters
/// for all the function nodes.
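/// Example (illustrative): profileAndGetNodeProfilingInfo(
/// createAndInitBasicFCNet, /* count */ 1) profiles a single copy of the
/// basic FC network on the Interpreter and returns its per-node profiling
/// parameters.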
static std::vector<NodeProfilingInfo>
profileAndGetNodeProfilingInfo(CreateAndInitFunction createAndInitFunction,
                               unsigned count) {
  LoweredInfoMap loweredMapForProf;
  PlaceholderBindings pBindings;
  // Note: deviceMemory = 0 is a signal to use the defaultMemory.
  ExecutionEngine PEE{"Interpreter", /* deviceMemory */ 0,
                      /* ignoreUserDeviceConfig */ true};
  auto FT = createAndInitFunction(pBindings, PEE);
  CompilationContext cctx{&pBindings, &loweredMapForProf};

  // Clone the Function as many times as requested to match the Function that
  // will be quantized.
  cloneFunInsideFun(FT, &pBindings, cctx, count);
  cctx.precisionConfig.quantMode = QuantizationMode::Profile;
  PEE.compile(cctx);
  PEE.run(pBindings);

  // We get the new function using front() because the original function was
  // deleted as part of the Partitioner quantization flow.
  return quantization::generateNodeProfilingInfos(
      pBindings, PEE.getModule().getFunctions().front(), loweredMapForProf);
}

/// Helper that sets up and \returns a pair of configs for both the interpreter
/// and the backend being tested.
static std::pair<CompilationContext, CompilationContext>
setupInterpAndBackendConfigs(
    Function *IF, ExecutionEngine &IEE, PlaceholderBindings &iBindings,
    LoweredInfoMap &ILIM, PlaceholderBindings &bBindings, LoweredInfoMap &BLIM,
    ElemKind interpElemKind, ElemKind backendElemKind,
    quantization::Schema schema, bool convertToRowwiseQuantization,
    CreateAndInitFunction createAndInitFunction, ElemKind biasElemKind,
    bool forceFP16AccumSLS, PrecisionConfiguration::Float16Format float16Format,
    unsigned count, bool convertToChannelwiseQuantization,
    bool skipQuantizeFCBias) {
  CompilationContext cctxI{&iBindings, &ILIM};
  CompilationContext cctxB{&bBindings, &BLIM};
  PrecisionConfiguration &precConfigI = cctxI.precisionConfig;
  PrecisionConfiguration &precConfigB = cctxB.precisionConfig;

  if (isQuantizedElemKind(interpElemKind) ||
      isQuantizedElemKind(backendElemKind)) {
    // If either the interpreter or the backend needs to be quantized then we
    // need to profile and get quantization infos.
    if (isQuantizedElemKind(interpElemKind)) {
      // Note: We only do parallel cloning for the backend, so always use a
      // count of 1 here.
      auto NQII =
          profileAndGetNodeProfilingInfo(createAndInitFunction, /* count */ 1);

      precConfigI.quantMode = QuantizationMode::Quantize;
      precConfigI.quantConfig.infos = NQII;
      precConfigI.quantConfig.enableRowwise = convertToRowwiseQuantization;
      precConfigI.quantConfig.enableChannelwise =
          convertToChannelwiseQuantization;
      precConfigI.quantConfig.schema = schema;
      precConfigI.quantConfig.precision = interpElemKind;
      precConfigI.quantConfig.assertAllNodesQuantized = true;
      precConfigI.quantConfig.precisionBias = biasElemKind;
      precConfigI.quantConfig.skipQuantizeFCBias = skipQuantizeFCBias;
    }

    if (isQuantizedElemKind(backendElemKind)) {
      // Always clone count times here. This matches the Function the backend
      // will quantize.
      auto NQIB = profileAndGetNodeProfilingInfo(createAndInitFunction, count);

      precConfigB.quantMode = QuantizationMode::Quantize;
      precConfigB.quantConfig.infos = NQIB;
      precConfigB.quantConfig.enableRowwise = convertToRowwiseQuantization;
      precConfigB.quantConfig.enableChannelwise =
          convertToChannelwiseQuantization;
      precConfigB.quantConfig.schema = schema;
      precConfigB.quantConfig.precision = backendElemKind;
      precConfigB.quantConfig.assertAllNodesQuantized = true;
      precConfigB.quantConfig.precisionBias = biasElemKind;
      precConfigB.quantConfig.skipQuantizeFCBias = skipQuantizeFCBias;
    }
  }

  // For now, if the ElemKind is FP16 then we convert both float and fused
  // types (to Float16Ty and UInt8FusedFP16QTy respectively).
  precConfigI.convertToFP16 = interpElemKind == ElemKind::Float16Ty;
  precConfigI.convertFusedToFP16 = interpElemKind == ElemKind::Float16Ty;
  precConfigI.forceFP16AccumSLS = forceFP16AccumSLS;
  precConfigB.convertToFP16 = backendElemKind == ElemKind::Float16Ty;
  precConfigB.convertFusedToFP16 = backendElemKind == ElemKind::Float16Ty;
  precConfigB.forceFP16AccumSLS = forceFP16AccumSLS;

  return std::make_pair(cctxI, cctxB);
}
} // namespace

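/// Example (illustrative sketch): dispatch four concurrent inference requests
/// for a network named "main" that was previously added to \p hostManager:
///   ExecutionContext context;
///   dispatchInference("main", hostManager, context,
///                     /* concurrentRequestsOpt */ 4);
/// Here `hostManager` is assumed to be a valid runtime::HostManager* that has
/// already compiled and added "main".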
void dispatchInference(const std::string &fname,
                       runtime::HostManager *hostManager,
                       ExecutionContext &context,
                       unsigned concurrentRequestsOpt,
                       bool useNewExecutionContext) {
  // If additional requests are desired, set up additional contexts.
  std::vector<std::unique_ptr<ExecutionContext>> contexts;
  std::unique_ptr<ExecutionContext> originalContextPtr(&context);
  contexts.push_back(std::move(originalContextPtr));
  if (concurrentRequestsOpt > 1) {
    // Clone the placeholder bindings into a new executionContext.
    for (unsigned i = 0, max = concurrentRequestsOpt - 1; i < max; i++) {
      std::unique_ptr<ExecutionContext> newContext =
          (useNewExecutionContext)
              ? glow::make_unique<ExecutionContext>()
              : glow::make_unique<ExecutionContext>(
                    glow::make_unique<PlaceholderBindings>(
                        context.getPlaceholderBindings()->clone()));
      contexts.push_back(std::move(newContext));
    }
  }
  std::vector<std::promise<void>> promises(concurrentRequestsOpt);
  std::vector<std::future<void>> futures;
  for (auto &promise : promises) {
    futures.push_back(promise.get_future());
  }
  for (unsigned i = 0; i < concurrentRequestsOpt; i++) {
    hostManager->runNetwork(fname, std::move(contexts[i]),
                            [&contexts, &promises,
                             i](runtime::RunIdentifierTy, Error err,
                                std::unique_ptr<ExecutionContext> contextPtr) {
                              contexts[i] = std::move(contextPtr);
                              // Expect no errors.
                              EXIT_ON_ERR(std::move(err));
                              promises[i].set_value();
                            });
  }

  for (auto &future : futures) {
    future.wait();
  }
  for (auto &c : contexts) {
    c->getPlaceholderBindings()->ensureOnHost();
  }
  // Release the original context passed in by reference so we don't free it.
  contexts[0].release();
}

/// Helper that iterates over all of the Placeholders from the function \p F
/// and converts the Tensors found in \p bindings to the same type as the
/// Placeholders if necessary.
static void convertBindingsToCorrectType(Function *F,
                                         PlaceholderBindings &bindings) {
  PlaceholderList PHs = F->findPlaceholders();
  for (Placeholder *PH : PHs) {
    Tensor *T = bindings.get(PH);
    TypeRef newTy = PH->getType();
    if (T->getType().isEqual(newTy)) {
      continue;
    }
    // For input placeholders convert the tensor type and values.
    // For output placeholders convert only the tensor type.
    if (isInput(PH, *F)) {
      ElemKind newK = newTy->getElementType();
      if (isQuantizedElemKind(newK)) {
        Tensor QT = quantization::quantizeTensor(
            *T, {newTy->getScale(), newTy->getOffset()}, newK);
        T->assign(&QT);
      } else {
        T->convertToType(newK);
      }
    } else {
      T->reset(*newTy);
    }
  }
}

/// Helper to get a float copy of a Tensor \p T if needed.
static Tensor convertToFloatIfNecessary(Tensor &T) {
  const ElemKind srcK = T.getType().getElementType();
  if (srcK == ElemKind::FloatTy) {
    return T.clone();
  }
  if (isQuantizedElemKind(srcK)) {
    return quantization::dequantizeTensor(T, ElemKind::FloatTy);
  }
  return T.getCopyConvertedToType(ElemKind::FloatTy);
}

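/// Example (illustrative sketch, as typically called from a test body):
/// compare a backend at Int8 against the Interpreter at float with a 0.05
/// tolerance, using one of the CreateAndInitFunction helpers below:
///   compareAgainstInterpreter("CPU", createAndInitBasicFCNet,
///                             ElemKind::FloatTy, ElemKind::Int8QTy,
///                             /* allowedError */ 0.05f, /* count */ 1);
/// "CPU" is assumed to be a backend compiled into this build; the remaining
/// parameters are assumed to take their declared defaults.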
void compareAgainstInterpreter(
    llvm::StringRef backendName, CreateAndInitFunction createAndInitFunction,
    ElemKind interpElemKind, ElemKind backendElemKind, float allowedError,
    unsigned count, bool convertToRowwiseQuantization,
    quantization::Schema schema, ElemKind biasElemKind, bool forceFP16AccumSLS,
    PrecisionConfiguration::Float16Format float16Format,
    bool convertToChannelwiseQuantization, bool skipQuantizeFCBias) {
  // Note: deviceMemory = 0 is a signal to use the defaultMemory.
  ExecutionEngine IEE{"Interpreter", /* deviceMemory */ 0,
                      /* ignoreUserDeviceConfig */ true};
  ExecutionEngine BEE{backendName};
  PlaceholderBindings iBindings, bBindings;

  LOG(INFO) << "Comparing Interpreter with precision "
            << Type::getElementName(interpElemKind).str() << " against "
            << backendName.str() << " with precision "
            << Type::getElementName(backendElemKind).str() << " with Bias "
            << (skipQuantizeFCBias ? "unquantized"
                                   : Type::getElementName(biasElemKind).str())
            << " with FP16 AccumulationSLS " << forceFP16AccumSLS;

  // Create the same network on the interpreter and the backend being tested.
  FunctionTensorPair IFT = createAndInitFunction(iBindings, IEE);
  FunctionTensorPair BFT = createAndInitFunction(bBindings, BEE);

  Function *IF = IFT.first;

  // Set up the configs for interpreter and backend. If one or both functions
  // will be quantized, then profile the graph on the interpreter and quantize
  // the Functions as requested.
  LoweredInfoMap ILIM, BLIM;
  auto configs = setupInterpAndBackendConfigs(
      IF, IEE, iBindings, ILIM, bBindings, BLIM, interpElemKind,
      backendElemKind, schema, convertToRowwiseQuantization,
      createAndInitFunction, biasElemKind, forceFP16AccumSLS, float16Format,
      count, convertToChannelwiseQuantization, skipQuantizeFCBias);
  CompilationContext &cctxI = configs.first;
  CompilationContext &cctxB = configs.second;

  // Skip conversion for rowwise quantized tests as they are a special case
  // which don't fit cleanly here -- e.g. RWQ-SLS has FloatTy outputs.
  if (!convertToRowwiseQuantization) {
    // We want to compare the ops themselves and not see differences in
    // conversion, so fold ElemKind conversion nodes into IO.
    cctxI.optimizationOpts.foldElemKindConversionIntoIO = true;
    cctxB.optimizationOpts.foldElemKindConversionIntoIO = true;
  }

  // Clone the Function inside itself many times if desired.
  std::unordered_set<Tensor *> resultTensors =
      cloneFunInsideFun(BFT, &bBindings, cctxB, count);
  assert(resultTensors.size() == count &&
         "Should get the same number of Tensors back as count.");

  IEE.compile(cctxI);
  BEE.compile(cctxB);

  // Again skip rowwise quantization as before.
  if (!convertToRowwiseQuantization) {
    // Now that we have compiled, precision transformation has occurred. Now
    // convert all mismatches for Placeholders given their original bindings.
    convertBindingsToCorrectType(IEE.getSingleFunctionFromModule(), iBindings);
    convertBindingsToCorrectType(BEE.getSingleFunctionFromModule(), bBindings);
  }

  IEE.run(iBindings);
  BEE.run(bBindings);

  // Compare each of our result tensors to the original. Always convert back to
  // float if necessary, as allowed error is expected to compare float.
  Tensor finalIT = convertToFloatIfNecessary(*IFT.second);
  for (Tensor *T : resultTensors) {
    Tensor finalBT = convertToFloatIfNecessary(*T);
    EXPECT_TRUE(finalIT.isEqual(finalBT, allowedError, /* verbose */ true));
  }

  // Additionally check that each of the results from the parallel cloned
  // Functions are bitwise equal.
  auto it = resultTensors.begin();
  Tensor *firstResult = *it;
  for (it++; it != resultTensors.end(); it++) {
    EXPECT_TRUE(firstResult->isBitwiseEqual(**it));
  }
}

std::unordered_set<Tensor *> cloneFunInsideFun(FunctionTensorPair FTP,
                                               PlaceholderBindings *bindings,
                                               CompilationContext &cctx,
                                               unsigned count) {
  Function *origF = FTP.first;

  // Always save the original Function's Tensor, which we will keep around.
  std::unordered_set<Tensor *> resultTensors;
  resultTensors.insert(FTP.second);

  // Nothing to do if we just want the one Function.
  if (count == 1) {
    return resultTensors;
  }

  Module *mod = origF->getParent();

  // Clone the original Function to repeatedly add it to the original.
  auto *cloneF = origF->clone("single_clone");

  // We keep the original Function, then clone/add count-1 more.
  for (size_t i = 1; i < count; i++) {
    // Clone the clone, and then add all the new nodes to the original
    // function.
    auto *tmpF = cloneF->clone("tmp" + std::to_string(i));
    std::unordered_set<Node *> clonedNodes;
    bool foundSaveNode = false;
    for (auto &N : tmpF->getNodes()) {
      clonedNodes.insert(&N);

      // For every Node we add, check if it uses a Placeholder node, and if so
      // clone it in the Module so that CSE doesn't undo all our hard work.
      for (size_t j = 0, f = N.getNumInputs(); j < f; j++) {
        Placeholder *origPH = llvm::dyn_cast<Placeholder>(N.getNthInput(j));
        if (!origPH) {
          continue;
        }

        // Clone the Placeholder, allocate it in the bindings, and replace the
        // usage of the original node to point to the clone.
        Placeholder *clonePH = mod->createPlaceholder(
            origPH->getType(), origPH->getName(), origPH->isTraining());
        Tensor *oldT = bindings->get(origPH);
        assert(oldT);
        Tensor *newT = bindings->allocate(clonePH);
        newT->assign(oldT);
        N.setNthInput(j, clonePH);

        // Save the result Tensors to return so we can compare the results of
        // all of our clones.
        if (llvm::isa<SaveNode>(N)) {
          assert(!foundSaveNode &&
                 "Can only handle Functions with a single SaveNode.");
          foundSaveNode = true;
          resultTensors.insert(newT);
        }
      }
    }
    for (auto &N : clonedNodes) {
      origF->takeOwnershipOfNode(N);
    }
    mod->eraseFunction(tmpF);
  }
  // Now erase the clone we used to copy in, as it's no longer needed.
  mod->eraseFunction(cloneF);

  // Finally, duplicate all of the node profiling infos with the new expected
  // clone's name so that the cloned copies will find the same profiling info
  // as the original node if being quantized.
  auto &origInfos = cctx.precisionConfig.quantConfig.infos;
  origInfos.reserve(count * origInfos.size());
  std::vector<NodeProfilingInfo> newInfos;
  newInfos.reserve((count - 1) * origInfos.size());
  for (const auto &PI : origInfos) {
    const size_t colonIdx = PI.nodeOutputName_.find(":");
    assert(colonIdx != std::string::npos && "Name should always contain ':'");
    for (size_t i = 1; i < count; i++) {
      std::string newName(PI.nodeOutputName_);
      // Cloned nodes end up with the original name plus the clone number
      // appended due to uniquing. Replicate the same thing.
      newName.insert(colonIdx, std::to_string(i));
      newInfos.emplace_back(newName, PI.tensorProfilingParams_);
    }
  }
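  // Illustrative example: if the original profile contained an entry for
  // "conv:0", the loop above added "conv1:0", "conv2:0", ... for clones
  // 1..count-1, matching the uniqued names of the cloned nodes.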
  origInfos.insert(origInfos.end(), newInfos.begin(), newInfos.end());

  return resultTensors;
}

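/// Example (illustrative): countNodeKind(F, Kinded::Kind::ConvolutionNodeKind)
/// returns the number of convolution nodes in \p F.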
unsigned countNodeKind(Function *F, Kinded::Kind kind) {
  unsigned count = 0;
  for (auto &n : F->getNodes()) {
    if (n.getKind() == kind) {
      count++;
    }
  }
  return count;
}
440
441void inferIntLookupTableNetInt8(Tensor *input, Tensor *out,
442 llvm::ArrayRef<int8_t> table,
443 llvm::StringRef kind) {
444 PlaceholderBindings bindings;
445 ExecutionEngine EE(kind);
446 auto &mod = EE.getModule();
447 Function *F = mod.createFunction("main");
448 auto outTy = mod.uniqueType(ElemKind::Int8QTy, {(dim_t)input->size()}, 3, 3);
449 auto var = createQuantizedPlaceholder(mod, bindings, input,
450 input->getType().getScale(),
451 input->getType().getOffset(), "var");
452 auto *lookupTable = F->createIntLookupTable("lookuptable", var, table, outTy);
453 auto *result = F->createSave("ret", lookupTable);
454 auto *resultTensor = bindings.allocate(result->getPlaceholder());
455
456 EE.compile(CompilationMode::Infer);
457 bindings.allocate(mod.getPlaceholders());
458
459 updateInputPlaceholders(bindings, {var}, {input});
460 EE.run(bindings);
461 out->assign(resultTensor);
462}

void inferIntLookupTableNetInt16(Tensor *input, Tensor *out,
                                 llvm::ArrayRef<int16_t> table,
                                 llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto outTy = mod.uniqueType(ElemKind::Int16QTy, {(dim_t)input->size()}, 3, 3);
  auto var = createQuantizedPlaceholder(mod, bindings, input,
                                        input->getType().getScale(),
                                        input->getType().getOffset(), "var");
  auto *lookupTable = F->createIntLookupTable("lookuptable", var, table, outTy);
  auto *result = F->createSave("ret", lookupTable);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);
  bindings.allocate(mod.getPlaceholders());

  updateInputPlaceholders(bindings, {var}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferConvNet(Tensor *inputs, Tensor *filter, Tensor *bias, Tensor *out,
                  llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  Placeholder *inputP;
  Placeholder *filterP;
  Placeholder *biasP;
  Placeholder *outP;
  TypeRef OT;
  if (inputs->getType().isQuantizedType()) {
    auto &outType = out->getType();
    auto &inType = inputs->getType();
    auto &filterType = filter->getType();
    auto &biasType = bias->getType();
    inputP = createQuantizedPlaceholder(
        mod, bindings, inputs, inType.getScale(), inType.getOffset(), "inputP");
    filterP = createQuantizedPlaceholder(mod, bindings, filter,
                                         filterType.getScale(),
                                         filterType.getOffset(), "filterP");
    biasP = createQuantizedPlaceholder(mod, bindings, bias, biasType.getScale(),
                                       biasType.getOffset(), "biasP");
    outP = createQuantizedPlaceholder(mod, bindings, out, outType.getScale(),
                                      outType.getOffset(), "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims(),
                                    outType.getScale(), outType.getOffset());
  } else {
    inputP = createPlaceholder(mod, bindings, inputs, "inputP");
    filterP = createPlaceholder(mod, bindings, filter, "filterP");
    biasP = createPlaceholder(mod, bindings, bias, "biasP");
    outP = createPlaceholder(mod, bindings, out, "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims());
  }
  auto *conv = F->createConv("conv", inputP, filterP, biasP, OT, 5, 3, 4, 1);
  auto *result = F->createSave("ret", conv, outP);
  auto *resultTensor = bindings.get(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {inputP, filterP, biasP},
                          {inputs, filter, bias});
  EE.run(bindings);
  out->assign(resultTensor);
}

int inferConvReluNet(Tensor *inputs, Tensor *filter, Tensor *bias, Tensor *out,
                     unsigned_t kernel, unsigned_t stride, unsigned_t pad,
                     llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  Placeholder *inputP;
  Placeholder *filterP;
  Placeholder *biasP;
  Placeholder *outP;
  TypeRef OT;
  if (inputs->getType().isQuantizedType()) {
    auto &outType = out->getType();
    auto &inType = inputs->getType();
    auto &filterType = filter->getType();
    auto &biasType = bias->getType();
    inputP = createQuantizedPlaceholder(
        mod, bindings, inputs, inType.getScale(), inType.getOffset(), "inputP");
    filterP = createQuantizedPlaceholder(mod, bindings, filter,
                                         filterType.getScale(),
                                         filterType.getOffset(), "filterP");
    biasP = createQuantizedPlaceholder(mod, bindings, bias, biasType.getScale(),
                                       biasType.getOffset(), "biasP");
    outP = createQuantizedPlaceholder(mod, bindings, out, outType.getScale(),
                                      outType.getOffset(), "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims(),
                                    outType.getScale(), outType.getOffset());
  } else {
    inputP = createPlaceholder(mod, bindings, inputs, "inputP");
    filterP = createPlaceholder(mod, bindings, filter, "filterP");
    biasP = createPlaceholder(mod, bindings, bias, "biasP");
    outP = createPlaceholder(mod, bindings, out, "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims());
  }
  auto *conv =
      F->createConv("conv", inputP, filterP, biasP, OT, kernel, stride, pad, 1);
  // Relu
  auto *relu = F->createRELU("relu", conv);
  auto *result = F->createSave("ret", relu, outP);
  auto *resultTensor = bindings.get(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  // Check fusion depending on build option.
  // EXPECT_EQ(conv->getFusedActivation(), FusedActivation::RELU);

  updateInputPlaceholders(bindings, {inputP, filterP, biasP},
                          {inputs, filter, bias});
  EE.run(bindings);
  out->assign(resultTensor);
  return conv->getFusedActivation();
}

void trainConvNet(Tensor *inputs, Tensor *kernel1, Tensor *bias1,
                  Tensor *kernel2, Tensor *bias2, Tensor *selected,
                  llvm::ArrayRef<dim_t> shape1, llvm::ArrayRef<dim_t> shape2,
                  Tensor *out, llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.03;
  TC.momentum = 0.3;
  TC.L2Decay = 0.01;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *conv1 = F->createConv(bindings, "conv1", var1, 3, {5, 3}, {2, 1},
                                {2, 1, 2, 1}, 1);
    bindings.get(cast<Placeholder>(conv1->getFilter()))->assign(kernel1);
    bindings.get(cast<Placeholder>(conv1->getBias()))->assign(bias1);
    auto *reshape1 = F->createReshape("reshape1", conv1, shape1);
    auto *conv2 = F->createConv(bindings, "conv2", reshape1, 2, 2, 2, 0, 1);
    bindings.get(cast<Placeholder>(conv2->getFilter()))->assign(kernel2);
    bindings.get(cast<Placeholder>(conv2->getBias()))->assign(bias2);
    auto *reshape2 = F->createReshape("reshape2", conv2, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    F->createSave("ret", softmax);
  }

  auto *TF = glow::differentiate(F, TC);
  auto tfName = TF->getName();
  auto fName = F->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));

  runBatch(EET, trainingBindings, 8, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  EEI.compile(CompilationMode::Infer);
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  updateInputPlaceholders(inferBindings, {var1, var2}, {inputs, selected});
  EEI.run(inferBindings, fName);
  out->assign(res);
}

void inferLocalResponseNormalizationNet(Tensor *inputs, Tensor *out,
                                        llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var");
  auto *lrn = F->createLocalResponseNormalization("lrn", var, 5, 3.0, 0.5, 1.5);
  auto *result = F->createSave("ret", lrn);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

void trainLocalResponseNormalizationNet(Tensor *inputs, Tensor *weights,
                                        Tensor *bias, Tensor *selected,
                                        llvm::ArrayRef<dim_t> shape1,
                                        llvm::ArrayRef<dim_t> shape2,
                                        Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings, trainingBindings;
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines{&EEI, &EET};
  TrainingConfig TC;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.06;
  TC.momentum = 0.1;
  TC.L2Decay = 0.01;
  Placeholder *var1, *var2;
  std::string fName;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    Function *F = mod.createFunction("main");
    fName = F->getName().str();
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *lrn =
        F->createLocalResponseNormalization("lrn", reshape1, 2, 2.0, 0.5, 1.0);
    auto *reshape2 = F->createReshape("reshape2", lrn, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    auto *result = F->createSave("ret", softmax);
    bindings.allocate(result->getPlaceholder());
  }
  auto *TF = glow::differentiate(EET.getModule().getFunction(fName), TC);
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  bindings.clear();
  bindings.allocate(EEI.getModule().getPlaceholders());

  runBatch(EET, trainingBindings, 8, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(bindings);
  var1 = bindings.getPlaceholderByNameSlow("var1");
  var2 = bindings.getPlaceholderByNameSlow("var2");
  EEI.compile(CompilationMode::Infer);

  runBatch(EEI, bindings, 1, sampleCounter, {var1, var2}, {inputs, selected});
  out->assign(bindings.get(bindings.getPlaceholderByNameSlow("ret")));
}

void trainAvgPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, llvm::ArrayRef<dim_t> shape1,
                     llvm::ArrayRef<dim_t> shape2, Tensor *out,
                     llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines{&EEI, &EET};
  TrainingConfig TC;
  PlaceholderBindings bindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.01;
  TC.momentum = 0.4;
  TC.L2Decay = 0.01;
  Placeholder *var1, *var2;
  std::string fName;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    Function *F = mod.createFunction("main");
    fName = F->getName().str();
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *pool = F->createAvgPool("pool", reshape1, 2, 2, 0);
    auto *reshape2 = F->createReshape("reshape2", pool, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    auto *result = F->createSave("ret", softmax);
    bindings.allocate(result->getPlaceholder());
  }
  auto *TF = glow::differentiate(EET.getModule().getFunction("main"), TC);
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  bindings.clear();
  bindings.allocate(EEI.getModule().getPlaceholders());

  runBatch(EET, trainingBindings, 10, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(bindings);
  var1 = bindings.getPlaceholderByNameSlow("var1");
  var2 = bindings.getPlaceholderByNameSlow("var2");
  EEI.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2}, {inputs, selected});
  EEI.run(bindings);
  out->assign(bindings.get(bindings.getPlaceholderByNameSlow("ret")));
}

void trainMaxPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, llvm::ArrayRef<dim_t> shape1,
                     llvm::ArrayRef<dim_t> shape2, Tensor *out,
                     llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.03;
  TC.momentum = 0.3;
  TC.L2Decay = 0.003;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    bindings.clear();
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *pool = F->createMaxPool("pool", reshape1, 5, 3, 4);
    auto *reshape2 = F->createReshape("reshape2", pool->getResult(), shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    F->createSave("ret", softmax);
  }
  auto *TF = glow::differentiate(F, TC);
  auto fName = F->getName();
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));

  runBatch(EET, trainingBindings, 7, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  EEI.compile(CompilationMode::Infer);
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  runBatch(EEI, inferBindings, 1, sampleCounter, {var1, var2},
           {inputs, selected}, fName);
  out->assign(res);
}

void inferSmallConv(Tensor *inputs, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");
  auto *in = createPlaceholder(mod, bindings, inputs, "in", "NHWC");
  auto *C = F->createConv(bindings, "conv2a", in, 64, 1, 1, 0, 1);
  bindings.get(cast<Placeholder>(C->getFilter()))->getHandle().clear(0.3);
  bindings.get(cast<Placeholder>(C->getBias()))->getHandle().clear(0.4);
  auto *result = F->createSave("ret", C);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());
  convertPlaceholdersToConstants(F, bindings, {in, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {in}, {inputs});
  EE.run(bindings);

  out->assign(resultTensor);
}

void inferGroupConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 1, 1, 16},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 16; j++) {
      FH.at({i, 0, 0, j}) = (i + j) / 100.0;
    }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();

  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 2, 1, 128});

  ConvolutionNode *CN =
      F->createConv("Conv", input, filter, zeroBias, outTy, 1, 1, 0, 2);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquarePaddingConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 1, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 32; j++) {
      FH.at({i, 0, 0, j}) = (i + j) / 100.0;
    }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 4, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {1, 1}, {1, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquareKernelConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 2, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 2; j++)
      for (dim_t k = 0; k < 32; k++) {
        FH.at({i, j, 0, k}) = (i + j + k) / 100.0;
      }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 3, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {2, 1}, {1, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquareStrideConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 2, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 2; j++)
      for (dim_t k = 0; k < 32; k++) {
        FH.at({i, j, 0, k}) = (i + j + k) / 100.0;
      }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 2, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {2, 1}, {2, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferConvDKKC8(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {3, 3, 3, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 3 * 3 * 3 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {192, 3, 3, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  filterTensor->zero();
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 192; i++)
    for (dim_t j = 0; j < 3; j++)
      for (dim_t k = 0; k < 3; k++)
        for (dim_t l = 0; l < 32; l++) {
          FH.at({i, j, k, l}) = (i + j + k + l) / 200.0;
        }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {192}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {3, 3, 3, 192});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {3, 3}, {1, 1}, {1, 1, 1, 1}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void trainSoftMaxNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, Tensor *out, llvm::StringRef kind) {
  ExecutionEngine EEI(kind);
  ExecutionEngine EET(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.003;
  TC.momentum = 0.7;
  TC.L2Decay = 0.001;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *softmax = F->createSoftMax("softmax", fc, var2);
    F->createSave("ret", softmax);
  }

  auto *TF = glow::differentiate(F, TC);
  auto tfName = TF->getName();
  auto fName = F->getName();

  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  runBatch(EET, trainingBindings, 30, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  EEI.compile(CompilationMode::Infer);
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  updateInputPlaceholders(inferBindings, {var1, var2}, {inputs, selected});
  EEI.run(inferBindings, fName);
  out->assign(res);
}

void inferTanhConcatNet(Tensor *input1, Tensor *input2, Tensor *input3,
                        Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var1 = createPlaceholder(mod, bindings, input1, "var1");
  auto *var2 = createPlaceholder(mod, bindings, input2, "var2");
  auto *var3 = createPlaceholder(mod, bindings, input3, "var3");
  auto *T1 = F->createTanh("tanh1", var1);
  auto *T2 = F->createTanh("tanh2", var2);
  auto *T3 = F->createTanh("tanh3", var3);
  Node *C1 = F->createConcat("concat", {T1, T2}, 0);
  Node *C2 = F->createConcat("concat", {T2, T3, C1, T2}, 0);
  auto *result = F->createSave("ret", C2);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2, var3},
                          {input1, input2, input3});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferBasicConvNet(Tensor *inputs, Tensor *out, llvm::StringRef kind,
                       size_t convDepth) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var", "NCHW");
  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *conv = F->createConv(bindings, "conv", tr, convDepth, {5, 5}, {2, 2},
                             {1, 1, 1, 1}, 1);
  bindings.get(cast<Placeholder>(conv->getFilter()))->getHandle().clear(0.1);
  bindings.get(cast<Placeholder>(conv->getBias()))->getHandle().clear(0.2);
  auto *pool = F->createMaxPool("pool", conv, 2, 2, 0);
  auto *result = F->createSave("ret", pool->getResult());
  auto *resultTensor = bindings.allocate(result->getPlaceholder());
  convertPlaceholdersToConstants(F, bindings, {var, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

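/// Note: this helper matches the CreateAndInitFunction signature, so it can be
/// passed directly to compareAgainstInterpreter, e.g. (illustrative, assuming
/// the declared default arguments for the trailing parameters):
///   compareAgainstInterpreter("Interpreter", createAndInitBasicFCNet,
///                             ElemKind::FloatTy, ElemKind::Float16Ty, 0.01f);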
FunctionTensorPair createAndInitBasicFCNet(PlaceholderBindings &bindings,
                                           ExecutionEngine &EE) {
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  auto *var = mod.createPlaceholder(ElemKind::FloatTy, {2, 3, 16, 16}, "var",
                                    false, "NCHW");
  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *fc = F->createFullyConnected(bindings, "fc", tr, 16);
  auto *rl0 = F->createRELU("relu", fc);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", rl0, 8);
  auto *rl1 = F->createRELU("relu", fc2);
  bindings.get(cast<Placeholder>(fc->getWeights()))->getHandle().clear(0.8);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(1.5);
  auto *result = F->createSave("ret", rl1);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  PseudoRNG PRNG;
  bindings.allocate(var)->getHandle().initXavier(1, PRNG);

  return std::make_pair(F, resultTensor);
}

void inferMixedNet(Tensor *inputs, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var", "NCHW");
  auto *selected =
      mod.createPlaceholder(ElemKind::Int64ITy, {2, 1}, "selected", false);

  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *fc = F->createFullyConnected(bindings, "fc", tr, 16);
  auto *th0 = F->createTanh("tanh", fc);
  auto *sg0 = F->createSigmoid("sig", fc);
  auto *A1 = F->createAdd("add", th0, sg0);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", A1, 16);

  auto *R = F->createRegression("reg", fc2, fc2);
  auto *SM = F->createSoftMax("SM", R, selected);
  auto *result = F->createSave("ret", SM);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  bindings.get(cast<Placeholder>(fc->getWeights()))->getHandle().clear(0.4);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(3.5);

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferComplexNet1(Tensor *inputs1, Tensor *inputs2, Tensor *inputs3,
                      Tensor *inputs4, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var1 = createPlaceholder(mod, bindings, inputs1, "var1");
  auto *var2 = createPlaceholder(mod, bindings, inputs2, "var2");
  auto *var3 = createPlaceholder(mod, bindings, inputs3, "var3");
  auto *var4 = createPlaceholder(mod, bindings, inputs4, "var4");
  auto *conv1 = F->createConv(bindings, "conv1", var1, 6, 4, 1, 2, 1);
  bindings.get(cast<Placeholder>(conv1->getFilter()))->getHandle().clear(0.5);
  bindings.get(cast<Placeholder>(conv1->getBias()))->getHandle().clear(0.7);
  auto *sigmoid1 = F->createSigmoid("sigmoid1", conv1);
  auto *fc1 = F->createFullyConnected(bindings, "fc1", var2, 2352);
  bindings.get(cast<Placeholder>(fc1->getWeights()))->getHandle().clear(0.6);
  auto *reshape1 = F->createReshape("reshape1", fc1, {8, 14, 28, 6}, "NHWC");
  auto *relu1 = F->createRELU("relu1", reshape1);
  auto *pool1 = F->createMaxPool("pool1", relu1, 2, 2, 1);
  auto *add = F->createAdd("add", sigmoid1, pool1->getResult());
  auto *tanh = F->createTanh("tanh", add);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", var3, 720);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(1.1);
  auto *reshape2 = F->createReshape("reshape2", fc2, {8, 8, 15, 6}, "NHWC");
  auto *mul = F->createMul("mul", tanh, reshape2);
  auto *sigmoid2 = F->createSigmoid("sigmoid2", mul);
  auto *conv2 = F->createConv(bindings, "conv2", sigmoid2, 7, 3, 2, 1, 1);
  bindings.get(cast<Placeholder>(conv2->getFilter()))->getHandle().clear(0.3);
  bindings.get(cast<Placeholder>(conv2->getBias()))->getHandle().clear(1.3);
  auto *reshape3 = F->createReshape("reshape3", conv2, {8, 8, 7, 4}, "NHWC");
  auto *sub = F->createSub("sub", reshape3, var4);
  auto *relu2 = F->createRELU("relu2", sub);
  auto *pool2 = F->createAvgPool("pool2", relu2, 3, 2, 1);
  auto *sigmoid3 = F->createSigmoid("sigmoid3", pool2);
  auto *result = F->createSave("ret", sigmoid3);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2, var3, var4},
                          {inputs1, inputs2, inputs3, inputs4});
  EE.run(bindings);
  out->assign(resultTensor);
}

namespace {
// Helper for initializing conv node filter/bias from input tensors.
static void initConv(PlaceholderBindings &bindings, ConvolutionNode *C,
                     Tensor &filter, Tensor &bias) {
  bindings.get(cast<Placeholder>(C->getFilter()))->assign(&filter);
  bindings.get(cast<Placeholder>(C->getBias()))->assign(&bias);
}
} // namespace

void inferTinyResnet(Tensor *input, Tensor *out, std::vector<Tensor> &weights,
                     llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *in = createPlaceholder(mod, bindings, input, "in", "NHWC");
  auto *conv1 = F->createConv(bindings, "conv1", in, 256, 1, 1, 0, 1);
  auto *conv2a = F->createConv(bindings, "conv2a", conv1, 64, 1, 1, 0, 1);
  auto *relu2a = F->createRELU("relu2a", conv2a);
  auto *conv2b = F->createConv(bindings, "conv2b", relu2a, 64, 3, 1, 1, 1);
  auto *relu2b = F->createRELU("relu2b", conv2b);
  auto *conv2c = F->createConv(bindings, "conv2c", relu2b, 256, 1, 1, 0, 1);
  auto *add = F->createAdd("add", conv2c, conv1);
  auto *relu = F->createRELU("res2a_relu", add);
  auto *result = F->createSave("ret", relu);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  initConv(bindings, conv1, weights[0], weights[1]);
  initConv(bindings, conv2a, weights[2], weights[3]);
  initConv(bindings, conv2b, weights[4], weights[5]);
  initConv(bindings, conv2c, weights[6], weights[7]);
  convertPlaceholdersToConstants(F, bindings, {in, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {in}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferExtract3D(Tensor *input, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *inputs = createPlaceholder(mod, bindings, input, "inputs");

  auto *x1 = F->createSlice("ex1", inputs, {0, 5, 0}, {1, 100, 100});
  auto *x2 = F->createSlice("ex2", inputs, {1, 5, 0}, {2, 100, 100});
  auto *x3 = F->createSlice("ex3", inputs, {2, 5, 0}, {3, 100, 100});
  auto *x4 = F->createSlice("ex4", inputs, {3, 5, 0}, {4, 100, 100});

  auto *x12 = F->createConcat("x12", {x1, x2}, 1);
  auto *x34 = F->createConcat("x34", {x3, x4}, 1);
  auto *x13 = F->createConcat("x13", {x1, x3}, 1);
  auto *x24 = F->createConcat("x24", {x2, x4}, 1);

  auto *add1 = F->createAdd("add1", x12, x34);
  auto *add2 = F->createAdd("add2", x13, x24);
  auto *add3 = F->createAdd("add3", add1, add2);

  auto *e = F->createSlice("slice", add3, {0, 55, 50}, {1, 150, 100});
  auto *result = F->createSave("ret", e);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {inputs}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferMaxSplat(Tensor *input, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  auto T = mod.uniqueType(ElemKind::Int8QTy, input->getType().dims(),
                          2 * input->getType().getScale(),
                          -input->getType().getOffset());
  auto *var = createQuantizedPlaceholder(mod, bindings, input,
                                         input->getType().getScale(),
                                         input->getType().getOffset(), "var");
  auto *rescale = F->createRescaleQuantized("rescale", var, T);

  auto *splat1 = F->createSplat("splat1", T, 0.0);
  auto *splat2 = F->createSplat("splat2", T, 5.0);

  auto *max1 = F->createMax("max1", rescale, splat1);
  auto *max2 = F->createMax("max2", splat2, max1);

  auto *result = F->createSave("ret", max2);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void insertCompiledFunction(llvm::StringRef name, CompiledFunction *func,
                            runtime::DeviceManager *device, Module *mod) {
  runtime::FunctionMapTy functionMap;
  functionMap[name.str()] = func;

  std::promise<void> addPromise;
  auto fut = addPromise.get_future();
  Error addErr = Error::empty();
  device->addNetwork(mod, std::move(functionMap),
                     [&addPromise, &addErr](const Module *, Error err) {
                       addErr = std::move(err);
                       addPromise.set_value();
                     });
  fut.wait();
  EXIT_ON_ERR(std::move(addErr));
}

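/// Typical pattern (illustrative sketch): add a compiled function to a device
/// and then execute it:
///   insertCompiledFunction("main", compiledFunc.get(), device, &mod);
///   ExecutionContext context;
///   runOnDevice(context, "main", device);
/// Here `compiledFunc`, `device`, and `mod` are assumed to be created by the
/// caller (e.g. via Backend::compile and a backend-specific DeviceManager).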
void runOnDevice(ExecutionContext &context, llvm::StringRef name,
                 runtime::DeviceManager *device) {
  std::unique_ptr<ExecutionContext> contextPtr(&context);
  std::promise<void> runPromise;
  auto fut = runPromise.get_future();
  Error runErr = Error::empty();
  device->runFunction(
      name.str(), std::move(contextPtr),
      [&runPromise, &runErr](runtime::RunIdentifierTy, Error err,
                             std::unique_ptr<ExecutionContext> contextPtr) {
        // Don't delete context.
        contextPtr.release();
        runErr = std::move(err);
        runPromise.set_value();
      });
  fut.wait();
  EXIT_ON_ERR(std::move(runErr));
}

Constant *createRandomizedConstant(Module &mod, TypeRef type,
                                   llvm::ArrayRef<dim_t> dims,
                                   llvm::StringRef name) {
  auto *c = mod.createConstant(mod.uniqueTypeWithNewShape(type, dims), name);

  switch (type->getElementType()) {
  case ElemKind::FloatTy: {
    c->getHandle<float>().initXavier(c->getType()->size() * 2, mod.getPRNG());
    break;
  }
  case ElemKind::Float16Ty: {
    c->getHandle<float16_t>().initXavier(c->getType()->size() * 2,
                                         mod.getPRNG());
    break;
  }
  case ElemKind::BFloat16Ty: {
    c->getHandle<bfloat16_t>().initXavier(c->getType()->size() * 2,
                                          mod.getPRNG());
    break;
  }
  case ElemKind::Int32QTy: {
    c->getHandle<int32_t>().randomize(INT32_MIN, INT32_MAX, mod.getPRNG());
    break;
  }
  case ElemKind::Int8QTy: {
    c->getHandle<int8_t>().randomize(INT8_MIN, INT8_MAX, mod.getPRNG());
    break;
  }
  case ElemKind::UInt8FusedQTy:
  case ElemKind::UInt8FusedFP16QTy: {
    c->getHandle<uint8_t>().randomize(UINT8_MIN, UINT8_MAX, mod.getPRNG());
    break;
  }
  default:
    LOG(FATAL) << "Unsupported type: " << type->getElementName().str();
  }

  return c;
}

Constant *createRandomFusedRowwiseQuantizedConstant(Module &mod,
                                                    llvm::ArrayRef<dim_t> dims,
                                                    llvm::StringRef name,
                                                    bool useFusedFP16) {
  auto T = mod.uniqueType(
      (useFusedFP16 ? ElemKind::UInt8FusedFP16QTy : ElemKind::UInt8FusedQTy),
      {1}, 1, 0);
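  // Note: fused rowwise-quantized tensors store, per row, the quantized
  // payload followed by that row's scale and offset (float16 or float
  // depending on the fused kind), which is why two scale/offset-sized columns
  // are appended to dims[1] below.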
  const dim_t sizeScaleOffset =
      useFusedFP16 ? sizeof(float16_t) : sizeof(float);
  Constant *c = createRandomizedConstant(
      mod, T, {dims[0], dims[1] + 2 * sizeScaleOffset}, name);

  // Range (0, 255) -> (-0.1, 0.1)
  constexpr float scale = 1.0f / 1275;
  constexpr float offset = -0.1;
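  // e.g. the quantized value 0 maps to 0 * scale + offset = -0.1, and 255
  // maps to 255 / 1275 - 0.1 = 0.1.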
  auto cH = c->getPayload().getHandle<uint8_t>();
  for (unsigned i = 0, e = c->dims()[0]; i < e; i++) {
    if (useFusedFP16) {
      cH.setFusedScaleOffsetInRow<float16_t>(i, scale, offset);
    } else {
      cH.setFusedScaleOffsetInRow<float>(i, scale, offset);
    }
  }

  return c;
}

Placeholder *createFusedRowwiseQuantizedPlaceholder(Module &mod,
                                                    llvm::ArrayRef<dim_t> dims,
                                                    llvm::StringRef name,
                                                    bool useFusedFP16) {
  auto T = useFusedFP16 ? ElemKind::UInt8FusedFP16QTy : ElemKind::UInt8FusedQTy;
  const dim_t sizeScaleOffset =
      useFusedFP16 ? sizeof(float16_t) : sizeof(float);
  constexpr float scale = 1.0f / 1275;
  constexpr float offset = -0.1;
  Placeholder *ph = mod.createPlaceholder(
      T, {dims[0], dims[1] + 2 * sizeScaleOffset}, scale, offset, name, false);

  return ph;
}

// Helper for creating and initializing placeholders from tensors.
Placeholder *createPlaceholder(Module &mod, PlaceholderBindings &bindings,
                               Tensor *tensor, llvm::StringRef name,
                               const std::string &layout) {
  auto *P = mod.createPlaceholder(&tensor->getType(), name, false, layout);
  auto *PTensor = bindings.allocate(P);
  PTensor->assign(tensor);
  return P;
}

} // namespace glow