/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "BackendTestUtils.h"

#include "glow/Converter/TypeAToTypeBFunctionConverter.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Graph/Graph.h"
#include "glow/IR/IR.h"
#include "glow/IR/IRBuilder.h"
#include "glow/IR/Instrs.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
#include "glow/Quantization/Quantization.h"

#include "gtest/gtest.h"

#include "llvm/Support/CommandLine.h"

#include <future>

namespace glow {

llvm::cl::OptionCategory backendTestUtilsCat("BackendTestUtils Category");

unsigned parCloneCountOpt;
llvm::cl::opt<unsigned, /* ExternalStorage */ true> parCloneCountI(
    "parallel-clone-count",
    llvm::cl::desc(
        "Number of times to clone a graph in parallel. Intended to stress "
        "test different backends. Not all unit tests use this option; check "
        "each test to see whether it does."),
    llvm::cl::location(parCloneCountOpt), llvm::cl::Optional, llvm::cl::init(1),
    llvm::cl::cat(backendTestUtilsCat));

bool runDisabledTests;
llvm::cl::opt<bool, /* ExternalStorage */ true> runDisabledTestsI(
    "run-disabled-tests",
    llvm::cl::desc("If set, disabled tests will not be skipped."),
    llvm::cl::location(runDisabledTests), llvm::cl::Optional,
    llvm::cl::init(false), llvm::cl::cat(backendTestUtilsCat));

using llvm::cast;

namespace {

static Placeholder *createQuantizedPlaceholder(Module &mod,
                                               PlaceholderBindings &bindings,
                                               Tensor *tensor, float scale,
                                               int32_t offset,
                                               llvm::StringRef name) {
  auto *P = mod.createPlaceholder(tensor->getElementType(), tensor->dims(),
                                  scale, offset, name, false);
  auto *PTensor = bindings.allocate(P);
  PTensor->assign(tensor);

  return P;
}

/// Create and initialize a function using the argument
/// \p createAndInitFunction, then run the function in profiling mode to get
/// the profiling parameters. \p count is the number of times to clone the
/// Function inside itself before profiling. \returns the profiling parameters
/// for all the function nodes.
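/// Example (illustrative): profileAndGetNodeProfilingInfo(
/// createAndInitBasicFCNet, /* count */ 1) profiles a single copy of the
/// basic FC network on the Interpreter and returns its per-node profiling
/// parameters.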
static std::vector<NodeProfilingInfo>
profileAndGetNodeProfilingInfo(CreateAndInitFunction createAndInitFunction,
                               unsigned count) {
  LoweredInfoMap loweredMapForProf;
  PlaceholderBindings pBindings;
  // Note: deviceMemory = 0 is a signal to use the defaultMemory.
  ExecutionEngine PEE{"Interpreter", /* deviceMemory */ 0,
                      /* ignoreUserDeviceConfig */ true};
  auto FT = createAndInitFunction(pBindings, PEE);
  CompilationContext cctx{&pBindings, &loweredMapForProf};

  // Clone the Function as many times as requested to match the Function that
  // will be quantized.
  cloneFunInsideFun(FT, &pBindings, cctx, count);
  cctx.precisionConfig.quantMode = QuantizationMode::Profile;
  PEE.compile(cctx);
  PEE.run(pBindings);

  // We get the new function using front() because the original function was
  // deleted as part of the Partitioner quantization flow.
  return quantization::generateNodeProfilingInfos(
      pBindings, PEE.getModule().getFunctions().front(), loweredMapForProf);
}

/// Helper that sets up and \returns a pair of configs for both the interpreter
/// and the backend being tested.
static std::pair<CompilationContext, CompilationContext>
setupInterpAndBackendConfigs(
    Function *IF, ExecutionEngine &IEE, PlaceholderBindings &iBindings,
    LoweredInfoMap &ILIM, PlaceholderBindings &bBindings, LoweredInfoMap &BLIM,
    ElemKind interpElemKind, ElemKind backendElemKind,
    quantization::Schema schema, bool convertToRowwiseQuantization,
    CreateAndInitFunction createAndInitFunction, ElemKind biasElemKind,
    bool forceFP16AccumSLS, PrecisionConfiguration::Float16Format float16Format,
    unsigned count, bool convertToChannelwiseQuantization,
    bool skipQuantizeFCBias) {
  CompilationContext cctxI{&iBindings, &ILIM};
  CompilationContext cctxB{&bBindings, &BLIM};
  PrecisionConfiguration &precConfigI = cctxI.precisionConfig;
  PrecisionConfiguration &precConfigB = cctxB.precisionConfig;

  if (isQuantizedElemKind(interpElemKind) ||
      isQuantizedElemKind(backendElemKind)) {
    // If either the interpreter or the backend needs to be quantized then we
    // need to profile and get quantization infos.
    if (isQuantizedElemKind(interpElemKind)) {
      // Note: We only do parallel cloning for the backend, so always use a
      // count of 1 here.
      auto NQII =
          profileAndGetNodeProfilingInfo(createAndInitFunction, /* count */ 1);

      precConfigI.quantMode = QuantizationMode::Quantize;
      precConfigI.quantConfig.infos = NQII;
      precConfigI.quantConfig.enableRowwise = convertToRowwiseQuantization;
      precConfigI.quantConfig.enableChannelwise =
          convertToChannelwiseQuantization;
      precConfigI.quantConfig.schema = schema;
      precConfigI.quantConfig.precision = interpElemKind;
      precConfigI.quantConfig.assertAllNodesQuantized = true;
      precConfigI.quantConfig.precisionBias = biasElemKind;
      precConfigI.quantConfig.skipQuantizeFCBias = skipQuantizeFCBias;
    }

    if (isQuantizedElemKind(backendElemKind)) {
      // Always clone count times here. This matches the Function the backend
      // will quantize.
      auto NQIB = profileAndGetNodeProfilingInfo(createAndInitFunction, count);

      precConfigB.quantMode = QuantizationMode::Quantize;
      precConfigB.quantConfig.infos = NQIB;
      precConfigB.quantConfig.enableRowwise = convertToRowwiseQuantization;
      precConfigB.quantConfig.enableChannelwise =
          convertToChannelwiseQuantization;
      precConfigB.quantConfig.schema = schema;
      precConfigB.quantConfig.precision = backendElemKind;
      precConfigB.quantConfig.assertAllNodesQuantized = true;
      precConfigB.quantConfig.precisionBias = biasElemKind;
      precConfigB.quantConfig.skipQuantizeFCBias = skipQuantizeFCBias;
    }
  }

  // For now, if the ElemKind is FP16 then we convert both float and fused
  // types (to Float16Ty and UInt8FusedFP16QTy respectively).
  precConfigI.convertToFP16 = interpElemKind == ElemKind::Float16Ty;
  precConfigI.convertFusedToFP16 = interpElemKind == ElemKind::Float16Ty;
  precConfigI.forceFP16AccumSLS = forceFP16AccumSLS;
  precConfigB.convertToFP16 = backendElemKind == ElemKind::Float16Ty;
  precConfigB.convertFusedToFP16 = backendElemKind == ElemKind::Float16Ty;
  precConfigB.forceFP16AccumSLS = forceFP16AccumSLS;

  return std::make_pair(cctxI, cctxB);
}
} // namespace

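/// Example (illustrative sketch): dispatch four concurrent inference requests
/// for a network named "main" that was previously added to \p hostManager:
///   ExecutionContext context;
///   dispatchInference("main", hostManager, context,
///                     /* concurrentRequestsOpt */ 4);
/// Here `hostManager` is assumed to be a valid runtime::HostManager* that has
/// already compiled and added "main".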
void dispatchInference(const std::string &fname,
                       runtime::HostManager *hostManager,
                       ExecutionContext &context,
                       unsigned concurrentRequestsOpt,
                       bool useNewExecutionContext) {
  // If additional requests are desired, set up additional contexts.
  std::vector<std::unique_ptr<ExecutionContext>> contexts;
  std::unique_ptr<ExecutionContext> originalContextPtr(&context);
  contexts.push_back(std::move(originalContextPtr));
  if (concurrentRequestsOpt > 1) {
    // Clone the placeholder bindings into a new executionContext.
    for (unsigned i = 0, max = concurrentRequestsOpt - 1; i < max; i++) {
      std::unique_ptr<ExecutionContext> newContext =
          (useNewExecutionContext)
              ? glow::make_unique<ExecutionContext>()
              : glow::make_unique<ExecutionContext>(
                    glow::make_unique<PlaceholderBindings>(
                        context.getPlaceholderBindings()->clone()));
      contexts.push_back(std::move(newContext));
    }
  }
  std::vector<std::promise<void>> promises(concurrentRequestsOpt);
  std::vector<std::future<void>> futures;
  for (auto &promise : promises) {
    futures.push_back(promise.get_future());
  }
  for (unsigned i = 0; i < concurrentRequestsOpt; i++) {
    hostManager->runNetwork(fname, std::move(contexts[i]),
                            [&contexts, &promises,
                             i](runtime::RunIdentifierTy, Error err,
                                std::unique_ptr<ExecutionContext> contextPtr) {
                              contexts[i] = std::move(contextPtr);
                              // Expect no errors.
                              EXIT_ON_ERR(std::move(err));
                              promises[i].set_value();
                            });
  }

  for (auto &future : futures) {
    future.wait();
  }
  for (auto &c : contexts) {
    c->getPlaceholderBindings()->ensureOnHost();
  }
  // Release the original context passed in by reference so we don't free it.
  contexts[0].release();
}

/// Helper that iterates over all of the Placeholders from the function \p F
/// and converts the Tensors found in \p bindings to the same type as the
/// Placeholders if necessary.
static void convertBindingsToCorrectType(Function *F,
                                         PlaceholderBindings &bindings) {
  PlaceholderList PHs = F->findPlaceholders();
  for (Placeholder *PH : PHs) {
    Tensor *T = bindings.get(PH);
    TypeRef newTy = PH->getType();
    if (T->getType().isEqual(newTy)) {
      continue;
    }
    // For input placeholders convert the tensor type and values.
    // For output placeholders convert only the tensor type.
    if (isInput(PH, *F)) {
      ElemKind newK = newTy->getElementType();
      if (isQuantizedElemKind(newK)) {
        Tensor QT = quantization::quantizeTensor(
            *T, {newTy->getScale(), newTy->getOffset()}, newK);
        T->assign(&QT);
      } else {
        T->convertToType(newK);
      }
    } else {
      T->reset(*newTy);
    }
  }
}

/// Helper to get a float copy of a Tensor \p T if needed.
static Tensor convertToFloatIfNecessary(Tensor &T) {
  const ElemKind srcK = T.getType().getElementType();
  if (srcK == ElemKind::FloatTy) {
    return T.clone();
  }
  if (isQuantizedElemKind(srcK)) {
    return quantization::dequantizeTensor(T, ElemKind::FloatTy);
  }
  return T.getCopyConvertedToType(ElemKind::FloatTy);
}

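/// Example (illustrative sketch, as typically called from a test body):
/// compare a backend at Int8 against the Interpreter at float with a 0.05
/// tolerance, using one of the CreateAndInitFunction helpers below:
///   compareAgainstInterpreter("CPU", createAndInitBasicFCNet,
///                             ElemKind::FloatTy, ElemKind::Int8QTy,
///                             /* allowedError */ 0.05f, /* count */ 1);
/// "CPU" is assumed to be a backend compiled into this build; the remaining
/// parameters are assumed to take their declared defaults.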
void compareAgainstInterpreter(
    llvm::StringRef backendName, CreateAndInitFunction createAndInitFunction,
    ElemKind interpElemKind, ElemKind backendElemKind, float allowedError,
    unsigned count, bool convertToRowwiseQuantization,
    quantization::Schema schema, ElemKind biasElemKind, bool forceFP16AccumSLS,
    PrecisionConfiguration::Float16Format float16Format,
    bool convertToChannelwiseQuantization, bool skipQuantizeFCBias) {
  // Note: deviceMemory = 0 is a signal to use the defaultMemory.
  ExecutionEngine IEE{"Interpreter", /* deviceMemory */ 0,
                      /* ignoreUserDeviceConfig */ true};
  ExecutionEngine BEE{backendName};
  PlaceholderBindings iBindings, bBindings;

  LOG(INFO) << "Comparing Interpreter with precision "
            << Type::getElementName(interpElemKind).str() << " against "
            << backendName.str() << " with precision "
            << Type::getElementName(backendElemKind).str() << " with Bias "
            << (skipQuantizeFCBias ? "unquantized"
                                   : Type::getElementName(biasElemKind).str())
            << " with FP16 AccumulationSLS " << forceFP16AccumSLS;

  // Create the same network on the interpreter and the backend being tested.
  FunctionTensorPair IFT = createAndInitFunction(iBindings, IEE);
  FunctionTensorPair BFT = createAndInitFunction(bBindings, BEE);

  Function *IF = IFT.first;

  // Set up the configs for interpreter and backend. If one or both functions
  // will be quantized, then profile the graph on the interpreter and quantize
  // the Functions as requested.
  LoweredInfoMap ILIM, BLIM;
  auto configs = setupInterpAndBackendConfigs(
      IF, IEE, iBindings, ILIM, bBindings, BLIM, interpElemKind,
      backendElemKind, schema, convertToRowwiseQuantization,
      createAndInitFunction, biasElemKind, forceFP16AccumSLS, float16Format,
      count, convertToChannelwiseQuantization, skipQuantizeFCBias);
  CompilationContext &cctxI = configs.first;
  CompilationContext &cctxB = configs.second;

  // Skip conversion for rowwise quantized tests as they are a special case
  // which don't fit cleanly here -- e.g. RWQ-SLS has FloatTy outputs.
  if (!convertToRowwiseQuantization) {
    // We want to compare the ops themselves and not see differences in
    // conversion, so fold ElemKind conversion nodes into IO.
    cctxI.optimizationOpts.foldElemKindConversionIntoIO = true;
    cctxB.optimizationOpts.foldElemKindConversionIntoIO = true;
  }

  // Clone the Function inside itself many times if desired.
  std::unordered_set<Tensor *> resultTensors =
      cloneFunInsideFun(BFT, &bBindings, cctxB, count);
  assert(resultTensors.size() == count &&
         "Should get the same number of Tensors back as count.");

  IEE.compile(cctxI);
  BEE.compile(cctxB);

  // Again skip rowwise quantization as before.
  if (!convertToRowwiseQuantization) {
    // Now that we have compiled, precision transformation has occurred. Now
    // convert all mismatches for Placeholders given their original bindings.
    convertBindingsToCorrectType(IEE.getSingleFunctionFromModule(), iBindings);
    convertBindingsToCorrectType(BEE.getSingleFunctionFromModule(), bBindings);
  }

  IEE.run(iBindings);
  BEE.run(bBindings);

  // Compare each of our result tensors to the original. Always convert back to
  // float if necessary, as allowed error is expected to compare float.
  Tensor finalIT = convertToFloatIfNecessary(*IFT.second);
  for (Tensor *T : resultTensors) {
    Tensor finalBT = convertToFloatIfNecessary(*T);
    EXPECT_TRUE(finalIT.isEqual(finalBT, allowedError, /* verbose */ true));
  }

  // Additionally check that each of the results from the parallel cloned
  // Functions are bitwise equal.
  auto it = resultTensors.begin();
  Tensor *firstResult = *it;
  for (it++; it != resultTensors.end(); it++) {
    EXPECT_TRUE(firstResult->isBitwiseEqual(**it));
  }
}

std::unordered_set<Tensor *> cloneFunInsideFun(FunctionTensorPair FTP,
                                               PlaceholderBindings *bindings,
                                               CompilationContext &cctx,
                                               unsigned count) {
  Function *origF = FTP.first;

  // Always save the original Function's Tensor, which we will keep around.
  std::unordered_set<Tensor *> resultTensors;
  resultTensors.insert(FTP.second);

  // Nothing to do if we just want the one Function.
  if (count == 1) {
    return resultTensors;
  }

  Module *mod = origF->getParent();

  // Clone the original Function to repeatedly add it to the original.
  auto *cloneF = origF->clone("single_clone");

  // We keep the original Function, then clone/add count-1 more.
  for (size_t i = 1; i < count; i++) {
    // Clone the clone, and then add all the new nodes to the original
    // function.
    auto *tmpF = cloneF->clone("tmp" + std::to_string(i));
    std::unordered_set<Node *> clonedNodes;
    bool foundSaveNode = false;
    for (auto &N : tmpF->getNodes()) {
      clonedNodes.insert(&N);

      // For every Node we add, check if it uses a Placeholder node, and if so
      // clone it in the Module so that CSE doesn't undo all our hard work.
      for (size_t j = 0, f = N.getNumInputs(); j < f; j++) {
        Placeholder *origPH = llvm::dyn_cast<Placeholder>(N.getNthInput(j));
        if (!origPH) {
          continue;
        }

        // Clone the Placeholder, allocate it in the bindings, and replace the
        // usage of the original node to point to the clone.
        Placeholder *clonePH = mod->createPlaceholder(
            origPH->getType(), origPH->getName(), origPH->isTraining());
        Tensor *oldT = bindings->get(origPH);
        assert(oldT);
        Tensor *newT = bindings->allocate(clonePH);
        newT->assign(oldT);
        N.setNthInput(j, clonePH);

        // Save the result Tensors to return so we can compare the results of
        // all of our clones.
        if (llvm::isa<SaveNode>(N)) {
          assert(!foundSaveNode &&
                 "Can only handle Functions with a single SaveNode.");
          foundSaveNode = true;
          resultTensors.insert(newT);
        }
      }
    }
    for (auto &N : clonedNodes) {
      origF->takeOwnershipOfNode(N);
    }
    mod->eraseFunction(tmpF);
  }
  // Now erase the clone we used to copy in, as it's no longer needed.
  mod->eraseFunction(cloneF);

  // Finally, duplicate all of the node profiling infos with the new expected
  // clone's name so that the cloned copies will find the same profiling info
  // as the original node if being quantized.
  auto &origInfos = cctx.precisionConfig.quantConfig.infos;
  origInfos.reserve(count * origInfos.size());
  std::vector<NodeProfilingInfo> newInfos;
  newInfos.reserve((count - 1) * origInfos.size());
  for (const auto &PI : origInfos) {
    const size_t colonIdx = PI.nodeOutputName_.find(":");
    assert(colonIdx != std::string::npos && "Name should always contain ':'");
    for (size_t i = 1; i < count; i++) {
      std::string newName(PI.nodeOutputName_);
      // Cloned nodes end up with the original name plus the clone number
      // appended due to uniquing. Replicate the same thing.
      newName.insert(colonIdx, std::to_string(i));
      newInfos.emplace_back(newName, PI.tensorProfilingParams_);
    }
  }
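  // Illustrative example: if the original profile contained an entry for
  // "conv:0", the loop above added "conv1:0", "conv2:0", ... for clones
  // 1..count-1, matching the uniqued names of the cloned nodes.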
  origInfos.insert(origInfos.end(), newInfos.begin(), newInfos.end());

  return resultTensors;
}

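/// Example (illustrative): countNodeKind(F, Kinded::Kind::ConvolutionNodeKind)
/// returns the number of convolution nodes in \p F.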
unsigned countNodeKind(Function *F, Kinded::Kind kind) {
  unsigned count = 0;
  for (auto &n : F->getNodes()) {
    if (n.getKind() == kind) {
      count++;
    }
  }
  return count;
}
440
441void inferIntLookupTableNetInt8(Tensor *input, Tensor *out,
442 llvm::ArrayRef<int8_t> table,
443 llvm::StringRef kind) {
444 PlaceholderBindings bindings;
445 ExecutionEngine EE(kind);
446 auto &mod = EE.getModule();
447 Function *F = mod.createFunction("main");
448 auto outTy = mod.uniqueType(ElemKind::Int8QTy, {(dim_t)input->size()}, 3, 3);
449 auto var = createQuantizedPlaceholder(mod, bindings, input,
450 input->getType().getScale(),
451 input->getType().getOffset(), "var");
452 auto *lookupTable = F->createIntLookupTable("lookuptable", var, table, outTy);
453 auto *result = F->createSave("ret", lookupTable);
454 auto *resultTensor = bindings.allocate(result->getPlaceholder());
455
456 EE.compile(CompilationMode::Infer);
457 bindings.allocate(mod.getPlaceholders());
458
459 updateInputPlaceholders(bindings, {var}, {input});
460 EE.run(bindings);
461 out->assign(resultTensor);
462}

void inferIntLookupTableNetInt16(Tensor *input, Tensor *out,
                                 llvm::ArrayRef<int16_t> table,
                                 llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto outTy = mod.uniqueType(ElemKind::Int16QTy, {(dim_t)input->size()}, 3, 3);
  auto var = createQuantizedPlaceholder(mod, bindings, input,
                                        input->getType().getScale(),
                                        input->getType().getOffset(), "var");
  auto *lookupTable = F->createIntLookupTable("lookuptable", var, table, outTy);
  auto *result = F->createSave("ret", lookupTable);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);
  bindings.allocate(mod.getPlaceholders());

  updateInputPlaceholders(bindings, {var}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferConvNet(Tensor *inputs, Tensor *filter, Tensor *bias, Tensor *out,
                  llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  Placeholder *inputP;
  Placeholder *filterP;
  Placeholder *biasP;
  Placeholder *outP;
  TypeRef OT;
  if (inputs->getType().isQuantizedType()) {
    auto &outType = out->getType();
    auto &inType = inputs->getType();
    auto &filterType = filter->getType();
    auto &biasType = bias->getType();
    inputP = createQuantizedPlaceholder(
        mod, bindings, inputs, inType.getScale(), inType.getOffset(), "inputP");
    filterP = createQuantizedPlaceholder(mod, bindings, filter,
                                         filterType.getScale(),
                                         filterType.getOffset(), "filterP");
    biasP = createQuantizedPlaceholder(mod, bindings, bias, biasType.getScale(),
                                       biasType.getOffset(), "biasP");
    outP = createQuantizedPlaceholder(mod, bindings, out, outType.getScale(),
                                      outType.getOffset(), "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims(),
                                    outType.getScale(), outType.getOffset());
  } else {
    inputP = createPlaceholder(mod, bindings, inputs, "inputP");
    filterP = createPlaceholder(mod, bindings, filter, "filterP");
    biasP = createPlaceholder(mod, bindings, bias, "biasP");
    outP = createPlaceholder(mod, bindings, out, "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims());
  }
  auto *conv = F->createConv("conv", inputP, filterP, biasP, OT, 5, 3, 4, 1);
  auto *result = F->createSave("ret", conv, outP);
  auto *resultTensor = bindings.get(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {inputP, filterP, biasP},
                          {inputs, filter, bias});
  EE.run(bindings);
  out->assign(resultTensor);
}

int inferConvReluNet(Tensor *inputs, Tensor *filter, Tensor *bias, Tensor *out,
                     unsigned_t kernel, unsigned_t stride, unsigned_t pad,
                     llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  Placeholder *inputP;
  Placeholder *filterP;
  Placeholder *biasP;
  Placeholder *outP;
  TypeRef OT;
  if (inputs->getType().isQuantizedType()) {
    auto &outType = out->getType();
    auto &inType = inputs->getType();
    auto &filterType = filter->getType();
    auto &biasType = bias->getType();
    inputP = createQuantizedPlaceholder(
        mod, bindings, inputs, inType.getScale(), inType.getOffset(), "inputP");
    filterP = createQuantizedPlaceholder(mod, bindings, filter,
                                         filterType.getScale(),
                                         filterType.getOffset(), "filterP");
    biasP = createQuantizedPlaceholder(mod, bindings, bias, biasType.getScale(),
                                       biasType.getOffset(), "biasP");
    outP = createQuantizedPlaceholder(mod, bindings, out, outType.getScale(),
                                      outType.getOffset(), "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims(),
                                    outType.getScale(), outType.getOffset());
  } else {
    inputP = createPlaceholder(mod, bindings, inputs, "inputP");
    filterP = createPlaceholder(mod, bindings, filter, "filterP");
    biasP = createPlaceholder(mod, bindings, bias, "biasP");
    outP = createPlaceholder(mod, bindings, out, "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims());
  }
  auto *conv =
      F->createConv("conv", inputP, filterP, biasP, OT, kernel, stride, pad, 1);
  // Relu
  auto *relu = F->createRELU("relu", conv);
  auto *result = F->createSave("ret", relu, outP);
  auto *resultTensor = bindings.get(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  // Check fusion depending on build option.
  // EXPECT_EQ(conv->getFusedActivation(), FusedActivation::RELU);

  updateInputPlaceholders(bindings, {inputP, filterP, biasP},
                          {inputs, filter, bias});
  EE.run(bindings);
  out->assign(resultTensor);
  return conv->getFusedActivation();
}

void trainConvNet(Tensor *inputs, Tensor *kernel1, Tensor *bias1,
                  Tensor *kernel2, Tensor *bias2, Tensor *selected,
                  llvm::ArrayRef<dim_t> shape1, llvm::ArrayRef<dim_t> shape2,
                  Tensor *out, llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.03;
  TC.momentum = 0.3;
  TC.L2Decay = 0.01;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *conv1 = F->createConv(bindings, "conv1", var1, 3, {5, 3}, {2, 1},
                                {2, 1, 2, 1}, 1);
    bindings.get(cast<Placeholder>(conv1->getFilter()))->assign(kernel1);
    bindings.get(cast<Placeholder>(conv1->getBias()))->assign(bias1);
    auto *reshape1 = F->createReshape("reshape1", conv1, shape1);
    auto *conv2 = F->createConv(bindings, "conv2", reshape1, 2, 2, 2, 0, 1);
    bindings.get(cast<Placeholder>(conv2->getFilter()))->assign(kernel2);
    bindings.get(cast<Placeholder>(conv2->getBias()))->assign(bias2);
    auto *reshape2 = F->createReshape("reshape2", conv2, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    F->createSave("ret", softmax);
  }

  auto *TF = glow::differentiate(F, TC);
  auto tfName = TF->getName();
  auto fName = F->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));

  runBatch(EET, trainingBindings, 8, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  EEI.compile(CompilationMode::Infer);
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  updateInputPlaceholders(inferBindings, {var1, var2}, {inputs, selected});
  EEI.run(inferBindings, fName);
  out->assign(res);
}

void inferLocalResponseNormalizationNet(Tensor *inputs, Tensor *out,
                                        llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var");
  auto *lrn = F->createLocalResponseNormalization("lrn", var, 5, 3.0, 0.5, 1.5);
  auto *result = F->createSave("ret", lrn);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

void trainLocalResponseNormalizationNet(Tensor *inputs, Tensor *weights,
                                        Tensor *bias, Tensor *selected,
                                        llvm::ArrayRef<dim_t> shape1,
                                        llvm::ArrayRef<dim_t> shape2,
                                        Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings, trainingBindings;
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines{&EEI, &EET};
  TrainingConfig TC;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.06;
  TC.momentum = 0.1;
  TC.L2Decay = 0.01;
  Placeholder *var1, *var2;
  std::string fName;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    Function *F = mod.createFunction("main");
    fName = F->getName().str();
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *lrn =
        F->createLocalResponseNormalization("lrn", reshape1, 2, 2.0, 0.5, 1.0);
    auto *reshape2 = F->createReshape("reshape2", lrn, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    auto *result = F->createSave("ret", softmax);
    bindings.allocate(result->getPlaceholder());
  }
  auto *TF = glow::differentiate(EET.getModule().getFunction(fName), TC);
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  bindings.clear();
  bindings.allocate(EEI.getModule().getPlaceholders());

  runBatch(EET, trainingBindings, 8, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(bindings);
  var1 = bindings.getPlaceholderByNameSlow("var1");
  var2 = bindings.getPlaceholderByNameSlow("var2");
  EEI.compile(CompilationMode::Infer);

  runBatch(EEI, bindings, 1, sampleCounter, {var1, var2}, {inputs, selected});
  out->assign(bindings.get(bindings.getPlaceholderByNameSlow("ret")));
}

void trainAvgPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, llvm::ArrayRef<dim_t> shape1,
                     llvm::ArrayRef<dim_t> shape2, Tensor *out,
                     llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines{&EEI, &EET};
  TrainingConfig TC;
  PlaceholderBindings bindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.01;
  TC.momentum = 0.4;
  TC.L2Decay = 0.01;
  Placeholder *var1, *var2;
  std::string fName;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    Function *F = mod.createFunction("main");
    fName = F->getName().str();
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *pool = F->createAvgPool("pool", reshape1, 2, 2, 0);
    auto *reshape2 = F->createReshape("reshape2", pool, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    auto *result = F->createSave("ret", softmax);
    bindings.allocate(result->getPlaceholder());
  }
  auto *TF = glow::differentiate(EET.getModule().getFunction("main"), TC);
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  bindings.clear();
  bindings.allocate(EEI.getModule().getPlaceholders());

  runBatch(EET, trainingBindings, 10, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(bindings);
  var1 = bindings.getPlaceholderByNameSlow("var1");
  var2 = bindings.getPlaceholderByNameSlow("var2");
  EEI.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2}, {inputs, selected});
  EEI.run(bindings);
  out->assign(bindings.get(bindings.getPlaceholderByNameSlow("ret")));
}

void trainMaxPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, llvm::ArrayRef<dim_t> shape1,
                     llvm::ArrayRef<dim_t> shape2, Tensor *out,
                     llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.03;
  TC.momentum = 0.3;
  TC.L2Decay = 0.003;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    bindings.clear();
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *pool = F->createMaxPool("pool", reshape1, 5, 3, 4);
    auto *reshape2 = F->createReshape("reshape2", pool->getResult(), shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    F->createSave("ret", softmax);
  }
  auto *TF = glow::differentiate(F, TC);
  auto fName = F->getName();
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));

  runBatch(EET, trainingBindings, 7, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  EEI.compile(CompilationMode::Infer);
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  runBatch(EEI, inferBindings, 1, sampleCounter, {var1, var2},
           {inputs, selected}, fName);
  out->assign(res);
}

void inferSmallConv(Tensor *inputs, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");
  auto *in = createPlaceholder(mod, bindings, inputs, "in", "NHWC");
  auto *C = F->createConv(bindings, "conv2a", in, 64, 1, 1, 0, 1);
  bindings.get(cast<Placeholder>(C->getFilter()))->getHandle().clear(0.3);
  bindings.get(cast<Placeholder>(C->getBias()))->getHandle().clear(0.4);
  auto *result = F->createSave("ret", C);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());
  convertPlaceholdersToConstants(F, bindings, {in, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {in}, {inputs});
  EE.run(bindings);

  out->assign(resultTensor);
}

void inferGroupConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 1, 1, 16},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 16; j++) {
      FH.at({i, 0, 0, j}) = (i + j) / 100.0;
    }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();

  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 2, 1, 128});

  ConvolutionNode *CN =
      F->createConv("Conv", input, filter, zeroBias, outTy, 1, 1, 0, 2);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquarePaddingConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 1, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 32; j++) {
      FH.at({i, 0, 0, j}) = (i + j) / 100.0;
    }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 4, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {1, 1}, {1, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquareKernelConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 2, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 2; j++)
      for (dim_t k = 0; k < 32; k++) {
        FH.at({i, j, 0, k}) = (i + j + k) / 100.0;
      }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 3, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {2, 1}, {1, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferNonSquareStrideConv(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 2 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 2, 1, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 128; i++)
    for (dim_t j = 0; j < 2; j++)
      for (dim_t k = 0; k < 32; k++) {
        FH.at({i, j, 0, k}) = (i + j + k) / 100.0;
      }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 2, 5, 128});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {2, 1}, {2, 1}, {0, 1, 2, 3}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void inferConvDKKC8(Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {3, 3, 3, 32}, "input", false);
  auto *inputTensor = bindings.allocate(input);
  auto IH = inputTensor->getHandle();
  for (size_t i = 0; i < 3 * 3 * 3 * 32; i++) {
    IH.raw(i) = (i + 1) / 10.0;
  }

  auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {192, 3, 3, 32},
                                       "filter", false);
  auto *filterTensor = bindings.allocate(filter);
  filterTensor->zero();
  auto FH = filterTensor->getHandle();
  for (dim_t i = 0; i < 192; i++)
    for (dim_t j = 0; j < 3; j++)
      for (dim_t k = 0; k < 3; k++)
        for (dim_t l = 0; l < 32; l++) {
          FH.at({i, j, k, l}) = (i + j + k + l) / 200.0;
        }
  auto *zeroBias =
      mod.createPlaceholder(ElemKind::FloatTy, {192}, "bias", false);
  auto *zeroBiasTensor = bindings.allocate(zeroBias);
  zeroBiasTensor->zero();
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {3, 3, 3, 192});

  ConvolutionNode *CN = F->createConv("Conv", input, filter, zeroBias, outTy,
                                      {3, 3}, {1, 1}, {1, 1, 1, 1}, 1);
  SaveNode *result = F->createSave("save", CN);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  EE.run(bindings);
  out->assign(resultTensor);
}

void trainSoftMaxNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, Tensor *out, llvm::StringRef kind) {
  ExecutionEngine EEI(kind);
  ExecutionEngine EET(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.003;
  TC.momentum = 0.7;
  TC.L2Decay = 0.001;
  Function *F;
  Placeholder *var1, *var2;
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *softmax = F->createSoftMax("softmax", fc, var2);
    F->createSave("ret", softmax);
  }

  auto *TF = glow::differentiate(F, TC);
  auto tfName = TF->getName();
  auto fName = F->getName();

  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  runBatch(EET, trainingBindings, 30, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  EEI.compile(CompilationMode::Infer);
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  updateInputPlaceholders(inferBindings, {var1, var2}, {inputs, selected});
  EEI.run(inferBindings, fName);
  out->assign(res);
}

void inferTanhConcatNet(Tensor *input1, Tensor *input2, Tensor *input3,
                        Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var1 = createPlaceholder(mod, bindings, input1, "var1");
  auto *var2 = createPlaceholder(mod, bindings, input2, "var2");
  auto *var3 = createPlaceholder(mod, bindings, input3, "var3");
  auto *T1 = F->createTanh("tanh1", var1);
  auto *T2 = F->createTanh("tanh2", var2);
  auto *T3 = F->createTanh("tanh3", var3);
  Node *C1 = F->createConcat("concat", {T1, T2}, 0);
  Node *C2 = F->createConcat("concat", {T2, T3, C1, T2}, 0);
  auto *result = F->createSave("ret", C2);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2, var3},
                          {input1, input2, input3});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferBasicConvNet(Tensor *inputs, Tensor *out, llvm::StringRef kind,
                       size_t convDepth) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var", "NCHW");
  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *conv = F->createConv(bindings, "conv", tr, convDepth, {5, 5}, {2, 2},
                             {1, 1, 1, 1}, 1);
  bindings.get(cast<Placeholder>(conv->getFilter()))->getHandle().clear(0.1);
  bindings.get(cast<Placeholder>(conv->getBias()))->getHandle().clear(0.2);
  auto *pool = F->createMaxPool("pool", conv, 2, 2, 0);
  auto *result = F->createSave("ret", pool->getResult());
  auto *resultTensor = bindings.allocate(result->getPlaceholder());
  convertPlaceholdersToConstants(F, bindings, {var, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

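/// Note: this helper matches the CreateAndInitFunction signature, so it can be
/// passed directly to compareAgainstInterpreter, e.g. (illustrative, assuming
/// the declared default arguments for the trailing parameters):
///   compareAgainstInterpreter("Interpreter", createAndInitBasicFCNet,
///                             ElemKind::FloatTy, ElemKind::Float16Ty, 0.01f);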
FunctionTensorPair createAndInitBasicFCNet(PlaceholderBindings &bindings,
                                           ExecutionEngine &EE) {
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  auto *var = mod.createPlaceholder(ElemKind::FloatTy, {2, 3, 16, 16}, "var",
                                    false, "NCHW");
  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *fc = F->createFullyConnected(bindings, "fc", tr, 16);
  auto *rl0 = F->createRELU("relu", fc);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", rl0, 8);
  auto *rl1 = F->createRELU("relu", fc2);
  bindings.get(cast<Placeholder>(fc->getWeights()))->getHandle().clear(0.8);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(1.5);
  auto *result = F->createSave("ret", rl1);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  PseudoRNG PRNG;
  bindings.allocate(var)->getHandle().initXavier(1, PRNG);

  return std::make_pair(F, resultTensor);
}

void inferMixedNet(Tensor *inputs, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var = createPlaceholder(mod, bindings, inputs, "var", "NCHW");
  auto *selected =
      mod.createPlaceholder(ElemKind::Int64ITy, {2, 1}, "selected", false);

  auto *tr = F->createTranspose("tr", var, NCHW2NHWC);
  auto *fc = F->createFullyConnected(bindings, "fc", tr, 16);
  auto *th0 = F->createTanh("tanh", fc);
  auto *sg0 = F->createSigmoid("sig", fc);
  auto *A1 = F->createAdd("add", th0, sg0);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", A1, 16);

  auto *R = F->createRegression("reg", fc2, fc2);
  auto *SM = F->createSoftMax("SM", R, selected);
  auto *result = F->createSave("ret", SM);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  bindings.get(cast<Placeholder>(fc->getWeights()))->getHandle().clear(0.4);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(3.5);

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {inputs});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferComplexNet1(Tensor *inputs1, Tensor *inputs2, Tensor *inputs3,
                      Tensor *inputs4, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  auto *var1 = createPlaceholder(mod, bindings, inputs1, "var1");
  auto *var2 = createPlaceholder(mod, bindings, inputs2, "var2");
  auto *var3 = createPlaceholder(mod, bindings, inputs3, "var3");
  auto *var4 = createPlaceholder(mod, bindings, inputs4, "var4");
  auto *conv1 = F->createConv(bindings, "conv1", var1, 6, 4, 1, 2, 1);
  bindings.get(cast<Placeholder>(conv1->getFilter()))->getHandle().clear(0.5);
  bindings.get(cast<Placeholder>(conv1->getBias()))->getHandle().clear(0.7);
  auto *sigmoid1 = F->createSigmoid("sigmoid1", conv1);
  auto *fc1 = F->createFullyConnected(bindings, "fc1", var2, 2352);
  bindings.get(cast<Placeholder>(fc1->getWeights()))->getHandle().clear(0.6);
  auto *reshape1 = F->createReshape("reshape1", fc1, {8, 14, 28, 6}, "NHWC");
  auto *relu1 = F->createRELU("relu1", reshape1);
  auto *pool1 = F->createMaxPool("pool1", relu1, 2, 2, 1);
  auto *add = F->createAdd("add", sigmoid1, pool1->getResult());
  auto *tanh = F->createTanh("tanh", add);
  auto *fc2 = F->createFullyConnected(bindings, "fc2", var3, 720);
  bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(1.1);
  auto *reshape2 = F->createReshape("reshape2", fc2, {8, 8, 15, 6}, "NHWC");
  auto *mul = F->createMul("mul", tanh, reshape2);
  auto *sigmoid2 = F->createSigmoid("sigmoid2", mul);
  auto *conv2 = F->createConv(bindings, "conv2", sigmoid2, 7, 3, 2, 1, 1);
  bindings.get(cast<Placeholder>(conv2->getFilter()))->getHandle().clear(0.3);
  bindings.get(cast<Placeholder>(conv2->getBias()))->getHandle().clear(1.3);
  auto *reshape3 = F->createReshape("reshape3", conv2, {8, 8, 7, 4}, "NHWC");
  auto *sub = F->createSub("sub", reshape3, var4);
  auto *relu2 = F->createRELU("relu2", sub);
  auto *pool2 = F->createAvgPool("pool2", relu2, 3, 2, 1);
  auto *sigmoid3 = F->createSigmoid("sigmoid3", pool2);
  auto *result = F->createSave("ret", sigmoid3);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var1, var2, var3, var4},
                          {inputs1, inputs2, inputs3, inputs4});
  EE.run(bindings);
  out->assign(resultTensor);
}

namespace {
// Helper for initializing conv node filter/bias from input tensors.
static void initConv(PlaceholderBindings &bindings, ConvolutionNode *C,
                     Tensor &filter, Tensor &bias) {
  bindings.get(cast<Placeholder>(C->getFilter()))->assign(&filter);
  bindings.get(cast<Placeholder>(C->getBias()))->assign(&bias);
}
} // namespace

void inferTinyResnet(Tensor *input, Tensor *out, std::vector<Tensor> &weights,
                     llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *in = createPlaceholder(mod, bindings, input, "in", "NHWC");
  auto *conv1 = F->createConv(bindings, "conv1", in, 256, 1, 1, 0, 1);
  auto *conv2a = F->createConv(bindings, "conv2a", conv1, 64, 1, 1, 0, 1);
  auto *relu2a = F->createRELU("relu2a", conv2a);
  auto *conv2b = F->createConv(bindings, "conv2b", relu2a, 64, 3, 1, 1, 1);
  auto *relu2b = F->createRELU("relu2b", conv2b);
  auto *conv2c = F->createConv(bindings, "conv2c", relu2b, 256, 1, 1, 0, 1);
  auto *add = F->createAdd("add", conv2c, conv1);
  auto *relu = F->createRELU("res2a_relu", add);
  auto *result = F->createSave("ret", relu);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  initConv(bindings, conv1, weights[0], weights[1]);
  initConv(bindings, conv2a, weights[2], weights[3]);
  initConv(bindings, conv2b, weights[4], weights[5]);
  initConv(bindings, conv2c, weights[6], weights[7]);
  convertPlaceholdersToConstants(F, bindings, {in, result->getPlaceholder()});

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {in}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferExtract3D(Tensor *input, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("main");

  auto *inputs = createPlaceholder(mod, bindings, input, "inputs");

  auto *x1 = F->createSlice("ex1", inputs, {0, 5, 0}, {1, 100, 100});
  auto *x2 = F->createSlice("ex2", inputs, {1, 5, 0}, {2, 100, 100});
  auto *x3 = F->createSlice("ex3", inputs, {2, 5, 0}, {3, 100, 100});
  auto *x4 = F->createSlice("ex4", inputs, {3, 5, 0}, {4, 100, 100});

  auto *x12 = F->createConcat("x12", {x1, x2}, 1);
  auto *x34 = F->createConcat("x34", {x3, x4}, 1);
  auto *x13 = F->createConcat("x13", {x1, x3}, 1);
  auto *x24 = F->createConcat("x24", {x2, x4}, 1);

  auto *add1 = F->createAdd("add1", x12, x34);
  auto *add2 = F->createAdd("add2", x13, x24);
  auto *add3 = F->createAdd("add3", add1, add2);

  auto *e = F->createSlice("slice", add3, {0, 55, 50}, {1, 150, 100});
  auto *result = F->createSave("ret", e);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {inputs}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void inferMaxSplat(Tensor *input, Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  auto T = mod.uniqueType(ElemKind::Int8QTy, input->getType().dims(),
                          2 * input->getType().getScale(),
                          -input->getType().getOffset());
  auto *var = createQuantizedPlaceholder(mod, bindings, input,
                                         input->getType().getScale(),
                                         input->getType().getOffset(), "var");
  auto *rescale = F->createRescaleQuantized("rescale", var, T);

  auto *splat1 = F->createSplat("splat1", T, 0.0);
  auto *splat2 = F->createSplat("splat2", T, 5.0);

  auto *max1 = F->createMax("max1", rescale, splat1);
  auto *max2 = F->createMax("max2", splat2, max1);

  auto *result = F->createSave("ret", max2);
  auto *resultTensor = bindings.allocate(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  updateInputPlaceholders(bindings, {var}, {input});
  EE.run(bindings);
  out->assign(resultTensor);
}

void insertCompiledFunction(llvm::StringRef name, CompiledFunction *func,
                            runtime::DeviceManager *device, Module *mod) {
  runtime::FunctionMapTy functionMap;
  functionMap[name.str()] = func;

  std::promise<void> addPromise;
  auto fut = addPromise.get_future();
  Error addErr = Error::empty();
  device->addNetwork(mod, std::move(functionMap),
                     [&addPromise, &addErr](const Module *, Error err) {
                       addErr = std::move(err);
                       addPromise.set_value();
                     });
  fut.wait();
  EXIT_ON_ERR(std::move(addErr));
}

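/// Typical pattern (illustrative sketch): add a compiled function to a device
/// and then execute it:
///   insertCompiledFunction("main", compiledFunc.get(), device, &mod);
///   ExecutionContext context;
///   runOnDevice(context, "main", device);
/// Here `compiledFunc`, `device`, and `mod` are assumed to be created by the
/// caller (e.g. via Backend::compile and a backend-specific DeviceManager).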
void runOnDevice(ExecutionContext &context, llvm::StringRef name,
                 runtime::DeviceManager *device) {
  std::unique_ptr<ExecutionContext> contextPtr(&context);
  std::promise<void> runPromise;
  auto fut = runPromise.get_future();
  Error runErr = Error::empty();
  device->runFunction(
      name.str(), std::move(contextPtr),
      [&runPromise, &runErr](runtime::RunIdentifierTy, Error err,
                             std::unique_ptr<ExecutionContext> contextPtr) {
        // Don't delete context.
        contextPtr.release();
        runErr = std::move(err);
        runPromise.set_value();
      });
  fut.wait();
  EXIT_ON_ERR(std::move(runErr));
}

Constant *createRandomizedConstant(Module &mod, TypeRef type,
                                   llvm::ArrayRef<dim_t> dims,
                                   llvm::StringRef name) {
  auto *c = mod.createConstant(mod.uniqueTypeWithNewShape(type, dims), name);

  switch (type->getElementType()) {
  case ElemKind::FloatTy: {
    c->getHandle<float>().initXavier(c->getType()->size() * 2, mod.getPRNG());
    break;
  }
  case ElemKind::Float16Ty: {
    c->getHandle<float16_t>().initXavier(c->getType()->size() * 2,
                                         mod.getPRNG());
    break;
  }
  case ElemKind::BFloat16Ty: {
    c->getHandle<bfloat16_t>().initXavier(c->getType()->size() * 2,
                                          mod.getPRNG());
    break;
  }
  case ElemKind::Int32QTy: {
    c->getHandle<int32_t>().randomize(INT32_MIN, INT32_MAX, mod.getPRNG());
    break;
  }
  case ElemKind::Int8QTy: {
    c->getHandle<int8_t>().randomize(INT8_MIN, INT8_MAX, mod.getPRNG());
    break;
  }
  case ElemKind::UInt8FusedQTy:
  case ElemKind::UInt8FusedFP16QTy: {
    c->getHandle<uint8_t>().randomize(UINT8_MIN, UINT8_MAX, mod.getPRNG());
    break;
  }
  default:
    LOG(FATAL) << "Unsupported type: " << type->getElementName().str();
  }

  return c;
}

Constant *createRandomFusedRowwiseQuantizedConstant(Module &mod,
                                                    llvm::ArrayRef<dim_t> dims,
                                                    llvm::StringRef name,
                                                    bool useFusedFP16) {
  auto T = mod.uniqueType(
      (useFusedFP16 ? ElemKind::UInt8FusedFP16QTy : ElemKind::UInt8FusedQTy),
      {1}, 1, 0);
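  // Note: fused rowwise-quantized tensors store, per row, the quantized
  // payload followed by that row's scale and offset (float16 or float
  // depending on the fused kind), which is why two scale/offset-sized columns
  // are appended to dims[1] below.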
  const dim_t sizeScaleOffset =
      useFusedFP16 ? sizeof(float16_t) : sizeof(float);
  Constant *c = createRandomizedConstant(
      mod, T, {dims[0], dims[1] + 2 * sizeScaleOffset}, name);

  // Range (0, 255) -> (-0.1, 0.1)
  constexpr float scale = 1.0f / 1275;
  constexpr float offset = -0.1;
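  // e.g. the quantized value 0 maps to 0 * scale + offset = -0.1, and 255
  // maps to 255 / 1275 - 0.1 = 0.1.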
  auto cH = c->getPayload().getHandle<uint8_t>();
  for (unsigned i = 0, e = c->dims()[0]; i < e; i++) {
    if (useFusedFP16) {
      cH.setFusedScaleOffsetInRow<float16_t>(i, scale, offset);
    } else {
      cH.setFusedScaleOffsetInRow<float>(i, scale, offset);
    }
  }

  return c;
}

Placeholder *createFusedRowwiseQuantizedPlaceholder(Module &mod,
                                                    llvm::ArrayRef<dim_t> dims,
                                                    llvm::StringRef name,
                                                    bool useFusedFP16) {
  auto T = useFusedFP16 ? ElemKind::UInt8FusedFP16QTy : ElemKind::UInt8FusedQTy;
  const dim_t sizeScaleOffset =
      useFusedFP16 ? sizeof(float16_t) : sizeof(float);
  constexpr float scale = 1.0f / 1275;
  constexpr float offset = -0.1;
  Placeholder *ph = mod.createPlaceholder(
      T, {dims[0], dims[1] + 2 * sizeScaleOffset}, scale, offset, name, false);

  return ph;
}

// Helper for creating and initializing placeholders from tensors.
Placeholder *createPlaceholder(Module &mod, PlaceholderBindings &bindings,
                               Tensor *tensor, llvm::StringRef name,
                               const std::string &layout) {
  auto *P = mod.createPlaceholder(&tensor->getType(), name, false, layout);
  auto *PTensor = bindings.allocate(P);
  PTensor->assign(tensor);
  return P;
}

} // namespace glow