1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "BackendTestUtils.h" |
18 | |
19 | #include "glow/Converter/TypeAToTypeBFunctionConverter.h" |
20 | #include "glow/ExecutionEngine/ExecutionEngine.h" |
21 | #include "glow/Graph/Graph.h" |
22 | #include "glow/IR/IR.h" |
23 | #include "glow/IR/IRBuilder.h" |
24 | #include "glow/IR/Instrs.h" |
25 | #include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h" |
26 | #include "glow/Quantization/Quantization.h" |
27 | |
28 | #include "gtest/gtest.h" |
29 | |
30 | #include "llvm/Support/CommandLine.h" |
31 | |
32 | #include <future> |
33 | |
34 | namespace glow { |
35 | |
/// Category grouping all command-line options declared by BackendTestUtils.
llvm::cl::OptionCategory backendTestUtilsCat("BackendTestUtils Category");

/// External storage for the -parallel-clone-count option below; read by tests
/// that support parallel cloning.
unsigned parCloneCountOpt;
llvm::cl::opt<unsigned, /* ExternalStorage */ true> parCloneCountI(
    "parallel-clone-count",
    llvm::cl::desc(
        "Number of times to clone a graph in parallel. Intended to stress test "
        "different backends. This option is not used by all unit "
        "tests; for now you must check the test to see if so."),
    llvm::cl::location(parCloneCountOpt), llvm::cl::Optional, llvm::cl::init(1),
    llvm::cl::cat(backendTestUtilsCat));

/// External storage for the -run-disabled-tests option below.
bool runDisabledTests;
llvm::cl::opt<bool, /* ExternalStorage */ true> runDisabledTestsI(
    "run-disabled-tests",
    llvm::cl::desc("If set, disabled tests will not be skipped."),
    llvm::cl::location(runDisabledTests), llvm::cl::Optional,
    llvm::cl::init(false), llvm::cl::cat(backendTestUtilsCat));
54 | |
55 | using llvm::cast; |
56 | |
57 | namespace { |
58 | |
59 | static Placeholder *createQuantizedPlaceholder(Module &mod, |
60 | PlaceholderBindings &bindings, |
61 | Tensor *tensor, float scale, |
62 | int32_t offset, |
63 | llvm::StringRef name) { |
64 | auto *P = mod.createPlaceholder(tensor->getElementType(), tensor->dims(), |
65 | scale, offset, name, false); |
66 | auto *PTensor = bindings.allocate(P); |
67 | PTensor->assign(tensor); |
68 | |
69 | return P; |
70 | } |
71 | |
72 | /// Create and initialize a function using the argument \p createAndInitFunction |
73 | /// then run the function in profiling mode to get the profiling parameters. |
74 | /// \p count is the number of times to clone the Function inside itself before |
75 | /// profiling. \returns the profiling parameters for all the function nodes. |
76 | static std::vector<NodeProfilingInfo> |
77 | profileAndGetNodeProfilingInfo(CreateAndInitFunction createAndInitFunction, |
78 | unsigned count) { |
79 | LoweredInfoMap loweredMapForProf; |
80 | PlaceholderBindings pBindings; |
81 | // Note: deviceMemory = 0 is a signal to use the defaultMemory. |
82 | ExecutionEngine PEE{"Interpreter" , /* deviceMemory */ 0, |
83 | /* ignoreUserDeviceConfig */ true}; |
84 | auto FT = createAndInitFunction(pBindings, PEE); |
85 | CompilationContext cctx{&pBindings, &loweredMapForProf}; |
86 | |
87 | // Clone the number of times as requested to match the Function that will be |
88 | // quantized. |
89 | cloneFunInsideFun(FT, &pBindings, cctx, count); |
90 | cctx.precisionConfig.quantMode = QuantizationMode::Profile; |
91 | PEE.compile(cctx); |
92 | PEE.run(pBindings); |
93 | |
94 | // We get the new function using front() because the original function was |
95 | // deleted as part of the Partitioner quantization flow. |
96 | return quantization::generateNodeProfilingInfos( |
97 | pBindings, PEE.getModule().getFunctions().front(), loweredMapForProf); |
98 | } |
99 | |
100 | /// Helper that sets up and \returns a pair of configs for both interpreter and |
101 | /// backend being tested. |
102 | static std::pair<CompilationContext, CompilationContext> |
103 | setupInterpAndBackendConfigs( |
104 | Function *IF, ExecutionEngine &IEE, PlaceholderBindings &iBindings, |
105 | LoweredInfoMap &ILIM, PlaceholderBindings &bBindings, LoweredInfoMap &BLIM, |
106 | ElemKind interpElemKind, ElemKind backendElemKind, |
107 | quantization::Schema schema, bool convertToRowwiseQuantization, |
108 | CreateAndInitFunction createAndInitFunction, ElemKind biasElemKind, |
109 | bool forceFP16AccumSLS, PrecisionConfiguration::Float16Format float16Format, |
110 | unsigned count, bool convertToChannelwiseQuantization, |
111 | bool skipQuantizeFCBias) { |
112 | CompilationContext cctxI{&iBindings, &ILIM}; |
113 | CompilationContext cctxB{&bBindings, &BLIM}; |
114 | PrecisionConfiguration &precConfigI = cctxI.precisionConfig; |
115 | PrecisionConfiguration &precConfigB = cctxB.precisionConfig; |
116 | |
117 | if (isQuantizedElemKind(interpElemKind) || |
118 | isQuantizedElemKind(backendElemKind)) { |
119 | // If either interp or backend need to be quantized then we need to profile |
120 | // and get quantization infos. |
121 | if (isQuantizedElemKind(interpElemKind)) { |
122 | // Note: We only do parallel cloning for the backend, so always use count |
123 | // of 1 here. |
124 | auto NQII = |
125 | profileAndGetNodeProfilingInfo(createAndInitFunction, /* count */ 1); |
126 | |
127 | precConfigI.quantMode = QuantizationMode::Quantize; |
128 | precConfigI.quantConfig.infos = NQII; |
129 | precConfigI.quantConfig.enableRowwise = convertToRowwiseQuantization; |
130 | precConfigI.quantConfig.enableChannelwise = |
131 | convertToChannelwiseQuantization; |
132 | precConfigI.quantConfig.schema = schema; |
133 | precConfigI.quantConfig.precision = interpElemKind; |
134 | precConfigI.quantConfig.assertAllNodesQuantized = true; |
135 | precConfigI.quantConfig.precisionBias = biasElemKind; |
136 | precConfigI.quantConfig.skipQuantizeFCBias = skipQuantizeFCBias; |
137 | } |
138 | |
139 | if (isQuantizedElemKind(backendElemKind)) { |
140 | // Always clone count times here. This matches the Function the backend |
141 | // will quantize. |
142 | auto NQIB = profileAndGetNodeProfilingInfo(createAndInitFunction, count); |
143 | |
144 | precConfigB.quantMode = QuantizationMode::Quantize; |
145 | precConfigB.quantConfig.infos = NQIB; |
146 | precConfigB.quantConfig.enableRowwise = convertToRowwiseQuantization; |
147 | precConfigB.quantConfig.enableChannelwise = |
148 | convertToChannelwiseQuantization; |
149 | precConfigB.quantConfig.schema = schema; |
150 | precConfigB.quantConfig.precision = backendElemKind; |
151 | precConfigB.quantConfig.assertAllNodesQuantized = true; |
152 | precConfigB.quantConfig.precisionBias = biasElemKind; |
153 | precConfigB.quantConfig.skipQuantizeFCBias = skipQuantizeFCBias; |
154 | } |
155 | } |
156 | |
157 | // For now if the ElemKind is FP16 then we use Float16Ty, UInt8FusedFP16QTy. |
158 | precConfigI.convertToFP16 = interpElemKind == ElemKind::Float16Ty; |
159 | precConfigI.convertFusedToFP16 = interpElemKind == ElemKind::Float16Ty; |
160 | precConfigI.forceFP16AccumSLS = forceFP16AccumSLS; |
161 | precConfigB.convertToFP16 = backendElemKind == ElemKind::Float16Ty; |
162 | precConfigB.convertFusedToFP16 = backendElemKind == ElemKind::Float16Ty; |
163 | precConfigB.forceFP16AccumSLS = forceFP16AccumSLS; |
164 | |
165 | return std::make_pair(cctxI, cctxB); |
166 | } |
167 | } // namespace |
168 | |
/// Runs inference of network \p fname on \p hostManager,
/// \p concurrentRequestsOpt times concurrently. The first request uses
/// \p context directly; additional requests use fresh contexts (if
/// \p useNewExecutionContext) or clones of \p context's bindings. Blocks
/// until all requests have completed and their results are on host.
void dispatchInference(const std::string &fname,
                       runtime::HostManager *hostManager,
                       ExecutionContext &context,
                       unsigned concurrentRequestsOpt,
                       bool useNewExecutionContext) {
  // If additional requests are desired, setup additional contexts.
  std::vector<std::unique_ptr<ExecutionContext>> contexts;
  // NOTE: the caller owns \p context. It is wrapped in a unique_ptr only so
  // all requests can be handled uniformly through runNetwork, and is
  // release()d again at the bottom of this function to avoid freeing it.
  std::unique_ptr<ExecutionContext> originalContextPtr(&context);
  contexts.push_back(std::move(originalContextPtr));
  if (concurrentRequestsOpt > 1) {
    // Clone the placeholder bindings into a new executionContext.
    for (unsigned i = 0, max = concurrentRequestsOpt - 1; i < max; i++) {
      std::unique_ptr<ExecutionContext> newContext =
          (useNewExecutionContext)
              ? glow::make_unique<ExecutionContext>()
              : glow::make_unique<ExecutionContext>(
                    glow::make_unique<PlaceholderBindings>(
                        context.getPlaceholderBindings()->clone()));
      contexts.push_back(std::move(newContext));
    }
  }
  // One promise/future pair per request so we can block until all complete.
  std::vector<std::promise<void>> promises(concurrentRequestsOpt);
  std::vector<std::future<void>> futures;
  for (auto &promise : promises) {
    futures.push_back(promise.get_future());
  }
  for (unsigned i = 0; i < concurrentRequestsOpt; i++) {
    // runNetwork takes ownership of the context and hands it back via the
    // callback; store it back into the same slot it came from.
    hostManager->runNetwork(fname, std::move(contexts[i]),
                            [&contexts, &promises,
                             i](runtime::RunIdentifierTy, Error err,
                                std::unique_ptr<ExecutionContext> contextPtr) {
                              contexts[i] = std::move(contextPtr);
                              // Expect no errors.
                              EXIT_ON_ERR(std::move(err));
                              promises[i].set_value();
                            });
  }

  // Wait for every request to signal completion.
  for (auto &future : futures) {
    future.wait();
  }
  // Ensure result tensors are resident on host before callers read them.
  for (auto &c : contexts) {
    c->getPlaceholderBindings()->ensureOnHost();
  }
  // Release the original context passed in by reference so we don't free it.
  contexts[0].release();
}
216 | |
217 | /// Helper that iterates over all of the Placeholders from the function \p F |
218 | /// and converts the Tensors found in \p bindings to the same type as the |
219 | /// Placeholders if necessary. |
220 | static void convertBindingsToCorrectType(Function *F, |
221 | PlaceholderBindings &bindings) { |
222 | PlaceholderList PHs = F->findPlaceholders(); |
223 | for (Placeholder *PH : PHs) { |
224 | Tensor *T = bindings.get(PH); |
225 | TypeRef newTy = PH->getType(); |
226 | if (T->getType().isEqual(newTy)) { |
227 | continue; |
228 | } |
229 | // For input placeholders convert tensor type and values. |
230 | // For output placeholders convert only the tensor type. |
231 | if (isInput(PH, *F)) { |
232 | ElemKind newK = newTy->getElementType(); |
233 | if (isQuantizedElemKind(newK)) { |
234 | Tensor QT = quantization::quantizeTensor( |
235 | *T, {newTy->getScale(), newTy->getOffset()}, newK); |
236 | T->assign(&QT); |
237 | } else { |
238 | T->convertToType(newK); |
239 | } |
240 | } else { |
241 | T->reset(*newTy); |
242 | } |
243 | } |
244 | } |
245 | |
246 | /// Helper to get a float copy of a Tensor \p T if needed. |
247 | static Tensor convertToFloatIfNecessary(Tensor &T) { |
248 | const ElemKind srcK = T.getType().getElementType(); |
249 | if (srcK == ElemKind::FloatTy) { |
250 | return T.clone(); |
251 | } |
252 | if (isQuantizedElemKind(srcK)) { |
253 | return quantization::dequantizeTensor(T, ElemKind::FloatTy); |
254 | } |
255 | return T.getCopyConvertedToType(ElemKind::FloatTy); |
256 | } |
257 | |
/// Builds the Function produced by \p createAndInitFunction on both the
/// Interpreter (at \p interpElemKind precision) and on \p backendName (at
/// \p backendElemKind precision), runs both, and EXPECTs that the backend's
/// results match the Interpreter's within \p allowedError, comparing in
/// float. The backend Function is additionally cloned \p count times in
/// parallel, and all clone results must be bitwise equal to each other.
void compareAgainstInterpreter(
    llvm::StringRef backendName, CreateAndInitFunction createAndInitFunction,
    ElemKind interpElemKind, ElemKind backendElemKind, float allowedError,
    unsigned count, bool convertToRowwiseQuantization,
    quantization::Schema schema, ElemKind biasElemKind, bool forceFP16AccumSLS,
    PrecisionConfiguration::Float16Format float16Format,
    bool convertToChannelwiseQuantization, bool skipQuantizeFCBias) {
  // Note: deviceMemory = 0 is a signal to use the defaultMemory.
  ExecutionEngine IEE{"Interpreter", /* deviceMemory */ 0,
                      /* ignoreUserDeviceConfig */ true};
  ExecutionEngine BEE{backendName};
  PlaceholderBindings iBindings, bBindings;

  LOG(INFO) << "Comparing Interpreter with precision "
            << Type::getElementName(interpElemKind).str() << " against "
            << backendName.str() << " with precision "
            << Type::getElementName(backendElemKind).str() << " with Bias "
            << (skipQuantizeFCBias ? "unquantized"
                                   : Type::getElementName(biasElemKind).str())
            << " with FP16 AccumulationSLS " << forceFP16AccumSLS;

  // Create the same network on the interpreter and the backend being tested.
  FunctionTensorPair IFT = createAndInitFunction(iBindings, IEE);
  FunctionTensorPair BFT = createAndInitFunction(bBindings, BEE);

  Function *IF = IFT.first;

  // Set up the configs for interpreter and backend. If one or both functions
  // will be quantized, then gather a profile of the graph on the interpreter,
  // and then quantize the Functions as requested.
  LoweredInfoMap ILIM, BLIM;
  auto configs = setupInterpAndBackendConfigs(
      IF, IEE, iBindings, ILIM, bBindings, BLIM, interpElemKind,
      backendElemKind, schema, convertToRowwiseQuantization,
      createAndInitFunction, biasElemKind, forceFP16AccumSLS, float16Format,
      count, convertToChannelwiseQuantization, skipQuantizeFCBias);
  CompilationContext &cctxI = configs.first;
  CompilationContext &cctxB = configs.second;

  // Skip conversion for rowwise quantized tests as they are a special case
  // which don't fit cleanly here -- e.g. RWQ-SLS has FloatTy outputs.
  if (!convertToRowwiseQuantization) {
    // We want to compare the ops themselves and not see differences in
    // conversion, so fold ElemKind conversion nodes into IO.
    cctxI.optimizationOpts.foldElemKindConversionIntoIO = true;
    cctxB.optimizationOpts.foldElemKindConversionIntoIO = true;
  }

  // Clone the Function inside itself many times if desired.
  std::unordered_set<Tensor *> resultTensors =
      cloneFunInsideFun(BFT, &bBindings, cctxB, count);
  assert(resultTensors.size() == count &&
         "Should get the same number of Tensors back as count.");

  IEE.compile(cctxI);
  BEE.compile(cctxB);

  // Again skip rowwise quantization as before.
  if (!convertToRowwiseQuantization) {
    // Now that we have compiled, precision transformation has occurred. Now
    // convert all mismatches for Placeholders given their original bindings.
    convertBindingsToCorrectType(IEE.getSingleFunctionFromModule(), iBindings);
    convertBindingsToCorrectType(BEE.getSingleFunctionFromModule(), bBindings);
  }

  IEE.run(iBindings);
  BEE.run(bBindings);

  // Compare each of our result tensors to the original. Always convert back to
  // float if necessary, as allowed error is expected to compare float.
  Tensor finalIT = convertToFloatIfNecessary(*IFT.second);
  for (Tensor *T : resultTensors) {
    Tensor finalBT = convertToFloatIfNecessary(*T);
    EXPECT_TRUE(finalIT.isEqual(finalBT, allowedError, /* verbose */ true));
  }

  // Additionally check that each of the results from the parallel cloned
  // Functions are bitwise equal.
  auto it = resultTensors.begin();
  Tensor *firstResult = *it;
  for (it++; it != resultTensors.end(); it++) {
    EXPECT_TRUE(firstResult->isBitwiseEqual(**it));
  }
}
342 | |
/// Clones the Function in \p FTP inside itself until the Function contains
/// \p count total copies. Every Placeholder used by a clone is duplicated in
/// the Module (with a freshly allocated Tensor in \p bindings) so that CSE
/// cannot merge the copies back together, and the node profiling infos in
/// \p cctx are replicated under each clone's expected uniqued name so
/// quantization can still find a matching profile. \returns the set of result
/// Tensors, one per copy (including the original's).
std::unordered_set<Tensor *> cloneFunInsideFun(FunctionTensorPair FTP,
                                               PlaceholderBindings *bindings,
                                               CompilationContext &cctx,
                                               unsigned count) {
  Function *origF = FTP.first;

  // Always save the original Function's Tensor, which we will keep around.
  std::unordered_set<Tensor *> resultTensors;
  resultTensors.insert(FTP.second);

  // Nothing to do if we just want the one.
  if (count == 1) {
    return resultTensors;
  }

  Module *mod = origF->getParent();

  // Clone the original Function to repeatedly add it to the original.
  auto *cloneF = origF->clone("single_clone");

  // We keep the original Function, then clone/add count-1 more.
  for (size_t i = 1; i < count; i++) {
    // Clone the clone, and then add all the new nodes to the original function.
    auto *tmpF = cloneF->clone("tmp" + std::to_string(i));
    std::unordered_set<Node *> clonedNodes;
    bool foundSaveNode = false;
    for (auto &N : tmpF->getNodes()) {
      clonedNodes.insert(&N);

      // For every Node we add, check if it uses a Placeholder node, and if so
      // clone it in the Module so that CSE doesn't undo all our hard work.
      for (size_t j = 0, f = N.getNumInputs(); j < f; j++) {
        Placeholder *origPH = llvm::dyn_cast<Placeholder>(N.getNthInput(j));
        if (!origPH) {
          continue;
        }

        // Clone the Placeholder, allocate it in the bindings, and replace the
        // usage of the original node to point to the clone.
        Placeholder *clonePH = mod->createPlaceholder(
            origPH->getType(), origPH->getName(), origPH->isTraining());
        Tensor *oldT = bindings->get(origPH);
        assert(oldT);
        Tensor *newT = bindings->allocate(clonePH);
        newT->assign(oldT);
        N.setNthInput(j, clonePH);

        // Save the result Tensors to return so we can compare the results of
        // all of our clones. (A SaveNode's output Placeholder is one of its
        // inputs, so it is caught by this Placeholder loop.)
        if (llvm::isa<SaveNode>(N)) {
          assert(!foundSaveNode &&
                 "Can only handle Functions with a single SaveNode.");
          foundSaveNode = true;
          resultTensors.insert(newT);
        }
      }
    }
    // Move the cloned nodes into the original Function, then erase the
    // now-empty temporary clone.
    for (auto &N : clonedNodes) {
      origF->takeOwnershipOfNode(N);
    }
    mod->eraseFunction(tmpF);
  }
  // Now erase the clone we used to copy in, as it's no longer needed.
  mod->eraseFunction(cloneF);

  // Finally, duplicate all of the node profiling infos with the new expected
  // clone's name so that the cloned copies will find the same profiling info
  // as the original node if being quantized.
  auto &origInfos = cctx.precisionConfig.quantConfig.infos;
  origInfos.reserve(count * origInfos.size());
  std::vector<NodeProfilingInfo> newInfos;
  newInfos.reserve((count - 1) * origInfos.size());
  for (const auto &PI : origInfos) {
    const size_t colonIdx = PI.nodeOutputName_.find(":");
    assert(colonIdx != std::string::npos && "Name should always contain ':'");
    for (size_t i = 1; i < count; i++) {
      std::string newName(PI.nodeOutputName_);
      // Cloned nodes end up with the original name plus the count number
      // appended to their name due to uniquing. Replicate the same thing.
      newName.insert(colonIdx, std::to_string(i));
      newInfos.emplace_back(newName, PI.tensorProfilingParams_);
    }
  }
  origInfos.insert(origInfos.end(), newInfos.begin(), newInfos.end());

  return resultTensors;
}
430 | |
431 | unsigned countNodeKind(Function *F, Kinded::Kind kind) { |
432 | unsigned count = 0; |
433 | for (auto &n : F->getNodes()) { |
434 | if (n.getKind() == kind) { |
435 | count++; |
436 | } |
437 | } |
438 | return count; |
439 | } |
440 | |
441 | void inferIntLookupTableNetInt8(Tensor *input, Tensor *out, |
442 | llvm::ArrayRef<int8_t> table, |
443 | llvm::StringRef kind) { |
444 | PlaceholderBindings bindings; |
445 | ExecutionEngine EE(kind); |
446 | auto &mod = EE.getModule(); |
447 | Function *F = mod.createFunction("main" ); |
448 | auto outTy = mod.uniqueType(ElemKind::Int8QTy, {(dim_t)input->size()}, 3, 3); |
449 | auto var = createQuantizedPlaceholder(mod, bindings, input, |
450 | input->getType().getScale(), |
451 | input->getType().getOffset(), "var" ); |
452 | auto *lookupTable = F->createIntLookupTable("lookuptable" , var, table, outTy); |
453 | auto *result = F->createSave("ret" , lookupTable); |
454 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
455 | |
456 | EE.compile(CompilationMode::Infer); |
457 | bindings.allocate(mod.getPlaceholders()); |
458 | |
459 | updateInputPlaceholders(bindings, {var}, {input}); |
460 | EE.run(bindings); |
461 | out->assign(resultTensor); |
462 | } |
463 | |
464 | void inferIntLookupTableNetInt16(Tensor *input, Tensor *out, |
465 | llvm::ArrayRef<int16_t> table, |
466 | llvm::StringRef kind) { |
467 | PlaceholderBindings bindings; |
468 | ExecutionEngine EE(kind); |
469 | auto &mod = EE.getModule(); |
470 | Function *F = mod.createFunction("main" ); |
471 | auto outTy = mod.uniqueType(ElemKind::Int16QTy, {(dim_t)input->size()}, 3, 3); |
472 | auto var = createQuantizedPlaceholder(mod, bindings, input, |
473 | input->getType().getScale(), |
474 | input->getType().getOffset(), "var" ); |
475 | auto *lookupTable = F->createIntLookupTable("lookuptable" , var, table, outTy); |
476 | auto *result = F->createSave("ret" , lookupTable); |
477 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
478 | |
479 | EE.compile(CompilationMode::Infer); |
480 | bindings.allocate(mod.getPlaceholders()); |
481 | |
482 | updateInputPlaceholders(bindings, {var}, {input}); |
483 | EE.run(bindings); |
484 | out->assign(resultTensor); |
485 | } |
486 | |
487 | void inferConvNet(Tensor *inputs, Tensor *filter, Tensor *bias, Tensor *out, |
488 | llvm::StringRef kind) { |
489 | PlaceholderBindings bindings; |
490 | ExecutionEngine EE(kind); |
491 | auto &mod = EE.getModule(); |
492 | Function *F = mod.createFunction("main" ); |
493 | Placeholder *inputP; |
494 | Placeholder *filterP; |
495 | Placeholder *biasP; |
496 | Placeholder *outP; |
497 | TypeRef OT; |
498 | if (inputs->getType().isQuantizedType()) { |
499 | auto &outType = out->getType(); |
500 | auto &inType = inputs->getType(); |
501 | auto &filterType = filter->getType(); |
502 | auto &biasType = bias->getType(); |
503 | inputP = createQuantizedPlaceholder( |
504 | mod, bindings, inputs, inType.getScale(), inType.getOffset(), "inputP" ); |
505 | filterP = |
506 | createQuantizedPlaceholder(mod, bindings, filter, filterType.getScale(), |
507 | filterType.getOffset(), "filterP" ); |
508 | biasP = createQuantizedPlaceholder(mod, bindings, bias, biasType.getScale(), |
509 | biasType.getOffset(), "biasP" ); |
510 | outP = createQuantizedPlaceholder(mod, bindings, out, outType.getScale(), |
511 | outType.getOffset(), "outP" ); |
512 | OT = F->getParent()->uniqueType(out->getElementType(), out->dims(), |
513 | outType.getScale(), outType.getOffset()); |
514 | } else { |
515 | inputP = createPlaceholder(mod, bindings, inputs, "inputP" ); |
516 | filterP = createPlaceholder(mod, bindings, filter, "filterP" ); |
517 | biasP = createPlaceholder(mod, bindings, bias, "biasP" ); |
518 | outP = createPlaceholder(mod, bindings, out, "outP" ); |
519 | OT = F->getParent()->uniqueType(out->getElementType(), out->dims()); |
520 | } |
521 | auto *conv = F->createConv("conv" , inputP, filterP, biasP, OT, 5, 3, 4, 1); |
522 | auto *result = F->createSave("ret" , conv, outP); |
523 | auto *resultTensor = bindings.get(result->getPlaceholder()); |
524 | |
525 | EE.compile(CompilationMode::Infer); |
526 | |
527 | updateInputPlaceholders(bindings, {inputP, filterP, biasP}, |
528 | {inputs, filter, bias}); |
529 | EE.run(bindings); |
530 | out->assign(resultTensor); |
531 | } |
532 | |
/// Builds a Conv+Relu network with the given \p kernel, \p stride and \p pad
/// on backend \p kind (quantized if \p inputs is a quantized tensor, float
/// otherwise), runs it on \p inputs / \p filter / \p bias, and writes the
/// result into \p out. \returns the convolution's fused activation after
/// compilation, so callers can check whether the Relu was fused into the
/// convolution (this depends on the backend/build).
int inferConvReluNet(Tensor *inputs, Tensor *filter, Tensor *bias, Tensor *out,
                     unsigned_t kernel, unsigned_t stride, unsigned_t pad,
                     llvm::StringRef kind) {
  PlaceholderBindings bindings;
  ExecutionEngine EE(kind);
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  Placeholder *inputP;
  Placeholder *filterP;
  Placeholder *biasP;
  Placeholder *outP;
  TypeRef OT;
  if (inputs->getType().isQuantizedType()) {
    // Quantized path: each placeholder carries its tensor's scale/offset.
    auto &outType = out->getType();
    auto &inType = inputs->getType();
    auto &filterType = filter->getType();
    auto &biasType = bias->getType();
    inputP = createQuantizedPlaceholder(
        mod, bindings, inputs, inType.getScale(), inType.getOffset(), "inputP");
    filterP =
        createQuantizedPlaceholder(mod, bindings, filter, filterType.getScale(),
                                   filterType.getOffset(), "filterP");
    biasP = createQuantizedPlaceholder(mod, bindings, bias, biasType.getScale(),
                                       biasType.getOffset(), "biasP");
    outP = createQuantizedPlaceholder(mod, bindings, out, outType.getScale(),
                                      outType.getOffset(), "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims(),
                                    outType.getScale(), outType.getOffset());
  } else {
    inputP = createPlaceholder(mod, bindings, inputs, "inputP");
    filterP = createPlaceholder(mod, bindings, filter, "filterP");
    biasP = createPlaceholder(mod, bindings, bias, "biasP");
    outP = createPlaceholder(mod, bindings, out, "outP");
    OT = F->getParent()->uniqueType(out->getElementType(), out->dims());
  }
  auto *conv =
      F->createConv("conv", inputP, filterP, biasP, OT, kernel, stride, pad, 1);
  // Relu
  auto *relu = F->createRELU("relu", conv);
  auto *result = F->createSave("ret", relu, outP);
  auto *resultTensor = bindings.get(result->getPlaceholder());

  EE.compile(CompilationMode::Infer);

  // check fusion depending on build option.
  // EXPECT_EQ(conv->getFusedActivation(), FusedActivation::RELU);

  updateInputPlaceholders(bindings, {inputP, filterP, biasP},
                          {inputs, filter, bias});
  EE.run(bindings);
  out->assign(resultTensor);
  // Report whether compilation fused the Relu into the convolution.
  return conv->getFusedActivation();
}
586 | |
/// Builds a conv -> reshape -> conv -> reshape -> softmax network on both a
/// training engine (EET) and an inference engine (EEI) of backend \p kind,
/// seeds the convolution weights from \p kernel1 / \p bias1 and
/// \p kernel2 / \p bias2, trains on \p inputs / \p selected for 8 batches,
/// copies the trained weights to the inference engine, runs inference, and
/// writes the softmax output into \p out.
void trainConvNet(Tensor *inputs, Tensor *kernel1, Tensor *bias1,
                  Tensor *kernel2, Tensor *bias2, Tensor *selected,
                  llvm::ArrayRef<dim_t> shape1, llvm::ArrayRef<dim_t> shape2,
                  Tensor *out, llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.03;
  TC.momentum = 0.3;
  TC.L2Decay = 0.01;
  Function *F;
  Placeholder *var1, *var2;
  // Build an identical network in each engine's module. After this loop F,
  // var1 and var2 refer to the training engine's (EET's) copies, since EET
  // is last in `engines`.
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *conv1 = F->createConv(bindings, "conv1", var1, 3, {5, 3}, {2, 1},
                                {2, 1, 2, 1}, 1);
    bindings.get(cast<Placeholder>(conv1->getFilter()))->assign(kernel1);
    bindings.get(cast<Placeholder>(conv1->getBias()))->assign(bias1);
    auto *reshape1 = F->createReshape("reshape1", conv1, shape1);
    auto *conv2 = F->createConv(bindings, "conv2", reshape1, 2, 2, 2, 0, 1);
    bindings.get(cast<Placeholder>(conv2->getFilter()))->assign(kernel2);
    bindings.get(cast<Placeholder>(conv2->getBias()))->assign(bias2);
    auto *reshape2 = F->createReshape("reshape2", conv2, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    F->createSave("ret", softmax);
  }

  // Differentiate the training engine's Function and compile it for training.
  auto *TF = glow::differentiate(F, TC);
  auto tfName = TF->getName();
  auto fName = F->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));

  // Train for 8 batches, then move the trained weights into the inference
  // engine's bindings.
  runBatch(EET, trainingBindings, 8, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  EEI.compile(CompilationMode::Infer);
  // Re-resolve var1/var2 in the inference engine's bindings; the loop left
  // them pointing at the training engine's placeholders.
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  updateInputPlaceholders(inferBindings, {var1, var2}, {inputs, selected});
  EEI.run(inferBindings, fName);
  out->assign(res);
}
646 | |
647 | void inferLocalResponseNormalizationNet(Tensor *inputs, Tensor *out, |
648 | llvm::StringRef kind) { |
649 | PlaceholderBindings bindings; |
650 | ExecutionEngine EE(kind); |
651 | auto &mod = EE.getModule(); |
652 | Function *F = mod.createFunction("main" ); |
653 | auto *var = createPlaceholder(mod, bindings, inputs, "var" ); |
654 | auto *lrn = F->createLocalResponseNormalization("lrn" , var, 5, 3.0, 0.5, 1.5); |
655 | auto *result = F->createSave("ret" , lrn); |
656 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
657 | |
658 | EE.compile(CompilationMode::Infer); |
659 | |
660 | updateInputPlaceholders(bindings, {var}, {inputs}); |
661 | EE.run(bindings); |
662 | out->assign(resultTensor); |
663 | } |
664 | |
/// Builds an fc -> reshape -> LRN -> reshape -> softmax network on both a
/// training engine (EET) and an inference engine (EEI) of backend \p kind,
/// seeds the fully-connected layer from \p weights / \p bias, trains on
/// \p inputs / \p selected for 8 batches, copies the trained weights over,
/// runs one inference batch, and writes the softmax output into \p out.
void trainLocalResponseNormalizationNet(Tensor *inputs, Tensor *weights,
                                        Tensor *bias, Tensor *selected,
                                        llvm::ArrayRef<dim_t> shape1,
                                        llvm::ArrayRef<dim_t> shape2,
                                        Tensor *out, llvm::StringRef kind) {
  PlaceholderBindings bindings, trainingBindings;
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  std::vector<ExecutionEngine *> engines{&EEI, &EET};
  TrainingConfig TC;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.06;
  TC.momentum = 0.1;
  TC.L2Decay = 0.01;
  Placeholder *var1, *var2;
  std::string fName;
  // Build an identical network in each engine's module. After this loop var1
  // and var2 refer to the training engine's (EET's) copies, since EET is
  // last in `engines`.
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    Function *F = mod.createFunction("main");
    fName = F->getName().str();
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *lrn =
        F->createLocalResponseNormalization("lrn", reshape1, 2, 2.0, 0.5, 1.0);
    auto *reshape2 = F->createReshape("reshape2", lrn, shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    auto *result = F->createSave("ret", softmax);
    bindings.allocate(result->getPlaceholder());
  }
  // Differentiate the training engine's copy and compile it for training.
  auto *TF = glow::differentiate(EET.getModule().getFunction(fName), TC);
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  // Reuse `bindings` for the inference engine's placeholders from here on.
  bindings.clear();
  bindings.allocate(EEI.getModule().getPlaceholders());

  // Train for 8 batches, then move the trained weights into the inference
  // engine's bindings.
  runBatch(EET, trainingBindings, 8, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(bindings);
  // Re-resolve var1/var2 in the inference engine's bindings; the loop left
  // them pointing at the training engine's placeholders.
  var1 = bindings.getPlaceholderByNameSlow("var1");
  var2 = bindings.getPlaceholderByNameSlow("var2");
  EEI.compile(CompilationMode::Infer);

  runBatch(EEI, bindings, 1, sampleCounter, {var1, var2}, {inputs, selected});
  out->assign(bindings.get(bindings.getPlaceholderByNameSlow("ret")));
}
720 | |
721 | void trainAvgPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias, |
722 | Tensor *selected, llvm::ArrayRef<dim_t> shape1, |
723 | llvm::ArrayRef<dim_t> shape2, Tensor *out, |
724 | llvm::StringRef kind) { |
725 | ExecutionEngine EET(kind); |
726 | ExecutionEngine EEI(kind); |
727 | std::vector<ExecutionEngine *> engines{&EEI, &EET}; |
728 | TrainingConfig TC; |
729 | PlaceholderBindings bindings, trainingBindings; |
730 | |
731 | // This variable records the number of the next sample to be used for |
732 | // training. |
733 | size_t sampleCounter = 0; |
734 | |
735 | TC.learningRate = 0.01; |
736 | TC.momentum = 0.4; |
737 | TC.L2Decay = 0.01; |
738 | Placeholder *var1, *var2; |
739 | std::string fName; |
740 | for (auto *EE : engines) { |
741 | auto &mod = EE->getModule(); |
742 | Function *F = mod.createFunction("main" ); |
743 | fName = F->getName().str(); |
744 | var1 = createPlaceholder(mod, bindings, inputs, "var1" ); |
745 | var2 = createPlaceholder(mod, bindings, selected, "var2" ); |
746 | auto *fc = F->createFullyConnected(bindings, "fc" , var1, bias->dims()[0]); |
747 | bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights); |
748 | bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias); |
749 | auto *reshape1 = F->createReshape("reshape1" , fc, shape1); |
750 | auto *pool = F->createAvgPool("pool" , reshape1, 2, 2, 0); |
751 | auto *reshape2 = F->createReshape("reshape2" , pool, shape2); |
752 | auto *softmax = F->createSoftMax("softmax" , reshape2, var2); |
753 | auto *result = F->createSave("ret" , softmax); |
754 | bindings.allocate(result->getPlaceholder()); |
755 | } |
756 | auto *TF = glow::differentiate(EET.getModule().getFunction("main" ), TC); |
757 | auto tfName = TF->getName(); |
758 | EET.compile(CompilationMode::Train); |
759 | trainingBindings.allocate(EET.getModule().getPlaceholders()); |
760 | bindings.copyTrainableWeightsTo(trainingBindings); |
761 | bindings.clear(); |
762 | bindings.allocate(EEI.getModule().getPlaceholders()); |
763 | |
764 | runBatch(EET, trainingBindings, 10, sampleCounter, {var1, var2}, |
765 | {inputs, selected}, tfName); |
766 | trainingBindings.copyTrainableWeightsTo(bindings); |
767 | var1 = bindings.getPlaceholderByNameSlow("var1" ); |
768 | var2 = bindings.getPlaceholderByNameSlow("var2" ); |
769 | EEI.compile(CompilationMode::Infer); |
770 | |
771 | updateInputPlaceholders(bindings, {var1, var2}, {inputs, selected}); |
772 | EEI.run(bindings); |
773 | out->assign(bindings.get(bindings.getPlaceholderByNameSlow("ret" ))); |
774 | } |
775 | |
/// Train a small FC -> reshape -> maxpool -> reshape -> softmax network on
/// the backend named \p kind, then run one inference batch with the trained
/// weights and store the prediction in \p out.
/// \p inputs / \p selected are the training batch and labels; \p weights and
/// \p bias seed the fully-connected layer; \p shape1 / \p shape2 are the
/// reshape targets around the pooling layer.
void trainMaxPoolNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, llvm::ArrayRef<dim_t> shape1,
                     llvm::ArrayRef<dim_t> shape2, Tensor *out,
                     llvm::StringRef kind) {
  ExecutionEngine EET(kind);
  ExecutionEngine EEI(kind);
  // EET holds the training copy of the graph, EEI the inference copy.
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.03;
  TC.momentum = 0.3;
  TC.L2Decay = 0.003;
  Function *F;
  Placeholder *var1, *var2;
  // Build an identical "main" function in each engine's module. After the
  // loop F/var1/var2 refer to the last-built (EET) copies.
  for (auto *EE : engines) {
    bindings.clear();
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    // Seed the FC weights/bias from the caller-provided tensors.
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *reshape1 = F->createReshape("reshape1", fc, shape1);
    auto *pool = F->createMaxPool("pool", reshape1, 5, 3, 4);
    auto *reshape2 = F->createReshape("reshape2", pool->getResult(), shape2);
    auto *softmax = F->createSoftMax("softmax", reshape2, var2);
    F->createSave("ret", softmax);
  }
  // Differentiate the training (EET) copy of the function.
  auto *TF = glow::differentiate(F, TC);
  auto fName = F->getName();
  auto tfName = TF->getName();
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  // Output tensor of the inference copy; read after the final run below.
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));

  // Train for 7 batches, then move the learned weights to the inference
  // bindings and run a single inference batch.
  runBatch(EET, trainingBindings, 7, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  EEI.compile(CompilationMode::Infer);
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  runBatch(EEI, inferBindings, 1, sampleCounter, {var1, var2},
           {inputs, selected}, fName);
  out->assign(res);
}
832 | |
833 | void inferSmallConv(Tensor *inputs, Tensor *out, llvm::StringRef kind) { |
834 | PlaceholderBindings bindings; |
835 | ExecutionEngine EE(kind); |
836 | auto &mod = EE.getModule(); |
837 | auto *F = mod.createFunction("main" ); |
838 | auto *in = createPlaceholder(mod, bindings, inputs, "in" , "NHWC" ); |
839 | auto *C = F->createConv(bindings, "conv2a" , in, 64, 1, 1, 0, 1); |
840 | bindings.get(cast<Placeholder>(C->getFilter()))->getHandle().clear(0.3); |
841 | bindings.get(cast<Placeholder>(C->getBias()))->getHandle().clear(0.4); |
842 | auto *result = F->createSave("ret" , C); |
843 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
844 | convertPlaceholdersToConstants(F, bindings, {in, result->getPlaceholder()}); |
845 | |
846 | EE.compile(CompilationMode::Infer); |
847 | |
848 | updateInputPlaceholders(bindings, {in}, {inputs}); |
849 | EE.run(bindings); |
850 | |
851 | out->assign(resultTensor); |
852 | } |
853 | |
854 | void inferGroupConv(Tensor *out, llvm::StringRef kind) { |
855 | PlaceholderBindings bindings; |
856 | ExecutionEngine EE(kind); |
857 | auto &mod = EE.getModule(); |
858 | auto *F = mod.createFunction("main" ); |
859 | |
860 | auto *input = |
861 | mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input" , false); |
862 | auto *inputTensor = bindings.allocate(input); |
863 | auto IH = inputTensor->getHandle(); |
864 | for (size_t i = 0; i < 2 * 32; i++) { |
865 | IH.raw(i) = (i + 1) / 10.0; |
866 | } |
867 | |
868 | auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 1, 1, 16}, |
869 | "filter" , false); |
870 | auto *filterTensor = bindings.allocate(filter); |
871 | auto FH = filterTensor->getHandle(); |
872 | for (dim_t i = 0; i < 128; i++) |
873 | for (dim_t j = 0; j < 16; j++) { |
874 | FH.at({i, 0, 0, j}) = (i + j) / 100.0; |
875 | } |
876 | auto *zeroBias = |
877 | mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias" , false); |
878 | auto *zeroBiasTensor = bindings.allocate(zeroBias); |
879 | zeroBiasTensor->zero(); |
880 | |
881 | auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 2, 1, 128}); |
882 | |
883 | ConvolutionNode *CN = |
884 | F->createConv("Conv" , input, filter, zeroBias, outTy, 1, 1, 0, 2); |
885 | SaveNode *result = F->createSave("save" , CN); |
886 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
887 | |
888 | EE.compile(CompilationMode::Infer); |
889 | |
890 | EE.run(bindings); |
891 | out->assign(resultTensor); |
892 | } |
893 | |
894 | void inferNonSquarePaddingConv(Tensor *out, llvm::StringRef kind) { |
895 | PlaceholderBindings bindings; |
896 | ExecutionEngine EE(kind); |
897 | auto &mod = EE.getModule(); |
898 | auto *F = mod.createFunction("main" ); |
899 | |
900 | auto *input = |
901 | mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input" , false); |
902 | auto *inputTensor = bindings.allocate(input); |
903 | auto IH = inputTensor->getHandle(); |
904 | for (size_t i = 0; i < 2 * 32; i++) { |
905 | IH.raw(i) = (i + 1) / 10.0; |
906 | } |
907 | |
908 | auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 1, 1, 32}, |
909 | "filter" , false); |
910 | auto *filterTensor = bindings.allocate(filter); |
911 | auto FH = filterTensor->getHandle(); |
912 | for (dim_t i = 0; i < 128; i++) |
913 | for (dim_t j = 0; j < 32; j++) { |
914 | FH.at({i, 0, 0, j}) = (i + j) / 100.0; |
915 | } |
916 | auto *zeroBias = |
917 | mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias" , false); |
918 | auto *zeroBiasTensor = bindings.allocate(zeroBias); |
919 | zeroBiasTensor->zero(); |
920 | auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 4, 5, 128}); |
921 | |
922 | ConvolutionNode *CN = F->createConv("Conv" , input, filter, zeroBias, outTy, |
923 | {1, 1}, {1, 1}, {0, 1, 2, 3}, 1); |
924 | SaveNode *result = F->createSave("save" , CN); |
925 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
926 | |
927 | EE.compile(CompilationMode::Infer); |
928 | |
929 | EE.run(bindings); |
930 | out->assign(resultTensor); |
931 | } |
932 | |
933 | void inferNonSquareKernelConv(Tensor *out, llvm::StringRef kind) { |
934 | PlaceholderBindings bindings; |
935 | ExecutionEngine EE(kind); |
936 | auto &mod = EE.getModule(); |
937 | auto *F = mod.createFunction("main" ); |
938 | |
939 | auto *input = |
940 | mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input" , false); |
941 | auto *inputTensor = bindings.allocate(input); |
942 | auto IH = inputTensor->getHandle(); |
943 | for (size_t i = 0; i < 2 * 32; i++) { |
944 | IH.raw(i) = (i + 1) / 10.0; |
945 | } |
946 | |
947 | auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 2, 1, 32}, |
948 | "filter" , false); |
949 | auto *filterTensor = bindings.allocate(filter); |
950 | auto FH = filterTensor->getHandle(); |
951 | for (dim_t i = 0; i < 128; i++) |
952 | for (dim_t j = 0; j < 2; j++) |
953 | for (dim_t k = 0; k < 32; k++) { |
954 | FH.at({i, j, 0, k}) = (i + j + k) / 100.0; |
955 | } |
956 | auto *zeroBias = |
957 | mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias" , false); |
958 | auto *zeroBiasTensor = bindings.allocate(zeroBias); |
959 | zeroBiasTensor->zero(); |
960 | auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 3, 5, 128}); |
961 | |
962 | ConvolutionNode *CN = F->createConv("Conv" , input, filter, zeroBias, outTy, |
963 | {2, 1}, {1, 1}, {0, 1, 2, 3}, 1); |
964 | SaveNode *result = F->createSave("save" , CN); |
965 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
966 | |
967 | EE.compile(CompilationMode::Infer); |
968 | |
969 | EE.run(bindings); |
970 | out->assign(resultTensor); |
971 | } |
972 | |
973 | void inferNonSquareStrideConv(Tensor *out, llvm::StringRef kind) { |
974 | PlaceholderBindings bindings; |
975 | ExecutionEngine EE(kind); |
976 | auto &mod = EE.getModule(); |
977 | auto *F = mod.createFunction("main" ); |
978 | |
979 | auto *input = |
980 | mod.createPlaceholder(ElemKind::FloatTy, {1, 2, 1, 32}, "input" , false); |
981 | auto *inputTensor = bindings.allocate(input); |
982 | auto IH = inputTensor->getHandle(); |
983 | for (size_t i = 0; i < 2 * 32; i++) { |
984 | IH.raw(i) = (i + 1) / 10.0; |
985 | } |
986 | |
987 | auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {128, 2, 1, 32}, |
988 | "filter" , false); |
989 | auto *filterTensor = bindings.allocate(filter); |
990 | auto FH = filterTensor->getHandle(); |
991 | for (dim_t i = 0; i < 128; i++) |
992 | for (dim_t j = 0; j < 2; j++) |
993 | for (dim_t k = 0; k < 32; k++) { |
994 | FH.at({i, j, 0, k}) = (i + j + k) / 100.0; |
995 | } |
996 | auto *zeroBias = |
997 | mod.createPlaceholder(ElemKind::FloatTy, {128}, "bias" , false); |
998 | auto *zeroBiasTensor = bindings.allocate(zeroBias); |
999 | zeroBiasTensor->zero(); |
1000 | auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 2, 5, 128}); |
1001 | |
1002 | ConvolutionNode *CN = F->createConv("Conv" , input, filter, zeroBias, outTy, |
1003 | {2, 1}, {2, 1}, {0, 1, 2, 3}, 1); |
1004 | SaveNode *result = F->createSave("save" , CN); |
1005 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
1006 | |
1007 | EE.compile(CompilationMode::Infer); |
1008 | |
1009 | EE.run(bindings); |
1010 | out->assign(resultTensor); |
1011 | } |
1012 | |
1013 | void inferConvDKKC8(Tensor *out, llvm::StringRef kind) { |
1014 | PlaceholderBindings bindings; |
1015 | ExecutionEngine EE(kind); |
1016 | auto &mod = EE.getModule(); |
1017 | auto *F = mod.createFunction("main" ); |
1018 | |
1019 | auto *input = |
1020 | mod.createPlaceholder(ElemKind::FloatTy, {3, 3, 3, 32}, "input" , false); |
1021 | auto *inputTensor = bindings.allocate(input); |
1022 | auto IH = inputTensor->getHandle(); |
1023 | for (size_t i = 0; i < 3 * 3 * 3 * 32; i++) { |
1024 | IH.raw(i) = (i + 1) / 10.0; |
1025 | } |
1026 | |
1027 | auto *filter = mod.createPlaceholder(ElemKind::FloatTy, {192, 3, 3, 32}, |
1028 | "filter" , false); |
1029 | auto *filterTensor = bindings.allocate(filter); |
1030 | filterTensor->zero(); |
1031 | auto FH = filterTensor->getHandle(); |
1032 | for (dim_t i = 0; i < 192; i++) |
1033 | for (dim_t j = 0; j < 3; j++) |
1034 | for (dim_t k = 0; k < 3; k++) |
1035 | for (dim_t l = 0; l < 32; l++) { |
1036 | FH.at({i, j, k, k}) = (i + j + k + l) / 200.0; |
1037 | } |
1038 | auto *zeroBias = |
1039 | mod.createPlaceholder(ElemKind::FloatTy, {192}, "bias" , false); |
1040 | auto *zeroBiasTensor = bindings.allocate(zeroBias); |
1041 | zeroBiasTensor->zero(); |
1042 | auto outTy = mod.uniqueType(ElemKind::FloatTy, {3, 3, 3, 192}); |
1043 | |
1044 | ConvolutionNode *CN = F->createConv("Conv" , input, filter, zeroBias, outTy, |
1045 | {3, 3}, {1, 1}, {1, 1, 1, 1}, 1); |
1046 | SaveNode *result = F->createSave("save" , CN); |
1047 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
1048 | |
1049 | EE.compile(CompilationMode::Infer); |
1050 | |
1051 | EE.run(bindings); |
1052 | out->assign(resultTensor); |
1053 | } |
1054 | |
/// Train a small FC -> softmax network on the backend named \p kind, then run
/// inference with the trained weights and store the prediction in \p out.
/// \p inputs / \p selected are the training batch and labels; \p weights and
/// \p bias seed the fully-connected layer.
void trainSoftMaxNet(Tensor *inputs, Tensor *weights, Tensor *bias,
                     Tensor *selected, Tensor *out, llvm::StringRef kind) {
  ExecutionEngine EEI(kind);
  ExecutionEngine EET(kind);
  // EET holds the training copy of the graph, EEI the inference copy.
  std::vector<ExecutionEngine *> engines;
  engines.push_back(&EEI);
  engines.push_back(&EET);
  TrainingConfig TC;
  PlaceholderBindings bindings, inferBindings, trainingBindings;

  // This variable records the number of the next sample to be used for
  // training.
  size_t sampleCounter = 0;

  TC.learningRate = 0.003;
  TC.momentum = 0.7;
  TC.L2Decay = 0.001;
  Function *F;
  Placeholder *var1, *var2;
  // Build an identical "main" function in each engine's module. After the
  // loop F/var1/var2 refer to the last-built (EET) copies.
  for (auto *EE : engines) {
    auto &mod = EE->getModule();
    F = mod.createFunction("main");
    var1 = createPlaceholder(mod, bindings, inputs, "var1");
    var2 = createPlaceholder(mod, bindings, selected, "var2");
    auto *fc = F->createFullyConnected(bindings, "fc", var1, bias->dims()[0]);
    // Seed the FC weights/bias from the caller-provided tensors.
    bindings.get(cast<Placeholder>(fc->getWeights()))->assign(weights);
    bindings.get(cast<Placeholder>(fc->getBias()))->assign(bias);
    auto *softmax = F->createSoftMax("softmax", fc, var2);
    F->createSave("ret", softmax);
  }

  // Differentiate the training (EET) copy of the function.
  auto *TF = glow::differentiate(F, TC);
  auto tfName = TF->getName();
  auto fName = F->getName();

  // Train for 30 batches, then copy the learned weights into the inference
  // bindings and run a single forward pass.
  EET.compile(CompilationMode::Train);
  trainingBindings.allocate(EET.getModule().getPlaceholders());
  bindings.copyTrainableWeightsTo(trainingBindings);
  runBatch(EET, trainingBindings, 30, sampleCounter, {var1, var2},
           {inputs, selected}, tfName);
  EEI.compile(CompilationMode::Infer);
  inferBindings.allocate(EEI.getModule().getPlaceholders());
  trainingBindings.copyTrainableWeightsTo(inferBindings);
  auto *res =
      inferBindings.get(EEI.getModule().getPlaceholderByNameSlow("ret"));
  var1 = inferBindings.getPlaceholderByNameSlow("var1");
  var2 = inferBindings.getPlaceholderByNameSlow("var2");
  updateInputPlaceholders(inferBindings, {var1, var2}, {inputs, selected});
  EEI.run(inferBindings, fName);
  out->assign(res);
}
1106 | |
1107 | void inferTanhConcatNet(Tensor *input1, Tensor *input2, Tensor *input3, |
1108 | Tensor *out, llvm::StringRef kind) { |
1109 | PlaceholderBindings bindings; |
1110 | ExecutionEngine EE(kind); |
1111 | auto &mod = EE.getModule(); |
1112 | Function *F = mod.createFunction("main" ); |
1113 | auto *var1 = createPlaceholder(mod, bindings, input1, "var1" ); |
1114 | auto *var2 = createPlaceholder(mod, bindings, input2, "var2" ); |
1115 | auto *var3 = createPlaceholder(mod, bindings, input3, "var3" ); |
1116 | auto *T1 = F->createTanh("tanh1" , var1); |
1117 | auto *T2 = F->createTanh("tanh2" , var2); |
1118 | auto *T3 = F->createTanh("tanh3" , var3); |
1119 | Node *C1 = F->createConcat("concat" , {T1, T2}, 0); |
1120 | Node *C2 = F->createConcat("concat" , {T2, T3, C1, T2}, 0); |
1121 | auto *result = F->createSave("ret" , C2); |
1122 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
1123 | |
1124 | EE.compile(CompilationMode::Infer); |
1125 | |
1126 | updateInputPlaceholders(bindings, {var1, var2, var3}, |
1127 | {input1, input2, input3}); |
1128 | EE.run(bindings); |
1129 | out->assign(resultTensor); |
1130 | } |
1131 | |
1132 | void inferBasicConvNet(Tensor *inputs, Tensor *out, llvm::StringRef kind, |
1133 | size_t convDepth) { |
1134 | PlaceholderBindings bindings; |
1135 | ExecutionEngine EE(kind); |
1136 | auto &mod = EE.getModule(); |
1137 | Function *F = mod.createFunction("main" ); |
1138 | auto *var = createPlaceholder(mod, bindings, inputs, "var" , "NCHW" ); |
1139 | auto *tr = F->createTranspose("tr" , var, NCHW2NHWC); |
1140 | auto *conv = F->createConv(bindings, "conv" , tr, convDepth, {5, 5}, {2, 2}, |
1141 | {1, 1, 1, 1}, 1); |
1142 | bindings.get(cast<Placeholder>(conv->getFilter()))->getHandle().clear(0.1); |
1143 | bindings.get(cast<Placeholder>(conv->getBias()))->getHandle().clear(0.2); |
1144 | auto *pool = F->createMaxPool("pool" , conv, 2, 2, 0); |
1145 | auto *result = F->createSave("ret" , pool->getResult()); |
1146 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
1147 | convertPlaceholdersToConstants(F, bindings, {var, result->getPlaceholder()}); |
1148 | |
1149 | EE.compile(CompilationMode::Infer); |
1150 | |
1151 | updateInputPlaceholders(bindings, {var}, {inputs}); |
1152 | EE.run(bindings); |
1153 | out->assign(resultTensor); |
1154 | } |
1155 | |
1156 | FunctionTensorPair createAndInitBasicFCNet(PlaceholderBindings &bindings, |
1157 | ExecutionEngine &EE) { |
1158 | auto &mod = EE.getModule(); |
1159 | Function *F = mod.createFunction("main" ); |
1160 | |
1161 | auto *var = mod.createPlaceholder(ElemKind::FloatTy, {2, 3, 16, 16}, "var" , |
1162 | false, "NCHW" ); |
1163 | auto *tr = F->createTranspose("tr" , var, NCHW2NHWC); |
1164 | auto *fc = F->createFullyConnected(bindings, "fc" , tr, 16); |
1165 | auto *rl0 = F->createRELU("relu" , fc); |
1166 | auto *fc2 = F->createFullyConnected(bindings, "fc2" , rl0, 8); |
1167 | auto *rl1 = F->createRELU("relu" , fc2); |
1168 | bindings.get(cast<Placeholder>(fc->getWeights()))->getHandle().clear(0.8); |
1169 | bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(1.5); |
1170 | auto *result = F->createSave("ret" , rl1); |
1171 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
1172 | |
1173 | PseudoRNG PRNG; |
1174 | bindings.allocate(var)->getHandle().initXavier(1, PRNG); |
1175 | |
1176 | return std::make_pair(F, resultTensor); |
1177 | } |
1178 | |
1179 | void inferMixedNet(Tensor *inputs, Tensor *out, llvm::StringRef kind) { |
1180 | PlaceholderBindings bindings; |
1181 | ExecutionEngine EE(kind); |
1182 | auto &mod = EE.getModule(); |
1183 | Function *F = mod.createFunction("main" ); |
1184 | auto *var = createPlaceholder(mod, bindings, inputs, "var" , "NCHW" ); |
1185 | auto *selected = |
1186 | mod.createPlaceholder(ElemKind::Int64ITy, {2, 1}, "selected" , false); |
1187 | |
1188 | auto *tr = F->createTranspose("tr" , var, NCHW2NHWC); |
1189 | auto *fc = F->createFullyConnected(bindings, "fc" , tr, 16); |
1190 | auto *th0 = F->createTanh("tanh" , fc); |
1191 | auto *sg0 = F->createSigmoid("sig" , fc); |
1192 | auto *A1 = F->createAdd("add" , th0, sg0); |
1193 | auto *fc2 = F->createFullyConnected(bindings, "fc2" , A1, 16); |
1194 | |
1195 | auto *R = F->createRegression("reg" , fc2, fc2); |
1196 | auto *SM = F->createSoftMax("SM" , R, selected); |
1197 | auto *result = F->createSave("ret" , SM); |
1198 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
1199 | |
1200 | bindings.get(cast<Placeholder>(fc->getWeights()))->getHandle().clear(0.4); |
1201 | bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(3.5); |
1202 | |
1203 | EE.compile(CompilationMode::Infer); |
1204 | |
1205 | updateInputPlaceholders(bindings, {var}, {inputs}); |
1206 | EE.run(bindings); |
1207 | out->assign(resultTensor); |
1208 | } |
1209 | |
1210 | void inferComplexNet1(Tensor *inputs1, Tensor *inputs2, Tensor *inputs3, |
1211 | Tensor *inputs4, Tensor *out, llvm::StringRef kind) { |
1212 | PlaceholderBindings bindings; |
1213 | ExecutionEngine EE(kind); |
1214 | auto &mod = EE.getModule(); |
1215 | Function *F = mod.createFunction("main" ); |
1216 | auto *var1 = createPlaceholder(mod, bindings, inputs1, "var1" ); |
1217 | auto *var2 = createPlaceholder(mod, bindings, inputs2, "var2" ); |
1218 | auto *var3 = createPlaceholder(mod, bindings, inputs3, "var3" ); |
1219 | auto *var4 = createPlaceholder(mod, bindings, inputs4, "var4" ); |
1220 | auto *conv1 = F->createConv(bindings, "conv1" , var1, 6, 4, 1, 2, 1); |
1221 | bindings.get(cast<Placeholder>(conv1->getFilter()))->getHandle().clear(0.5); |
1222 | bindings.get(cast<Placeholder>(conv1->getBias()))->getHandle().clear(0.7); |
1223 | auto *sigmoid1 = F->createSigmoid("sigmoid1" , conv1); |
1224 | auto *fc1 = F->createFullyConnected(bindings, "fc1" , var2, 2352); |
1225 | bindings.get(cast<Placeholder>(fc1->getWeights()))->getHandle().clear(0.6); |
1226 | auto *reshape1 = F->createReshape("reshape1" , fc1, {8, 14, 28, 6}, "NHWC" ); |
1227 | auto *relu1 = F->createRELU("relu1" , reshape1); |
1228 | auto *pool1 = F->createMaxPool("pool1" , relu1, 2, 2, 1); |
1229 | auto *add = F->createAdd("add" , sigmoid1, pool1->getResult()); |
1230 | auto *tanh = F->createTanh("tanh" , add); |
1231 | auto *fc2 = F->createFullyConnected(bindings, "fc2" , var3, 720); |
1232 | bindings.get(cast<Placeholder>(fc2->getWeights()))->getHandle().clear(1.1); |
1233 | auto *reshape2 = F->createReshape("reshape2" , fc2, {8, 8, 15, 6}, "NHWC" ); |
1234 | auto *mul = F->createMul("mul" , tanh, reshape2); |
1235 | auto *sigmoid2 = F->createSigmoid("sigmoid2" , mul); |
1236 | auto *conv2 = F->createConv(bindings, "conv2" , sigmoid2, 7, 3, 2, 1, 1); |
1237 | bindings.get(cast<Placeholder>(conv2->getFilter()))->getHandle().clear(0.3); |
1238 | bindings.get(cast<Placeholder>(conv2->getBias()))->getHandle().clear(1.3); |
1239 | auto *reshape3 = F->createReshape("reshape3" , conv2, {8, 8, 7, 4}, "NHWC" ); |
1240 | auto *sub = F->createSub("sub" , reshape3, var4); |
1241 | auto *relu2 = F->createRELU("relu2" , sub); |
1242 | auto *pool2 = F->createAvgPool("pool2" , relu2, 3, 2, 1); |
1243 | auto *sigmoid3 = F->createSigmoid("sigmoid3" , pool2); |
1244 | auto *result = F->createSave("ret" , sigmoid3); |
1245 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
1246 | |
1247 | EE.compile(CompilationMode::Infer); |
1248 | |
1249 | updateInputPlaceholders(bindings, {var1, var2, var3, var4}, |
1250 | {inputs1, inputs2, inputs3, inputs4}); |
1251 | EE.run(bindings); |
1252 | out->assign(resultTensor); |
1253 | } |
1254 | |
1255 | namespace { |
1256 | // Helper for initializing conv node filter/bias from input tensors. |
1257 | static void initConv(PlaceholderBindings &bindings, ConvolutionNode *C, |
1258 | Tensor &filter, Tensor &bias) { |
1259 | bindings.get(cast<Placeholder>(C->getFilter()))->assign(&filter); |
1260 | bindings.get(cast<Placeholder>(C->getBias()))->assign(&bias); |
1261 | } |
1262 | } // namespace |
1263 | |
1264 | void inferTinyResnet(Tensor *input, Tensor *out, std::vector<Tensor> &weights, |
1265 | llvm::StringRef kind) { |
1266 | PlaceholderBindings bindings; |
1267 | ExecutionEngine EE(kind); |
1268 | auto &mod = EE.getModule(); |
1269 | auto *F = mod.createFunction("main" ); |
1270 | |
1271 | auto *in = createPlaceholder(mod, bindings, input, "in" , "NHWC" ); |
1272 | auto *conv1 = F->createConv(bindings, "conv1" , in, 256, 1, 1, 0, 1); |
1273 | auto *conv2a = F->createConv(bindings, "conv2a" , conv1, 64, 1, 1, 0, 1); |
1274 | auto *relu2a = F->createRELU("relu2a" , conv2a); |
1275 | auto *conv2b = F->createConv(bindings, "conv2b" , relu2a, 64, 3, 1, 1, 1); |
1276 | auto *relu2b = F->createRELU("relu2b" , conv2b); |
1277 | auto *conv2c = F->createConv(bindings, "conv2c" , relu2b, 256, 1, 1, 0, 1); |
1278 | auto *add = F->createAdd("add" , conv2c, conv1); |
1279 | auto *relu = F->createRELU("res2a_relu" , add); |
1280 | auto *result = F->createSave("ret" , relu); |
1281 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
1282 | |
1283 | initConv(bindings, conv1, weights[0], weights[1]); |
1284 | initConv(bindings, conv2a, weights[2], weights[3]); |
1285 | initConv(bindings, conv2b, weights[4], weights[5]); |
1286 | initConv(bindings, conv2c, weights[6], weights[7]); |
1287 | convertPlaceholdersToConstants(F, bindings, {in, result->getPlaceholder()}); |
1288 | |
1289 | EE.compile(CompilationMode::Infer); |
1290 | |
1291 | updateInputPlaceholders(bindings, {in}, {input}); |
1292 | EE.run(bindings); |
1293 | out->assign(resultTensor); |
1294 | } |
1295 | |
1296 | void (Tensor *input, Tensor *out, llvm::StringRef kind) { |
1297 | PlaceholderBindings bindings; |
1298 | ExecutionEngine EE(kind); |
1299 | auto &mod = EE.getModule(); |
1300 | auto *F = mod.createFunction("main" ); |
1301 | |
1302 | auto *inputs = createPlaceholder(mod, bindings, input, "inputs" ); |
1303 | |
1304 | auto *x1 = F->createSlice("ex1" , inputs, {0, 5, 0}, {1, 100, 100}); |
1305 | auto *x2 = F->createSlice("ex2" , inputs, {1, 5, 0}, {2, 100, 100}); |
1306 | auto *x3 = F->createSlice("ex3" , inputs, {2, 5, 0}, {3, 100, 100}); |
1307 | auto *x4 = F->createSlice("ex4" , inputs, {3, 5, 0}, {4, 100, 100}); |
1308 | |
1309 | auto *x12 = F->createConcat("x12" , {x1, x2}, 1); |
1310 | auto *x34 = F->createConcat("x34" , {x3, x4}, 1); |
1311 | auto *x13 = F->createConcat("x34" , {x1, x3}, 1); |
1312 | auto *x24 = F->createConcat("x34" , {x2, x4}, 1); |
1313 | |
1314 | auto *add1 = F->createAdd("add1" , x12, x34); |
1315 | auto *add2 = F->createAdd("add1" , x13, x24); |
1316 | auto *add3 = F->createAdd("add1" , add1, add2); |
1317 | |
1318 | auto *e = F->createSlice("slice" , add3, {0, 55, 50}, {1, 150, 100}); |
1319 | auto *result = F->createSave("ret" , e); |
1320 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
1321 | |
1322 | EE.compile(CompilationMode::Infer); |
1323 | |
1324 | updateInputPlaceholders(bindings, {inputs}, {input}); |
1325 | EE.run(bindings); |
1326 | out->assign(resultTensor); |
1327 | } |
1328 | |
1329 | void inferMaxSplat(Tensor *input, Tensor *out, llvm::StringRef kind) { |
1330 | PlaceholderBindings bindings; |
1331 | ExecutionEngine EE(kind); |
1332 | auto &mod = EE.getModule(); |
1333 | Function *F = mod.createFunction("main" ); |
1334 | |
1335 | auto T = mod.uniqueType(ElemKind::Int8QTy, input->getType().dims(), |
1336 | 2 * input->getType().getScale(), |
1337 | -input->getType().getOffset()); |
1338 | auto *var = createQuantizedPlaceholder(mod, bindings, input, |
1339 | input->getType().getScale(), |
1340 | input->getType().getOffset(), "var" ); |
1341 | auto *rescale = F->createRescaleQuantized("rescale" , var, T); |
1342 | |
1343 | auto *splat1 = F->createSplat("splat1" , T, 0.0); |
1344 | auto *splat2 = F->createSplat("splat2" , T, 5.0); |
1345 | |
1346 | auto *max1 = F->createMax("max1" , rescale, splat1); |
1347 | auto *max2 = F->createMax("max2" , splat2, max1); |
1348 | |
1349 | auto *result = F->createSave("ret" , max2); |
1350 | auto *resultTensor = bindings.allocate(result->getPlaceholder()); |
1351 | |
1352 | EE.compile(CompilationMode::Infer); |
1353 | |
1354 | updateInputPlaceholders(bindings, {var}, {input}); |
1355 | EE.run(bindings); |
1356 | out->assign(resultTensor); |
1357 | } |
1358 | |
1359 | void insertCompiledFunction(llvm::StringRef name, CompiledFunction *func, |
1360 | runtime::DeviceManager *device, Module *mod) { |
1361 | runtime::FunctionMapTy functionMap; |
1362 | functionMap[name.str()] = func; |
1363 | |
1364 | std::promise<void> addPromise; |
1365 | auto fut = addPromise.get_future(); |
1366 | Error addErr = Error::empty(); |
1367 | device->addNetwork(mod, std::move(functionMap), |
1368 | [&addPromise, &addErr](const Module *, Error err) { |
1369 | addErr = std::move(err); |
1370 | addPromise.set_value(); |
1371 | }); |
1372 | fut.wait(); |
1373 | EXIT_ON_ERR(std::move(addErr)); |
1374 | } |
1375 | |
/// Synchronously run the compiled function \p name on \p device using
/// \p context. Ownership of \p context stays with the caller even though the
/// DeviceManager API traffics in unique_ptr.
void runOnDevice(ExecutionContext &context, llvm::StringRef name,
                 runtime::DeviceManager *device) {
  // Wrap the caller's context in a unique_ptr because runFunction() requires
  // one; ownership is reclaimed via release() in the callback below.
  std::unique_ptr<ExecutionContext> contextPtr(&context);
  std::promise<void> runPromise;
  auto fut = runPromise.get_future();
  Error runErr = Error::empty();
  device->runFunction(
      name.str(), std::move(contextPtr),
      [&runPromise, &runErr](runtime::RunIdentifierTy, Error err,
                             std::unique_ptr<ExecutionContext> contextPtr) {
        // Don't delete context. The caller owns it; releasing here prevents
        // the unique_ptr from destroying a stack/caller-owned object.
        contextPtr.release();
        runErr = std::move(err);
        runPromise.set_value();
      });
  // Block until the asynchronous run completes, then abort on any error.
  fut.wait();
  EXIT_ON_ERR(std::move(runErr));
}
1394 | |
1395 | Constant *createRandomizedConstant(Module &mod, TypeRef type, |
1396 | llvm::ArrayRef<dim_t> dims, |
1397 | llvm::StringRef name) { |
1398 | auto *c = mod.createConstant(mod.uniqueTypeWithNewShape(type, dims), name); |
1399 | |
1400 | switch (type->getElementType()) { |
1401 | case ElemKind::FloatTy: { |
1402 | c->getHandle<float>().initXavier(c->getType()->size() * 2, mod.getPRNG()); |
1403 | break; |
1404 | } |
1405 | case ElemKind::Float16Ty: { |
1406 | c->getHandle<float16_t>().initXavier(c->getType()->size() * 2, |
1407 | mod.getPRNG()); |
1408 | break; |
1409 | } |
1410 | case ElemKind::BFloat16Ty: { |
1411 | c->getHandle<bfloat16_t>().initXavier(c->getType()->size() * 2, |
1412 | mod.getPRNG()); |
1413 | break; |
1414 | } |
1415 | case ElemKind::Int32QTy: { |
1416 | c->getHandle<int32_t>().randomize(INT32_MIN, INT32_MAX, mod.getPRNG()); |
1417 | break; |
1418 | } |
1419 | case ElemKind::Int8QTy: { |
1420 | c->getHandle<int8_t>().randomize(INT8_MIN, INT8_MAX, mod.getPRNG()); |
1421 | break; |
1422 | } |
1423 | case ElemKind::UInt8FusedQTy: |
1424 | case ElemKind::UInt8FusedFP16QTy: { |
1425 | c->getHandle<uint8_t>().randomize(UINT8_MIN, UINT8_MAX, mod.getPRNG()); |
1426 | break; |
1427 | } |
1428 | default: |
1429 | LOG(FATAL) << "Unsupported type: " << type->getElementName().str(); |
1430 | } |
1431 | |
1432 | return c; |
1433 | } |
1434 | |
1435 | Constant *createRandomFusedRowwiseQuantizedConstant(Module &mod, |
1436 | llvm::ArrayRef<dim_t> dims, |
1437 | llvm::StringRef name, |
1438 | bool useFusedFP16) { |
1439 | auto T = mod.uniqueType( |
1440 | (useFusedFP16 ? ElemKind::UInt8FusedFP16QTy : ElemKind::UInt8FusedQTy), |
1441 | {1}, 1, 0); |
1442 | const dim_t sizeScaleOffset = |
1443 | useFusedFP16 ? sizeof(float16_t) : sizeof(float); |
1444 | Constant *c = createRandomizedConstant( |
1445 | mod, T, {dims[0], dims[1] + 2 * sizeScaleOffset}, name); |
1446 | |
1447 | // Range (0, 255) -> (-0.1, 0.1) |
1448 | constexpr float scale = 1.0f / 1275; |
1449 | constexpr float offset = -0.1; |
1450 | auto cH = c->getPayload().getHandle<uint8_t>(); |
1451 | for (unsigned i = 0, e = c->dims()[0]; i < e; i++) { |
1452 | if (useFusedFP16) { |
1453 | cH.setFusedScaleOffsetInRow<float16_t>(i, scale, offset); |
1454 | } else { |
1455 | cH.setFusedScaleOffsetInRow<float>(i, scale, offset); |
1456 | } |
1457 | } |
1458 | |
1459 | return c; |
1460 | } |
1461 | |
1462 | Placeholder *createFusedRowwiseQuantizedPlaceholder(Module &mod, |
1463 | llvm::ArrayRef<dim_t> dims, |
1464 | llvm::StringRef name, |
1465 | bool useFusedFP16) { |
1466 | auto T = useFusedFP16 ? ElemKind::UInt8FusedFP16QTy : ElemKind::UInt8FusedQTy; |
1467 | const dim_t sizeScaleOffset = |
1468 | useFusedFP16 ? sizeof(float16_t) : sizeof(float); |
1469 | constexpr float scale = 1.0f / 1275; |
1470 | constexpr float offset = -0.1; |
1471 | Placeholder *ph = mod.createPlaceholder( |
1472 | T, {dims[0], dims[1] + 2 * sizeScaleOffset}, scale, offset, name, false); |
1473 | |
1474 | return ph; |
1475 | } |
1476 | |
1477 | // Helper for creating and intializing placeholders from tensors. |
1478 | Placeholder *createPlaceholder(Module &mod, PlaceholderBindings &bindings, |
1479 | Tensor *tensor, llvm::StringRef name, |
1480 | const std::string &layout) { |
1481 | auto *P = mod.createPlaceholder(&tensor->getType(), name, false, layout); |
1482 | auto *PTensor = bindings.allocate(P); |
1483 | PTensor->assign(tensor); |
1484 | return P; |
1485 | } |
1486 | |
1487 | } // namespace glow |
1488 | |