/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Loader.h"

#include "glow/Base/Image.h"
#include "glow/Base/Tensor.h"
#include "glow/Converter/TypeAToTypeBFunctionConverter.h"
#include "glow/IR/IR.h"
#include "glow/Importer/Caffe2ModelLoader.h"
#include "glow/Importer/ONNXModelLoader.h"
#include "glow/Importer/TFLiteModelLoader.h"
#include "glow/Optimizer/GraphOptimizer/CompilationContext.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
#include "glow/Quantization/Serialization.h"
#include "glow/Runtime/RuntimeTypes.h"

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <future>
#include <sstream>

using namespace glow;

llvm::cl::OptionCategory loaderCat("Loader Options");

std::vector<std::string> modelPathOpt;
static llvm::cl::list<std::string, std::vector<std::string>> modelPathOptF(
    "model",
    llvm::cl::desc(
        "Specify one of three:\n"
        "1. Path to ONNX model file.\n"
        "2. Two paths to Caffe2 model files: network structure and weight.\n"
        "3. Path to directory with the Caffe2 network structure "
        "<predict_net.pb> and weight <init_net.pb> files."),
    llvm::cl::value_desc("modelPath"), llvm::cl::Required, llvm::cl::OneOrMore,
    llvm::cl::cat(loaderCat), llvm::cl::location(modelPathOpt));
static llvm::cl::alias modelPathAOpt("m", llvm::cl::desc("Alias for -model"),
                                     llvm::cl::aliasopt(modelPathOptF),
                                     llvm::cl::cat(loaderCat));

namespace {

llvm::cl::opt<bool>
    verbose("verbose",
            llvm::cl::desc("Specify whether to run with verbose output"),
            llvm::cl::Optional, llvm::cl::cat(loaderCat));

llvm::cl::opt<std::string> dumpProfileFileOpt(
    "dump-profile",
    llvm::cl::desc("Perform quantization profiling for a given graph "
                   "and dump result to the file."),
    llvm::cl::value_desc("profile.yaml"), llvm::cl::Optional,
    llvm::cl::cat(loaderCat));

llvm::cl::opt<quantization::Schema> quantizationSchema(
    "quantization-schema",
    llvm::cl::desc("Specify which quantization schema to use"),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(quantization::Schema::Asymmetric, "asymmetric",
                   "Use asymmetric ranges"),
        clEnumValN(quantization::Schema::Symmetric, "symmetric",
                   "Use symmetric ranges"),
        clEnumValN(quantization::Schema::SymmetricWithUnsigned,
                   "symmetric_with_uint8",
                   "Use symmetric ranges with potentially uint8 ranges"),
        clEnumValN(quantization::Schema::SymmetricWithPower2Scale,
                   "symmetric_with_power2_scale",
                   "Use symmetric ranges with power of 2 scaling factor")),
    llvm::cl::init(quantization::Schema::Asymmetric), llvm::cl::cat(loaderCat));

llvm::cl::opt<quantization::Calibration> quantizationCalibrationOpt(
    "quantization-calibration",
    llvm::cl::desc("Specify which quantization calibration method to use"),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(quantization::Calibration::None, "none", "No calibration"),
        clEnumValN(quantization::Calibration::KLMinimization, "KL",
                   "Quantization calibration method based on minimizing the "
                   "Kullback-Leibler divergence metric (relative entropy)")),
    llvm::cl::init(quantization::Calibration::None), llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> calibrateConstantsOpt(
    "calibrate-constants",
    llvm::cl::desc("Enable quantization calibration for constant weights "
                   "(disabled by default)."),
    llvm::cl::init(false), llvm::cl::Optional, llvm::cl::cat(loaderCat));

llvm::cl::opt<ElemKind> quantizationPrecision(
    "quantization-precision",
    llvm::cl::desc("Specify which quantization precision to use, e.g., Int8"),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(ElemKind::Int8QTy, "Int8", "Use Int8 quantization"),
        clEnumValN(ElemKind::Int16QTy, "Int16", "Use Int16 quantization")),
    llvm::cl::init(ElemKind::Int8QTy), llvm::cl::cat(loaderCat));

llvm::cl::opt<ElemKind> quantizationPrecisionBias(
    "quantization-precision-bias",
    llvm::cl::desc("Specify which quantization precision to use for bias "
                   "of Convolution and Fully Connected nodes."),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(ElemKind::Int8QTy, "Int8", "Use Int8 bias quantization"),
        clEnumValN(ElemKind::Int16QTy, "Int16", "Use Int16 bias quantization"),
        clEnumValN(ElemKind::Int32QTy, "Int32", "Use Int32 bias quantization")),
    llvm::cl::init(ElemKind::Int32QTy), llvm::cl::cat(loaderCat));

llvm::cl::opt<bool>
    enableRowwiseOpt("enable-rowwise",
                     llvm::cl::desc("Enable rowwise quantized FullyConnected."),
                     llvm::cl::Optional, llvm::cl::init(false),
                     llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> enableChannelwiseOpt(
    "enable-channelwise",
    llvm::cl::desc("Enable channelwise quantized Convolution."),
    llvm::cl::Optional, llvm::cl::init(false), llvm::cl::cat(loaderCat));

llvm::cl::opt<std::string> loadProfileFileOpt(
    "load-profile",
    llvm::cl::desc("Load quantization profile file and quantize the graph"),
    llvm::cl::value_desc("profile.yaml"), llvm::cl::Optional,
    llvm::cl::cat(loaderCat));

llvm::cl::list<std::string> keepOriginalPrecisionForNodesOpt(
    "keep-original-precision-for-nodes",
    llvm::cl::desc(
        "Use to specify the names of node kinds (e.g. Add, Div, etc.) that "
        "should be kept as is when conversion/quantization is requested. "
        "All nodes of the listed kinds will be kept as is; e.g. if Add is "
        "specified and there are multiple Add nodes in the loaded model, "
        "none of them will be quantized/converted."),
    llvm::cl::value_desc("NodeNames (e.g. Add,Div)"), llvm::cl::ZeroOrMore,
    llvm::cl::CommaSeparated, llvm::cl::cat(loaderCat));

llvm::cl::list<std::string> doNotLowerNodesForProfilingOpt(
    "do-not-lower-nodes-for-profiling",
    llvm::cl::desc(
        "Use to specify the names of node kinds (e.g. Convolution, "
        "FullyConnected, etc.) that should not be lowered during profiling. "
        "All nodes of the listed kinds will be kept as is; e.g. if Convolution "
        "is specified and the model has group convolutions, then the "
        "convolutions will not be lowered for profiling. This means that when "
        "using the profile for quantization, these nodes should not be "
        "lowered then either."),
    llvm::cl::value_desc("NodeNames (e.g. Convolution,FullyConnected)"),
    llvm::cl::ZeroOrMore, llvm::cl::CommaSeparated, llvm::cl::cat(loaderCat));

llvm::cl::opt<std::string> ExecutionBackend(
    "backend",
    llvm::cl::desc("Backend to use, e.g., Interpreter, CPU, OpenCL."),
    llvm::cl::init("Interpreter"), llvm::cl::cat(loaderCat));

/// Debugging options.
llvm::cl::OptionCategory
    modelExportCat("How to export the Glow Intermediate Representation/Graphs",
                   "These options are for debugging the "
                   "graphs by writing the IR/Graphs to "
                   "given files/stdout");

llvm::cl::opt<std::string> dumpGraphDAGFileBeforeCompilationOpt(
    "dump-graph-DAG-before-compile",
    llvm::cl::desc("Specify the file to export the Graph in DOT format "
                   "before compilation"),
    llvm::cl::value_desc("file.dot"), llvm::cl::cat(modelExportCat));

llvm::cl::opt<std::string> dumpGraphDAGFileOpt(
    "dump-graph-DAG",
    llvm::cl::desc("Specify the file to export the Graph in DOT format "
                   "after compilation"),
    llvm::cl::value_desc("file.dot"), llvm::cl::cat(modelExportCat));

llvm::cl::opt<bool> dumpGraphOpt("dump-graph",
                                 llvm::cl::desc("Prints Graph to stdout"),
                                 llvm::cl::cat(modelExportCat));

llvm::cl::opt<bool>
    convertToFP16("convert-to-fp16",
                  llvm::cl::desc("Run all floating-point computation in fp16."),
                  llvm::cl::init(false), llvm::cl::cat(loaderCat));

llvm::cl::opt<PrecisionConfiguration::Float16Format> fp16Format(
    "fp16-format", llvm::cl::desc("fp16 format to use."),
    llvm::cl::values(clEnumValN(PrecisionConfiguration::Float16Format::FP16,
                                "fp16", "Use fp16"),
                     clEnumValN(PrecisionConfiguration::Float16Format::BFloat16,
                                "bfloat16", "Use bfloat16")),
    llvm::cl::init(PrecisionConfiguration::Float16Format::FP16),
    llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> convertPlaceholdersOpt(
    "convert-placeholders",
    llvm::cl::desc("Convert model placeholders by merging ConvertTo, Quantize "
                   "and Dequantize nodes into the model inputs and outputs."),
    llvm::cl::init(false), llvm::cl::cat(loaderCat));

/// Emit a bundle into the specified output directory.
llvm::cl::opt<std::string>
    emitBundle("emit-bundle",
               llvm::cl::desc("Output directory for the bundle serialization"),
               llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> assertAllNodesQuantizedOpt(
    "assert-all-nodes-quantized",
    llvm::cl::desc(
        "Debugging tool, used to assert the quantizer quantizes all nodes in "
        "the model, or abort otherwise. When false, nodes that are unsupported "
        "as quantized by the backend will be left unquantized, and may have "
        "their inputs dequantized/outputs quantized as necessary. Can be used "
        "in conjunction with -keep-original-precision-for-nodes to explicitly "
        "whitelist node kinds that are allowed to be left unquantized."),
    llvm::cl::init(false), llvm::cl::cat(loaderCat));

llvm::cl::opt<unsigned> numHistogramBinsOpt(
    "num-histogram-bins",
    llvm::cl::desc("Number of bins used for the histogram during profiling. "
                   "If histogram-based calibration is used then the number of "
                   "histogram bins must be greater than 255 (typically on the "
                   "order of thousands) for any calibration to take place."),
    llvm::cl::init(10), llvm::cl::value_desc("N"), llvm::cl::cat(loaderCat));

/// Name of the network being bundled.
llvm::cl::opt<std::string> networkName(
    "network-name",
    llvm::cl::desc("Name of the network being bundled. This name is used as a "
                   "prefix for all the files that are generated."),
    llvm::cl::cat(loaderCat));

/// Name of the main entry of the bundle.
llvm::cl::opt<std::string>
    mainEntryName("main-entry-name",
                  llvm::cl::desc("Name of the main entry in the bundle. "
                                 "This name is used as the function name "
                                 "of the entry point to the network."),
                  llvm::cl::cat(loaderCat));

} // namespace

// These are outside the namespace so they can be used by the image-classifier.
std::vector<std::string> modelInputsOpt;
static llvm::cl::list<std::string, std::vector<std::string>> modelInputsOptF(
    "model-input", llvm::cl::ZeroOrMore, llvm::cl::location(modelInputsOpt),
    llvm::cl::desc(
        " For ONNX models the inputs of the graph can be inferred \n"
        " automatically and hence this option is not mandatory. \n"
        " For Caffe2 models the graph definition does not contain \n"
        " the description of the inputs and hence must be provided \n"
        " explicitly using this option. One or more model inputs \n"
        " are provided using the following format: \n"
        "    -model-input=<inputName1>,<inputType1>,<inputShape1> \n"
        "    -model-input=<inputName2>,<inputType2>,<inputShape2> \n"
        "    .................................................... \n"
        " For quantized types the format is slightly different since\n"
        " the scale and offset parameters should also be provided: \n"
        "    -model-input=<name>,<type>,<scale>,<offset>,<shape> \n"
        " For example we can provide one or more inputs: \n"
        "    -model-input=input_03_data,float,[1] \n"
        "    -model-input=data_bias,int32,[1,32,32] \n"
        "    -model-input=data,int8q,0.123,-13,[1,10] \n"
        " If only the name is provided, the default type is 'float' \n"
        " and the default shape is '[1]': \n"
        "    -model-input=<inputName1> \n"
        " The supported types are: \n"
        "    - float, float16, bfloat16 (floating point types) \n"
        "    - int32, int64 (integer types) \n"
        "    - int8q, int16q, int32q (integer quantized types) \n"
        "    - bool (logic type)\n"),
    llvm::cl::value_desc("name,[type,[scale,offset],shape]"),
    llvm::cl::cat(loaderCat));

llvm::cl::alias modelInputName("model-input-name",
                               llvm::cl::desc("Alias for -model-input"),
                               llvm::cl::aliasopt(modelInputsOptF),
                               llvm::cl::cat(loaderCat));

llvm::cl::opt<unsigned> numDevices("num-devices",
                                   llvm::cl::desc("Number of Devices to use"),
                                   llvm::cl::init(1), llvm::cl::value_desc("N"),
                                   llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> runAllInputsOnAllDevices(
    "run-all-inputs-on-all-devices",
    llvm::cl::desc("Run all inputs on all devices. Used for testing purposes."),
    llvm::cl::init(false), llvm::cl::cat(loaderCat));

llvm::cl::opt<bool>
    timeOpt("time",
            llvm::cl::desc("Print timer output to stderr detailing how long it "
                           "takes for the program to execute"),
            llvm::cl::Optional, llvm::cl::cat(loaderCat));

llvm::cl::opt<unsigned> iterationsOpt(
    "iterations", llvm::cl::desc("Number of iterations to perform"),
    llvm::cl::Optional, llvm::cl::init(0), llvm::cl::cat(loaderCat));

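/// \returns the model path derived from the -model option: the directory
/// itself when a single directory was given, otherwise the directory
/// containing the first model file.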
std::string Loader::getModelOptPath() {
  // If a single directory path is given, return it.
  if (modelPathOpt.size() == 1 &&
      llvm::sys::fs::is_directory(*modelPathOpt.begin())) {
    return *modelPathOpt.begin();
  }

  // Model path must be to one or more files. Use the path of the first file.
  size_t found = modelPathOpt[0].find_last_of("/");
  return found == std::string::npos ? "." : modelPathOpt[0].substr(0, found);
}

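/// \returns the model directory; only valid when -model was given a single
/// directory path.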
llvm::StringRef Loader::getModelOptDir() {
  assert(modelPathOpt.size() == 1 &&
         llvm::sys::fs::is_directory(*modelPathOpt.begin()) &&
         "Model path must be a single directory.");
  return modelPathOpt[0];
}

bool glow::emittingBundle() { return !emitBundle.empty(); }

bool glow::profilingGraph() { return !dumpProfileFileOpt.empty(); }

/// Parse the 'modelInputsOpt' option and get the model input names and types.
/// The expected format is one of the following:
/// - <name> (default type is 'float', default shape is '[1]')
/// - <name>,<type>,<shape> for non-quantized types.
/// - <name>,<type>,<scale>,<offset>,<shape> for quantized types.
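/// For example (with a hypothetical input name), the specification
///   -model-input=data,int8q,0.123,-13,[1,10]
/// yields the input name "data" and the type
///   Type(ElemKind::Int8QTy, {1, 10}, 0.123f, -13).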
static void getModelInputs(std::vector<std::string> &inputNames,
                           std::vector<Type> *inputTypes) {
  for (const auto &str : modelInputsOpt) {
    // Parse name.
    auto strPair = llvm::StringRef(str).split(',');
    llvm::StringRef name = strPair.first;
    CHECK(name.size()) << "Model input name empty";

    // Verify name is unique and add to vector.
    for (const auto &nameIter : inputNames) {
      if (name.equals(nameIter)) {
        LOG(FATAL) << strFormat("Model input name \"%s\" is not unique. Check "
                                "the graph definition for the input names.",
                                std::string(name).c_str());
      }
    }
    inputNames.push_back(name.str());

    if (!inputTypes) {
      continue;
    }

    // If only the name is provided, use the default type and shape.
    if (strPair.second.size() == 0) {
      inputTypes->push_back(Type(ElemKind::FloatTy, {1}));
      continue;
    }

    // Parse type.
    strPair = strPair.second.split(',');
    llvm::StringRef type = strPair.first;
    CHECK(type.size()) << "Model input type empty";
    ElemKind kind;
    if (type.equals("float")) {
      kind = ElemKind::FloatTy;
    } else if (type.equals("float16")) {
      kind = ElemKind::Float16Ty;
    } else if (type.equals("bfloat16")) {
      kind = ElemKind::BFloat16Ty;
    } else if (type.equals("int8q")) {
      kind = ElemKind::Int8QTy;
    } else if (type.equals("int16q")) {
      kind = ElemKind::Int16QTy;
    } else if (type.equals("int32q")) {
      kind = ElemKind::Int32QTy;
    } else if (type.equals("int32")) {
      kind = ElemKind::Int32ITy;
    } else if (type.equals("int64")) {
      kind = ElemKind::Int64ITy;
    } else if (type.equals("bool")) {
      kind = ElemKind::BoolTy;
    } else {
      LOG(FATAL) << strFormat("Model input type \"%s\" not supported",
                              std::string(type).c_str());
    }

    // For quantized type get scale and offset.
    double scale;
    int32_t offset;
    if (isQuantizedElemKind(kind)) {
      strPair = strPair.second.split(',');
      CHECK(strPair.first.size()) << "Model input scale empty";
      CHECK(!strPair.first.getAsDouble(scale))
          << "Model input scale parameter invalid";
      strPair = strPair.second.split(',');
      CHECK(strPair.first.size()) << "Model input offset empty";
      CHECK(!strPair.first.getAsInteger(0, offset))
          << "Model input offset parameter invalid";
    }

    // Parse shape string.
    llvm::StringRef shape = strPair.second;
    CHECK(shape.size()) << "Model input shape empty";
    ShapeVector dims;
    CHECK_EQ(shape.front(), '[') << "First shape char should be [";
    shape = shape.drop_front();
    CHECK_EQ(shape.back(), ']') << "Last shape char should be ]";
    shape = shape.drop_back();
    CHECK(shape.size()) << "Model input shape empty";
    size_t val;
    while (shape.contains(',')) {
      auto splitRes = shape.split(',');
      CHECK(!splitRes.first.getAsInteger(0, val))
          << "Model input shape integer invalid";
      dims.push_back(val);
      shape = splitRes.second;
    }
    CHECK(!shape.getAsInteger(0, val)) << "Model input shape integer invalid";
    dims.push_back(val);

    // Build type and add to vector.
    if (isQuantizedElemKind(kind)) {
      inputTypes->push_back(Type(kind, dims, (float)scale, offset));
    } else {
      inputTypes->push_back(Type(kind, dims));
    }
  }
}

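/// Load the model specified on the command line into the Loader's Function.
/// When \p inputType is non-empty it overrides the input types parsed from
/// -model-input; \p bindings (if non-null) is forwarded to the registered
/// loader extensions via postModelLoad().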
void Loader::loadModel(PlaceholderBindings *bindings,
                       llvm::ArrayRef<TypeRef> inputType) {

  // Get model input names and types.
  std::vector<std::string> inputNames;
  std::vector<Type> inputTypes;
  getModelInputs(inputNames, &inputTypes);
  std::vector<const char *> inputNameRefs;
  std::vector<TypeRef> inputTypeRefs;
  for (size_t idx = 0, e = inputNames.size(); idx < e; idx++) {
    inputNameRefs.push_back(inputNames[idx].c_str());
    inputTypeRefs.push_back(&inputTypes[idx]);
  }

  // Use explicit input type if given.
  if (inputType.size()) {
    inputTypeRefs = inputType;
  }

  // Load the model based on the model format.
  if (!getCaffe2NetDescFilename().empty()) {
    // For Caffe2 format the input placeholder names/types must be provided
    // explicitly (mandatory).
    std::unique_ptr<ProtobufLoader> protoLoader;
    protoLoader.reset(new Caffe2ModelLoader(
        getCaffe2NetDescFilename().str(), getCaffe2NetWeightFilename().str(),
        inputNameRefs, inputTypeRefs, *getFunction()));
    // Load the maps between original model names and the placeholders.
    inputPlaceholderByName_ = protoLoader->getInputVarsMapping();
    outputPlaceholderByName_ = protoLoader->getOutputVarsMapping();
    if (bindings) {
      postModelLoad(*bindings, *protoLoader.get(), outputPlaceholderByName_,
                    inputType);
    }
  } else if (!getTFLiteModelFilename().empty()) {
    // For TensorFlowLite format the input placeholder names/types are not
    // provided since they are taken directly from the model.
    auto tfliteLoader = glow::make_unique<TFLiteModelLoader>(
        getTFLiteModelFilename().str(), getFunction());
    // Load the maps between original model names and the placeholders.
    inputPlaceholderByName_ = tfliteLoader->getInputPlaceholderMap();
    outputPlaceholderByName_ = tfliteLoader->getOutputPlaceholderMap();
    // Since the TensorFlowLite loader currently does not have the capability
    // to enforce the input type (for batching), we must validate that an
    // explicitly given input type actually matches the model input type.
    if (bindings) {
      postModelLoad(*bindings, *tfliteLoader, outputPlaceholderByName_,
                    inputType);
    }
    if (inputType.size()) {
      CHECK(inputPlaceholderByName_.size() == 1)
          << "Model is expected to have only 1 input!";
      Placeholder *inpPH = inputPlaceholderByName_.begin()->second;
      auto modelBatchSize = inpPH->getType()->dims()[0];
      auto inputBatchSize = inputType[0]->dims()[0];
      CHECK(inputBatchSize == modelBatchSize)
          << "Mismatch between the model batch size (" << modelBatchSize
          << ") and the dataset batch size (" << inputBatchSize << ")! "
          << "If you are using the 'image-classifier' tool set the "
          << "dataset batch size with the option '-minibatch=" << modelBatchSize
          << "'!";
    }
  } else {
    // For ONNX format the input placeholder names/types can optionally be
    // provided but are not mandatory. If not provided (the arrays are empty)
    // they are derived automatically. One might want to provide the input
    // placeholder types explicitly in order to override the placeholder
    // sizes (one such example is the batch size).
    std::unique_ptr<ProtobufLoader> protoLoader;
    protoLoader.reset(new ONNXModelLoader(getOnnxModelFilename().str(),
                                          inputNameRefs, inputTypeRefs,
                                          *getFunction()));
    // Load the maps between original model names and the placeholders.
    inputPlaceholderByName_ = protoLoader->getInputVarsMapping();
    outputPlaceholderByName_ = protoLoader->getOutputVarsMapping();
    if (bindings) {
      postModelLoad(*bindings, *protoLoader.get(), outputPlaceholderByName_,
                    inputType);
    }
  }
}

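/// \returns true if the combination of command line options is invalid (e.g.
/// -dump-profile given together with -load-profile), printing an error
/// message to stderr. Also derives a default -network-name from the model
/// path when a bundle is emitted and no name was given.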
static bool commandLineIsInvalid() {
  if (!dumpProfileFileOpt.empty() &&
      (!loadProfileFileOpt.empty() || convertToFP16)) {
    llvm::errs() << "Loader: the -" << dumpProfileFileOpt.ArgStr
                 << " option cannot be specified at the same time as either -"
                 << loadProfileFileOpt.ArgStr << " or -" << convertToFP16.ArgStr
                 << ".\n";
    return true;
  }

  if (emitBundle.getNumOccurrences()) {
    if (networkName.getNumOccurrences()) {
      if (networkName.empty()) {
        llvm::errs() << "Loader: -" << networkName.ArgStr
                     << " must not be empty.\n";
        return true;
      } // FIXME: else make sure networkName does not have any sequence of
        // characters that could turn into evil stuff in the assembler.
    } else {
      // By default, use the last directory in the model path
      // as the name of the network.
      // Only do that when there is just one path specified.
      if (modelPathOpt.size() == 1) {
        for (auto it = llvm::sys::path::rbegin(modelPathOpt[0]),
                  end = llvm::sys::path::rend(modelPathOpt[0]);
             it != end; ++it) {
          networkName = std::string(*it);
          // Strip extension (if any).
          size_t lastDotPos = networkName.find_last_of(".");
          if (lastDotPos != std::string::npos) {
            networkName = networkName.substr(0, lastDotPos);
          }
          // Empty names are replaced by '.' (see Path.h in LLVM).
          if (!networkName.empty() && networkName != ".") {
            break;
          }
        }
      }
      if (networkName.empty()) {
        llvm::errs() << "Loader: Use -" << networkName.ArgStr
                     << " to specify a non-empty network name.\n";
        return true;
      }
    }
  } else if (networkName.getNumOccurrences()) {
    llvm::errs() << "Loader: -" << networkName.ArgStr
                 << " only makes sense when -" << emitBundle.ArgStr
                 << " is used.\n";
    return true;
  }
  return false;
}

/// Clear external storage for cmd args defined in Loader.
static void initCmdArgVars() {
  llvm::cl::ResetAllOptionOccurrences();
  modelInputsOpt.clear();
  modelPathOpt.clear();
}

void glow::parseCommandLine(int argc, char **argv) {

  initCmdArgVars();

  llvm::cl::SetVersionPrinter([](llvm::raw_ostream &os) {
#ifdef GLOW_VERSION
    os << "Glow Tools version: " << GLOW_VERSION << "\n";
#endif
  });
  // TODO - registered once to avoid error:
  // "LLVM ERROR: too many signal callbacks already registered."
  static bool stackTraceRegistered = false;
  if (!stackTraceRegistered) {
    stackTraceRegistered = true;
    llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
  }
  llvm::cl::ParseCommandLineOptions(
      argc, argv,
      " The Glow compiler\n\n"
      "Glow is a compiler for neural network accelerators.\n");

  if (commandLineIsInvalid()) {
    std::exit(1);
  }

  if (modelPathOpt.size() > 2) {
    llvm::errs() << "-model flag should have either 1 or 2 paths assigned. "
                    "Please see flag's description.\n";
    std::exit(1);
  }
}

quantization::QuantizationConfiguration Loader::getQuantizationConfiguration() {
  quantization::QuantizationConfiguration quantConfig;
  quantConfig.precision = quantizationPrecision;
  quantConfig.precisionBias = quantizationPrecisionBias;
  quantConfig.schema = quantizationSchema;
  quantConfig.calibration = quantizationCalibrationOpt;
  quantConfig.calibrateConstants = calibrateConstantsOpt;
  quantConfig.enableRowwise = enableRowwiseOpt;
  quantConfig.enableChannelwise = enableChannelwiseOpt;
  quantConfig.assertAllNodesQuantized = assertAllNodesQuantizedOpt;
  if (!loadProfileFileOpt.empty()) {
    auto fileExists = deserializeProfilingInfosFromYaml(
        loadProfileFileOpt, quantConfig.graphPreLowerHash, quantConfig.infos);
    CHECK(fileExists) << strFormat("Profile file \"%s\" does not exist!",
                                   loadProfileFileOpt.c_str());
  }
  quantConfig.checkGraphPreLowerHash = true;
  return quantConfig;
}

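/// Build a CompilationContext configured from the command line options for
/// the given quantization \p mode (None, Quantize or Profile).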
CompilationContext Loader::getCompilationContext(QuantizationMode mode) {

  // Common configurations.
  CompilationContext cctx;
  cctx.loweredInfoMap = &loweredMap_;
  PrecisionConfiguration &precConfig = cctx.precisionConfig;
  precConfig.convertToFP16 = convertToFP16;
  precConfig.float16Format = fp16Format;

  // Specific configurations.
  precConfig.quantMode = mode;
  if (mode == QuantizationMode::None) {

    // By default, when converting models, all nodes that can be converted are
    // converted. However, some models may need to keep higher precision for
    // some nodes to prevent high accuracy loss. Those nodes are gathered via
    // the keepOriginalPrecisionForNodesOpt option and passed to the related
    // conversion function.
    for (llvm::StringRef kindName : keepOriginalPrecisionForNodesOpt) {
      precConfig.precisionModeKindSet.insert(getKindFromNodeName(kindName));
    }

  } else if (mode == QuantizationMode::Quantize) {

    // By default, when converting models, all nodes that can be converted are
    // converted. However, some models may need to keep higher precision for
    // some nodes to prevent high accuracy loss. Those nodes are gathered via
    // the keepOriginalPrecisionForNodesOpt option and passed to the related
    // conversion function.
    for (llvm::StringRef kindName : keepOriginalPrecisionForNodesOpt) {
      precConfig.precisionModeKindSet.insert(getKindFromNodeName(kindName));
    }
    precConfig.quantConfig = getQuantizationConfiguration();

  } else if (mode == QuantizationMode::Profile) {

    // Profiling parameters.
    precConfig.profConfig.numHistogramBins = numHistogramBinsOpt;

    // By default everything will be lowered for profiling. However this may
    // cause performance issues for some models, e.g. if a model has group
    // Convolutions which explode the size of the graph when lowered. Thus
    // allow for disabling certain NodeKinds for profiling. This means that
    // during quantization, these nodes should also not be lowered by the
    // backend.
    for (llvm::StringRef kindName : doNotLowerNodesForProfilingOpt) {
      precConfig.precisionModeKindSet.insert(getKindFromNodeName(kindName));
    }

  } else {
    LOG(FATAL) << "Quantization mode not supported";
  }

  // When converting the model placeholders, if the placeholders are already
  // allocated, we should also convert the backing tensors. Since this
  // procedure is not yet in place, we only convert when emitting a bundle.
  if (convertPlaceholdersOpt && !emittingBundle()) {
    llvm::errs() << "The flag 'convert-placeholders' can only be used when "
                    "emitting a bundle!\n";
    std::exit(1);
  }
  cctx.optimizationOpts.foldElemKindConversionIntoIO = convertPlaceholdersOpt;

  return cctx;
}

CompilationContext Loader::getCompilationContext() {
  if (!dumpProfileFileOpt.empty()) {
    return Loader::getCompilationContext(QuantizationMode::Profile);
  } else if (!loadProfileFileOpt.empty()) {
    return Loader::getCompilationContext(QuantizationMode::Quantize);
  } else {
    return Loader::getCompilationContext(QuantizationMode::None);
  }
}

void Loader::compile(PlaceholderBindings &bindings) {
  CompilationContext cctx = getCompilationContext();
  cctx.bindings = &bindings;
  compile(cctx);
}

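/// Compile the loaded Function: when -emit-bundle is given, optimize it and
/// save it as a bundle; otherwise add it to the HostManager for execution.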
void Loader::compile(CompilationContext &cctx) {

  // Dump the DAG before compilation if needed.
  if (!dumpGraphDAGFileBeforeCompilationOpt.empty()) {
    F_->dumpDAG(dumpGraphDAGFileBeforeCompilationOpt.c_str());
  }

  // Store a raw pointer to the Module; we pass the unique_ptr to HostManager,
  // but the Module is stored by the HostManager so the pointer will remain
  // valid.
  auto module = M_.get();

  if (emittingBundle()) {
    // Create the bundle directory if it does not exist.
    if (!llvm::sys::fs::is_directory(emitBundle)) {
      llvm::sys::fs::create_directory(emitBundle);
    }
    // Emit IR for the graph, compile it and save as a bundle. Replicate the
    // same optimizations seen during normal execution inside addNetwork().
    EXIT_ON_ERR(::glow::optimizeFunctionBeforeLowering(F_, cctx));
    EXIT_ON_ERR(::glow::optimizeFunction(F_, *backend_, cctx));
    backend_->save(F_, emitBundle, networkName,
                   mainEntryName.empty() ? networkName : mainEntryName);
  } else {
    // Emit IR for the graph and compile it.
    cctx.saturateHost = !runAllInputsOnAllDevices;
    auto error = hostManager_->addNetwork(std::move(M_), cctx);
    EXIT_ON_ERR(std::move(error));
    // After partitioning, the original function may be removed. Need to
    // update F_.
    F_ = module->getFunctions().front();
  }
  if (dumpGraphOpt) {
    for (auto function : module->getFunctions()) {
      function->dump();
    }
  }
  if (!dumpGraphDAGFileOpt.empty()) {
    for (auto function : module->getFunctions()) {
      std::string filename =
          function->getFilename() + "_" + dumpGraphDAGFileOpt;
      if (module->getFunctions().size() == 1) {
        filename = dumpGraphDAGFileOpt;
      }
      function->dumpDAG(filename.c_str());
    }
  }
  // Store compilation info in the Loader.
  compilationInfo_ = cctx.info;
}

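/// Run inference -iterations times (default 1) using \p bindings and, when
/// -time is set, print the average wall time per item based on \p batchSize.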
void Loader::runInference(PlaceholderBindings &bindings, size_t batchSize) {
  assert(!emittingBundle() &&
         "No inference is performed in the bundle generation mode.");
  unsigned iterations = iterationsOpt == 0 ? 1 : iterationsOpt;
  llvm::Timer timer("Infer", "Infer");
  if (timeOpt) {
    timer.startTimer();
  }
  for (unsigned i = 0; i < iterations; i++) {
    auto runErr = hostManager_->runNetworkBlocking(functionName_, bindings);
    EXIT_ON_ERR(std::move(runErr));
  }
  if (timeOpt) {
    timer.stopTimer();
    llvm::outs() << llvm::formatv("Wall time per item (s): {0:f4}\n",
                                  timer.getTotalTime().getWallTime() /
                                      iterations / batchSize);
  }
}

void Loader::runInference(ExecutionContext *context, size_t batchSize) {
  std::unique_ptr<ExecutionContext> contextP(context);

  unsigned iterations = iterationsOpt == 0 ? 1 : iterationsOpt;
  llvm::Timer timer("Infer", "Infer");
  if (timeOpt) {
    timer.startTimer();
  }

  for (unsigned i = 0; i < iterations; i++) {
    std::promise<void> runPromise;
    auto fut = runPromise.get_future();
    std::unique_ptr<Error> runErr;
    hostManager_->runNetwork(
        functionName_, std::move(contextP),
        [&runPromise, &runErr](runtime::RunIdentifierTy, Error err,
                               std::unique_ptr<ExecutionContext> contextPtr) {
          // Don't really delete context since we don't own it.
          contextPtr.release();

          runErr = glow::make_unique<Error>(std::move(err));
          runPromise.set_value();
        });
    fut.wait();
    EXIT_ON_ERR(std::move(*DCHECK_NOTNULL(runErr.get())));
  }
  if (timeOpt) {
    timer.stopTimer();
    llvm::outs() << llvm::formatv("Wall time per item (s): {0:f4}\n",
                                  timer.getTotalTime().getWallTime() /
                                      iterations / batchSize);
  }
}

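/// Comparator used to sort profiling infos by node output name so that the
/// serialized profile is deterministic.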
static bool comparePI(const NodeProfilingInfo &a, const NodeProfilingInfo &b) {
  return (a.nodeOutputName_.compare(b.nodeOutputName_) < 0);
}

void Loader::generateAndSerializeProfilingInfos(PlaceholderBindings &bindings) {
  assert(!dumpProfileFileOpt.empty() &&
         "Filename to dump serialized profile to must not be empty.");
  std::vector<NodeProfilingInfo> PI;
  for (auto F : getModule()->getFunctions()) {
    std::vector<NodeProfilingInfo> tmp =
        quantization::generateNodeProfilingInfos(bindings, F, loweredMap_);
    PI.insert(PI.end(), tmp.begin(), tmp.end());
  }
  std::sort(PI.begin(), PI.end(), comparePI);
  serializeProfilingInfosToYaml(dumpProfileFileOpt,
                                compilationInfo_.graphPreLowerHash, PI);
}

Loader &Loader::registerExtension(std::unique_ptr<LoaderExtension> extension) {
  loaderExtensionList_.push_back(std::move(extension));
  return *this;
}

void Loader::postModelLoad(PlaceholderBindings &bindings,
                           ProtobufLoader &protoLoader,
                           llvm::StringMap<Placeholder *> &placeholderMap,
                           llvm::ArrayRef<TypeRef> inputImageType) {
  for (auto &&ext : loaderExtensionList_) {
    ext->postModelLoad(*this, bindings, protoLoader, placeholderMap,
                       inputImageType);
  }
}

void Loader::postModelLoad(PlaceholderBindings &bindings,
                           TFLiteModelLoader &tfloader,
                           llvm::StringMap<Placeholder *> &placeholderMap,
                           llvm::ArrayRef<TypeRef> inputImageType) {
  for (auto &&ext : loaderExtensionList_) {
    ext->postModelLoad(*this, bindings, tfloader, placeholderMap,
                       inputImageType);
  }
}

void Loader::inferInitMiniBatch(PlaceholderBindings &bindings,
                                size_t minibatchIndex, size_t minibatchSize) {
  for (auto &&ext : loaderExtensionList_) {
    ext->inferInitMiniBatch(*this, bindings, minibatchIndex, minibatchSize);
  }
}

void Loader::inferEndMiniBatch(PlaceholderBindings &bindings,
                               size_t minibatchIndex, size_t minibatchSize) {
  for (auto &&ext : loaderExtensionList_) {
    ext->inferEndMiniBatch(*this, bindings, minibatchIndex, minibatchSize);
  }
}

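/// Construct the Loader: infer the model format (Caffe2 directory/files,
/// TensorFlowLite or ONNX) from the -model option, create the Module and
/// Function, and set up the HostManager and backend for the requested devices.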
Loader::Loader(llvm::ArrayRef<size_t> configDeviceIDs) {
  if (modelPathOpt.size() == 1) {
    if (llvm::sys::fs::is_directory(*modelPathOpt.begin())) {
      caffe2NetDescFilename_ = modelPathOpt[0] + "/predict_net.pb";
      caffe2NetWeightFilename_ = modelPathOpt[0] + "/init_net.pb";
    } else {
      llvm::StringRef modelPath = modelPathOpt[0];
      if (modelPath.endswith("tflite")) {
        tfliteModelFilename_ = modelPath.str();
      } else {
        onnxModelFilename_ = modelPath.str();
      }
    }
  } else {
    caffe2NetDescFilename_ = modelPathOpt[0];
    caffe2NetWeightFilename_ = modelPathOpt[1];
  }
  M_.reset(new Module);

  std::vector<std::unique_ptr<runtime::DeviceConfig>> configs;

  if (configDeviceIDs.empty()) {
    configs = runtime::generateDeviceConfigs(numDevices, ExecutionBackend);
  } else {
    for (size_t ID : configDeviceIDs) {
      CHECK(ID < numDevices) << "IDs must be less than the number of devices";
      auto config = glow::make_unique<runtime::DeviceConfig>(ExecutionBackend);
      config->deviceID = ID;
      configs.push_back(std::move(config));
    }
  }

  hostManager_ = glow::make_unique<runtime::HostManager>(std::move(configs));
  backend_ = std::unique_ptr<Backend>(createBackend(ExecutionBackend));
  F_ = M_->createFunction(modelPathOpt[0]);
  functionName_ = modelPathOpt[0];
}