1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "Loader.h" |
18 | |
19 | #include "glow/Base/Image.h" |
20 | #include "glow/Base/Tensor.h" |
21 | #include "glow/Converter/TypeAToTypeBFunctionConverter.h" |
22 | #include "glow/IR/IR.h" |
23 | #include "glow/Importer/Caffe2ModelLoader.h" |
24 | #include "glow/Importer/ONNXModelLoader.h" |
25 | #include "glow/Importer/TFLiteModelLoader.h" |
26 | #include "glow/Optimizer/GraphOptimizer/CompilationContext.h" |
27 | #include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h" |
28 | #include "glow/Quantization/Serialization.h" |
29 | #include "glow/Runtime/RuntimeTypes.h" |
30 | |
31 | #include "llvm/Support/CommandLine.h" |
32 | #include "llvm/Support/FileSystem.h" |
33 | #include "llvm/Support/FormatVariadic.h" |
34 | #include "llvm/Support/Path.h" |
35 | #include "llvm/Support/Signals.h" |
36 | #include "llvm/Support/Timer.h" |
37 | #include "llvm/Support/raw_ostream.h" |
38 | |
39 | #include <algorithm> |
40 | #include <future> |
41 | #include <sstream> |
42 | |
43 | using namespace glow; |
44 | |
/// Category under which all generic Loader command line options are grouped.
llvm::cl::OptionCategory loaderCat("Loader Options");

/// External storage for the -model option values; a plain vector (rather than
/// the cl::list itself) so other tools linking Loader can read it directly.
std::vector<std::string> modelPathOpt;
static llvm::cl::list<std::string, std::vector<std::string>> modelPathOptF(
    "model",
    llvm::cl::desc(
        "Specify one of three:\n"
        "1. Path to ONNX model file.\n"
        "2. Two paths to Caffe2 model files: network structure and weight.\n"
        "3. Path to directory with the Caffe2 network structure "
        "<predict_net.pb> and weight <init_net.pb> files."),
    llvm::cl::value_desc("modelPath"), llvm::cl::Required, llvm::cl::OneOrMore,
    llvm::cl::cat(loaderCat), llvm::cl::location(modelPathOpt));
/// Short-form alias so users can write "-m" instead of "-model".
static llvm::cl::alias modelPathAOpt("m", llvm::cl::desc("Alias for -model"),
                                     llvm::cl::aliasopt(modelPathOptF),
                                     llvm::cl::cat(loaderCat));
61 | |
namespace {

/// Enable extra logging output while the loader runs.
llvm::cl::opt<bool>
    verbose("verbose",
            llvm::cl::desc("Specify whether to run with verbose output"),
            llvm::cl::Optional, llvm::cl::cat(loaderCat));

/// When non-empty, run in profiling mode and write the captured quantization
/// profile to this file.
llvm::cl::opt<std::string> dumpProfileFileOpt(
    "dump-profile",
    llvm::cl::desc("Perform quantization profiling for a given graph "
                   "and dump result to the file."),
    llvm::cl::value_desc("profile.yaml"), llvm::cl::Optional,
    llvm::cl::cat(loaderCat));

/// Quantization schema (asymmetric by default).
llvm::cl::opt<quantization::Schema> quantizationSchema(
    "quantization-schema",
    llvm::cl::desc("Specify which quantization schema to use"),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(quantization::Schema::Asymmetric, "asymmetric",
                   "Use asymmetric ranges"),
        clEnumValN(quantization::Schema::Symmetric, "symmetric",
                   "Use symmetric ranges"),
        clEnumValN(quantization::Schema::SymmetricWithUnsigned,
                   "symmetric_with_uint8",
                   "Use symmetric ranges with potentially uint8 ranges"),
        clEnumValN(quantization::Schema::SymmetricWithPower2Scale,
                   "symmetric_with_power2_scale",
                   "Use symmetric ranges with power of 2 scaling factor")),
    llvm::cl::init(quantization::Schema::Asymmetric), llvm::cl::cat(loaderCat));

/// Calibration method applied to the captured profile (none by default).
llvm::cl::opt<quantization::Calibration> quantizationCalibrationOpt(
    "quantization-calibration",
    llvm::cl::desc("Specify which quantization calibration method to use"),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(quantization::Calibration::None, "none", "No calibration"),
        clEnumValN(quantization::Calibration::KLMinimization, "KL",
                   "Quantization calibration method based on minimizing the "
                   "Kullback-Leibler divergence metric (relative entropy)")),
    llvm::cl::init(quantization::Calibration::None), llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> calibrateConstantsOpt(
    "calibrate-constants",
    llvm::cl::desc("Option to enable the quantization calibration for constant "
                   "weights which is disabled by default."),
    llvm::cl::init(false), llvm::cl::Optional, llvm::cl::cat(loaderCat));

/// Element type used when quantizing activations/weights (Int8 by default).
llvm::cl::opt<ElemKind> quantizationPrecision(
    "quantization-precision",
    llvm::cl::desc("Specify which quantization precision to use, e.g., Int8"),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(ElemKind::Int8QTy, "Int8", "Use Int8 quantization"),
        clEnumValN(ElemKind::Int16QTy, "Int16", "Use Int16 quantization")),
    llvm::cl::init(ElemKind::Int8QTy), llvm::cl::cat(loaderCat));

/// Element type used when quantizing Conv/FC bias operands (Int32 by default).
llvm::cl::opt<ElemKind> quantizationPrecisionBias(
    "quantization-precision-bias",
    llvm::cl::desc("Specify which quantization precision to use for bias "
                   "of Convolution and Fully Connected nodes."),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(ElemKind::Int8QTy, "Int8", "Use Int8 bias quantization"),
        clEnumValN(ElemKind::Int16QTy, "Int16", "Use Int16 bias quantization"),
        clEnumValN(ElemKind::Int32QTy, "Int32", "Use Int32 bias quantization")),
    llvm::cl::init(ElemKind::Int32QTy), llvm::cl::cat(loaderCat));

llvm::cl::opt<bool>
    enableRowwiseOpt("enable-rowwise",
                     llvm::cl::desc("Enable rowwise quantized FullyConnected."),
                     llvm::cl::Optional, llvm::cl::init(false),
                     llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> enableChannelwiseOpt(
    "enable-channelwise",
    llvm::cl::desc("Enable channelwise quantized Convolution."),
    llvm::cl::Optional, llvm::cl::init(false), llvm::cl::cat(loaderCat));

/// When non-empty, load a previously dumped profile and quantize the graph.
llvm::cl::opt<std::string> loadProfileFileOpt(
    "load-profile",
    llvm::cl::desc("Load quantization profile file and quantize the graph"),
    llvm::cl::value_desc("profile.yaml"), llvm::cl::Optional,
    llvm::cl::cat(loaderCat));

/// Node kind names that must keep their original precision during
/// conversion/quantization.
llvm::cl::list<std::string> keepOriginalPrecisionForNodesOpt(
    "keep-original-precision-for-nodes",
    llvm::cl::desc(
        "Use to specify the name of nodes (e.g. Add, Div, etc.) that should "
        "be kept as is when conversion/quantization is requested. "
        "All nodes of the listed kinds will be kept as is;"
        "e.g. if Add is specified and there are multiple Add nodes "
        "in the input loaded model, none would be quantized/converted."),
    llvm::cl::value_desc("NodeNames (e.g. Add,Div)"), llvm::cl::ZeroOrMore,
    llvm::cl::CommaSeparated, llvm::cl::cat(loaderCat));

/// Node kind names that must not be lowered while profiling.
llvm::cl::list<std::string> doNotLowerNodesForProfilingOpt(
    "do-not-lower-nodes-for-profiling",
    llvm::cl::desc(
        "Use to specify the name of nodes (e.g. Convolution, FullyConnected, "
        "etc.) that should not be lowered during profiling. All nodes of the "
        "listed kinds will be kept as is; e.g. if Conv is specified and the "
        "model has group convolutions then the convolution will not be lowered "
        "for profiling. This means when using the profile for quantization, "
        "the node should not be lowered then either."),
    llvm::cl::value_desc("NodeNames (e.g. Convolution,FullyConnected)"),
    llvm::cl::ZeroOrMore, llvm::cl::CommaSeparated, llvm::cl::cat(loaderCat));

/// Name of the backend to compile for ("Interpreter" by default).
llvm::cl::opt<std::string> ExecutionBackend(
    "backend",
    llvm::cl::desc("Backend to use, e.g., Interpreter, CPU, OpenCL:"),
    llvm::cl::init("Interpreter"), llvm::cl::cat(loaderCat));

/// Debugging options.
llvm::cl::OptionCategory
    modelExportCat("How to export the Glow Intermediate Representation/Graphs",
                   "These options are for debugging the "
                   "graphs by writing the IR/Graphs to "
                   "given files/stdout");

/// DOT file to dump the graph into before compilation starts.
llvm::cl::opt<std::string> dumpGraphDAGFileBeforeCompilationOpt(
    "dump-graph-DAG-before-compile",
    llvm::cl::desc("Specify the file to export the Graph in DOT format"),
    llvm::cl::value_desc("file.dot"), llvm::cl::cat(modelExportCat));

/// DOT file to dump the (compiled) graph into.
llvm::cl::opt<std::string> dumpGraphDAGFileOpt(
    "dump-graph-DAG",
    llvm::cl::desc("Specify the file to export the Graph in DOT format"),
    llvm::cl::value_desc("file.dot"), llvm::cl::cat(modelExportCat));

llvm::cl::opt<bool> dumpGraphOpt("dump-graph",
                                 llvm::cl::desc("Prints Graph to stdout"),
                                 llvm::cl::cat(modelExportCat));

llvm::cl::opt<bool>
    convertToFP16("convert-to-fp16",
                  llvm::cl::desc("Run all floating-point computation in fp16."),
                  llvm::cl::init(false), llvm::cl::cat(loaderCat));

/// Which 16-bit float representation -convert-to-fp16 should target.
llvm::cl::opt<PrecisionConfiguration::Float16Format> fp16Format(
    "fp16-format", llvm::cl::desc("fp16 format to use."),
    llvm::cl::values(clEnumValN(PrecisionConfiguration::Float16Format::FP16,
                                "fp16", "Use fp16"),
                     clEnumValN(PrecisionConfiguration::Float16Format::BFloat16,
                                "bfloat16", "Use bfloat16")),
    llvm::cl::init(PrecisionConfiguration::Float16Format::FP16),
    llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> convertPlaceholdersOpt(
    "convert-placeholders",
    llvm::cl::desc("Convert model placeholders by merging ConvertTo, Quantize "
                   "and Dequantize nodes into the model inputs and outputs."),
    llvm::cl::init(false), llvm::cl::cat(loaderCat));

/// Emit a bundle into the specified output directory.
llvm::cl::opt<std::string>
    emitBundle("emit-bundle",
               llvm::cl::desc("Output directory for the bundle serialization"),
               llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> assertAllNodesQuantizedOpt(
    "assert-all-nodes-quantized",
    llvm::cl::desc(
        "Debugging tool, used to assert the quantizer quantizes all nodes in "
        "the model, or abort otherwise. When false, nodes that are unsupported "
        "as quantized by the backend will be left unquantized, and may have "
        "their inputs dequantized/outputs quantized as necessary. Can be used "
        "in conjunction with -keep-original-precision-for-nodes to explicitly "
        "whitelist node kinds that are allowed to be left unquantized."),
    llvm::cl::init(false), llvm::cl::cat(loaderCat));

/// Histogram resolution used while profiling (see desc for calibration note).
llvm::cl::opt<unsigned> numHistogramBinsOpt(
    "num-histogram-bins",
    llvm::cl::desc("Number of bins used for histogram during profiling. If "
                   "histogram based calibration is used then the number of "
                   "histogram bins must be greater than 255 in order for any "
                   "calibration to take place (in the order of 1000's)."),
    llvm::cl::init(10), llvm::cl::value_desc("N"), llvm::cl::cat(loaderCat));

/// Name of the network being bundled.
llvm::cl::opt<std::string> networkName(
    "network-name",
    llvm::cl::desc("Name of the network being bundled. This name is used as a "
                   "prefix for all the files that are generated."),
    llvm::cl::cat(loaderCat));

/// Name of the main entry of the bundle.
llvm::cl::opt<std::string>
    mainEntryName("main-entry-name",
                  llvm::cl::desc("Name of the main entry in the bundle. "
                                 "This name is used as the function name "
                                 "of the entry point to the network."),
                  llvm::cl::cat(loaderCat));

} // namespace
257 | |
258 | // These are outside the namespace so they can be used by the image-classifier. |
259 | std::vector<std::string> modelInputsOpt; |
260 | static llvm::cl::list<std::string, std::vector<std::string>> modelInputsOptF( |
261 | "model-input" , llvm::cl::ZeroOrMore, llvm::cl::location(modelInputsOpt), |
262 | llvm::cl::desc( |
263 | " For ONNX models the inputs of the graph can be inferred \n" |
264 | " automatically and hence this option is not mandatory. \n" |
265 | " For Caffe2 models the graph definition does not contain \n" |
266 | " the description of the inputs and hence must be provided \n" |
267 | " explicitly using this option. One or more model inputs \n" |
268 | " are provided using the following format: \n" |
269 | " -model-input=<inputName1>,<inputType1>,<inputShape1> \n" |
270 | " -model-input=<inputName2>,<inputType2>,<inputShape2> \n" |
271 | " .................................................... \n" |
272 | " For quantized types the format is slightly different since\n" |
273 | " the scale and offset parameters should also be provided: \n" |
274 | " -model-input=<name>,<type>,<scale>,<offset>,<shape> \n" |
275 | " For example we can can provide one or more inputs: \n" |
276 | " -model-input=input_03_data,float,[1] \n" |
277 | " -model-input=data_bias,int32,[1,32,32] \n" |
278 | " -model-input=data,int8q,0.123,-13,[1,10] \n" |
279 | " If only the name is provided, the default type is 'float' \n" |
280 | " and the default shape is '[1]': \n" |
281 | " -model-input=<inputName1> \n" |
282 | " The supported types are: \n" |
283 | " - float, float16 (floating point types) \n" |
284 | " - int32, int64 (integer types) \n" |
285 | " - int8q, int16q, int32q (integer quantized types) \n" |
286 | " - bool (logic type)\n" ), |
287 | llvm::cl::value_desc("name,[type,[scale,offset],shape]" ), |
288 | llvm::cl::cat(loaderCat)); |
289 | |
290 | llvm::cl::alias modelInputName("model-input-name" , |
291 | llvm::cl::desc("Alias for -model-input" ), |
292 | llvm::cl::aliasopt(modelInputsOptF), |
293 | llvm::cl::cat(loaderCat)); |
294 | |
295 | llvm::cl::opt<unsigned> numDevices("num-devices" , |
296 | llvm::cl::desc("Number of Devices to use" ), |
297 | llvm::cl::init(1), llvm::cl::value_desc("N" ), |
298 | llvm::cl::cat(loaderCat)); |
299 | |
300 | llvm::cl::opt<bool> runAllInputsOnAllDevices( |
301 | "run-all-inputs-on-all-devices" , |
302 | llvm::cl::desc("Run all inputs on all devices. Used for testing purposes." ), |
303 | llvm::cl::init(false), llvm::cl::cat(loaderCat)); |
304 | |
305 | llvm::cl::opt<bool> |
306 | timeOpt("time" , |
307 | llvm::cl::desc("Print timer output to stderr detailing how long it " |
308 | "takes for the program to execute" ), |
309 | llvm::cl::Optional, llvm::cl::cat(loaderCat)); |
310 | |
311 | llvm::cl::opt<unsigned> iterationsOpt( |
312 | "iterations" , llvm::cl::desc("Number of iterations to perform" ), |
313 | llvm::cl::Optional, llvm::cl::init(0), llvm::cl::cat(loaderCat)); |
314 | |
315 | std::string Loader::getModelOptPath() { |
316 | // If given a single path, return it. |
317 | if (modelPathOpt.size() == 1 && |
318 | llvm::sys::fs::is_directory(*modelPathOpt.begin())) { |
319 | return *modelPathOpt.begin(); |
320 | } |
321 | |
322 | // Model path must be to one or more files. Use the path of the first file. |
323 | size_t found = modelPathOpt[0].find_last_of("/" ); |
324 | return found == std::string::npos ? "." : modelPathOpt[0].substr(0, found); |
325 | } |
326 | |
327 | llvm::StringRef Loader::getModelOptDir() { |
328 | assert(modelPathOpt.size() == 1 && |
329 | llvm::sys::fs::is_directory(*modelPathOpt.begin()) && |
330 | "Model path must be a single directory." ); |
331 | return modelPathOpt[0]; |
332 | } |
333 | |
334 | bool glow::emittingBundle() { return !emitBundle.empty(); } |
335 | |
336 | bool glow::profilingGraph() { return !dumpProfileFileOpt.empty(); } |
337 | |
338 | /// Parse the 'modelInputsOpt' option and get the model input names and types. |
339 | /// The expected format is one of the following: |
340 | /// - <name> (default type is 'float', default shape is '[1]') |
341 | /// - <name>,<type>,<shape> for non-quantized types. |
342 | /// - <name>,<type>,<scale>,<offset>,<shape> for quantized types. |
343 | static void getModelInputs(std::vector<std::string> &inputNames, |
344 | std::vector<Type> *inputTypes) { |
345 | for (const auto &str : modelInputsOpt) { |
346 | // Parse name. |
347 | auto strPair = llvm::StringRef(str).split(','); |
348 | llvm::StringRef name = strPair.first; |
349 | CHECK(name.size()) << "Model input name empty" ; |
350 | |
351 | // Verify name is unique and add to vector. |
352 | for (const auto &nameIter : inputNames) { |
353 | if (name.equals(nameIter)) { |
354 | LOG(FATAL) << strFormat("Model input name \"%s\" is not unique. Check " |
355 | "the graph definition for the input names." , |
356 | std::string(name).c_str()); |
357 | } |
358 | } |
359 | inputNames.push_back(name.str()); |
360 | |
361 | if (!inputTypes) { |
362 | continue; |
363 | } |
364 | |
365 | // If only the name is provided, use the default type and shape. |
366 | if (strPair.second.size() == 0) { |
367 | inputTypes->push_back(Type(ElemKind::FloatTy, {1})); |
368 | continue; |
369 | } |
370 | |
371 | // Parse type. |
372 | strPair = strPair.second.split(','); |
373 | llvm::StringRef type = strPair.first; |
374 | CHECK(type.size()) << "Model input type empty" ; |
375 | ElemKind kind; |
376 | if (type.equals("float" )) { |
377 | kind = ElemKind::FloatTy; |
378 | } else if (type.equals("float16" )) { |
379 | kind = ElemKind::Float16Ty; |
380 | } else if (type.equals("bfloat16" )) { |
381 | kind = ElemKind::BFloat16Ty; |
382 | } else if (type.equals("int8q" )) { |
383 | kind = ElemKind::Int8QTy; |
384 | } else if (type.equals("int16q" )) { |
385 | kind = ElemKind::Int16QTy; |
386 | } else if (type.equals("int32q" )) { |
387 | kind = ElemKind::Int32QTy; |
388 | } else if (type.equals("int32" )) { |
389 | kind = ElemKind::Int32ITy; |
390 | } else if (type.equals("int64" )) { |
391 | kind = ElemKind::Int64ITy; |
392 | } else if (type.equals("bool" )) { |
393 | kind = ElemKind::BoolTy; |
394 | } else { |
395 | LOG(FATAL) << strFormat("Model input type \"%s\" not supported" , |
396 | std::string(type).c_str()); |
397 | } |
398 | |
399 | // For quantized type get scale and offset. |
400 | double scale; |
401 | int32_t offset; |
402 | if (isQuantizedElemKind(kind)) { |
403 | strPair = strPair.second.split(','); |
404 | CHECK(strPair.first.size()) << "Model input scale empty" ; |
405 | CHECK(!strPair.first.getAsDouble(scale)) |
406 | << "Model input scale parameter invalid" ; |
407 | strPair = strPair.second.split(','); |
408 | CHECK(strPair.first.size()) << "Model input offset empty" ; |
409 | CHECK(!strPair.first.getAsInteger(0, offset)) |
410 | << "Model input offset parameter invalid" ; |
411 | } |
412 | |
413 | // Parse shape string. |
414 | llvm::StringRef shape = strPair.second; |
415 | CHECK(shape.size()) << "Model input shape empty" ; |
416 | ShapeVector dims; |
417 | CHECK_EQ(shape.front(), '[') << "First shape char should be [" ; |
418 | shape = shape.drop_front(); |
419 | CHECK_EQ(shape.back(), ']') << "First shape char should be ]" ; |
420 | shape = shape.drop_back(); |
421 | CHECK(shape.size()) << "Model input shape empty" ; |
422 | size_t val; |
423 | while (shape.contains(',')) { |
424 | auto splitRes = shape.split(','); |
425 | CHECK(!splitRes.first.getAsInteger(0, val)) |
426 | << "Model input shape integer invalid" ; |
427 | dims.push_back(val); |
428 | shape = splitRes.second; |
429 | } |
430 | CHECK(!shape.getAsInteger(0, val)) << "Model input shape integer invalid" ; |
431 | dims.push_back(val); |
432 | |
433 | // Build type and add to vector. |
434 | if (isQuantizedElemKind(kind)) { |
435 | inputTypes->push_back(Type(kind, dims, (float)scale, offset)); |
436 | } else { |
437 | inputTypes->push_back(Type(kind, dims)); |
438 | } |
439 | } |
440 | } |
441 | |
/// Load the network into the Loader's Function from the model file(s) given on
/// the command line (Caffe2, TensorFlowLite or ONNX format, selected by which
/// filename accessor is non-empty).
/// \param bindings when non-null, postModelLoad is invoked with it after the
///        model is imported.
/// \param inputType when non-empty, overrides the input types parsed from the
///        -model-input options (used e.g. to change the batch size).
void Loader::loadModel(PlaceholderBindings *bindings,
                       llvm::ArrayRef<TypeRef> inputType) {

  // Get model input names and types.
  std::vector<std::string> inputNames;
  std::vector<Type> inputTypes;
  getModelInputs(inputNames, &inputTypes);
  // The importers consume parallel arrays of raw name/type pointers.
  std::vector<const char *> inputNameRefs;
  std::vector<TypeRef> inputTypeRefs;
  for (size_t idx = 0, e = inputNames.size(); idx < e; idx++) {
    inputNameRefs.push_back(inputNames[idx].c_str());
    inputTypeRefs.push_back(&inputTypes[idx]);
  }

  // Use explicit input type if given.
  if (inputType.size()) {
    inputTypeRefs = inputType;
  }

  // Load the model based on the model format.
  if (!getCaffe2NetDescFilename().empty()) {
    // For Caffe2 format the input placeholder names/types must be provided
    // explicitly (mandatory).
    std::unique_ptr<ProtobufLoader> protoLoader;
    protoLoader.reset(new Caffe2ModelLoader(
        getCaffe2NetDescFilename().str(), getCaffe2NetWeightFilename().str(),
        inputNameRefs, inputTypeRefs, *getFunction()));
    // Load the maps between original model names and the placeholders.
    inputPlaceholderByName_ = protoLoader->getInputVarsMapping();
    outputPlaceholderByName_ = protoLoader->getOutputVarsMapping();
    if (bindings) {
      postModelLoad(*bindings, *protoLoader.get(), outputPlaceholderByName_,
                    inputType);
    }
  } else if (!getTFLiteModelFilename().empty()) {
    // For TensorFlowLite format the input placeholder names/types are not
    // provided since are used directly from the model.
    auto tfliteLoader = glow::make_unique<TFLiteModelLoader>(
        getTFLiteModelFilename().str(), getFunction());
    // Load the maps between original model names and the placeholders.
    inputPlaceholderByName_ = tfliteLoader->getInputPlaceholderMap();
    outputPlaceholderByName_ = tfliteLoader->getOutputPlaceholderMap();
    // Since TensorFlowLite loader currently does not have the capability to
    // enforce the input type (for batching) we must validate that when the
    // input type is explicitly given it actually matches the model input type.
    if (bindings) {
      postModelLoad(*bindings, *tfliteLoader, outputPlaceholderByName_,
                    inputType);
    }
    if (inputType.size()) {
      CHECK(inputPlaceholderByName_.size() == 1)
          << "Model is expected to have only 1 input!";
      Placeholder *inpPH = inputPlaceholderByName_.begin()->second;
      // Batch size is dimension 0 of both the model input and the override.
      auto modelBatchSize = inpPH->getType()->dims()[0];
      auto inputBatchSize = inputType[0]->dims()[0];
      CHECK(inputBatchSize == modelBatchSize)
          << "Mismatch between the model batch size (" << modelBatchSize
          << ") and the dataset batch size (" << inputBatchSize << ")! "
          << "If you are using the 'image-classifier' tool set the "
          << "dataset batch size with the option '-minibatch=" << modelBatchSize
          << "'!";
    }
  } else {
    // For ONNX format the input placeholders names/types can be optionally
    // provided but is not mandatory. If not provided (the arrays are empty)
    // they are derived automatically. One might want to provide explicitly
    // the input placeholder types in order to override the placeholder sizes
    // (one such example is the batch size).
    std::unique_ptr<ProtobufLoader> protoLoader;
    protoLoader.reset(new ONNXModelLoader(getOnnxModelFilename().str(),
                                          inputNameRefs, inputTypeRefs,
                                          *getFunction()));
    // Load the maps between original model names and the placeholders.
    inputPlaceholderByName_ = protoLoader->getInputVarsMapping();
    outputPlaceholderByName_ = protoLoader->getOutputVarsMapping();
    if (bindings) {
      postModelLoad(*bindings, *protoLoader.get(), outputPlaceholderByName_,
                    inputType);
    }
  }
}
523 | |
/// Validate cross-option constraints of the command line.
/// Side effect: when -emit-bundle is given without an explicit -network-name,
/// assigns networkName a default derived from the (single) model path.
/// \returns true when the command line is invalid (an error was printed).
static bool commandLineIsInvalid() {
  // Profiling is mutually exclusive with loading a profile or fp16 conversion.
  if (!dumpProfileFileOpt.empty() &&
      (!loadProfileFileOpt.empty() || convertToFP16)) {
    llvm::errs() << "Loader: the -" << dumpProfileFileOpt.ArgStr
                 << " option cannot be specified at the same time as either -"
                 << loadProfileFileOpt.ArgStr << " or -" << convertToFP16.ArgStr
                 << ".\n";
    return true;
  }

  if (emitBundle.getNumOccurrences()) {
    if (networkName.getNumOccurrences()) {
      if (networkName.empty()) {
        llvm::errs() << "Loader: -" << networkName.ArgStr
                     << " must not be empty.\n";
        return true;
      } // FIXME: else make sure networkName does not have any sequence of
        // characters that could turn into evil stuff in the assembler.
    } else {
      // By default, use the last directory in the model path
      // as the name of the network.
      // Only do that when there is just one path specified.
      if (modelPathOpt.size() == 1) {
        // Walk the path components from the end until a usable name is found.
        for (auto it = llvm::sys::path::rbegin(modelPathOpt[0]),
                  end = llvm::sys::path::rend(modelPathOpt[0]);
             it != end; ++it) {
          networkName = std::string(*it);
          // Strip extension (if any).
          size_t lastDotPos = networkName.find_last_of(".");
          if (lastDotPos != std::string::npos) {
            networkName = networkName.substr(0, lastDotPos);
          }
          // Empty names are replaced by '.' (see Path.h in LLVM).
          if (!networkName.empty() && networkName != ".") {
            break;
          }
        }
      }
      // No usable default could be derived; the user must supply one.
      if (networkName.empty()) {
        llvm::errs() << "Loader: Use -" << networkName.ArgStr
                     << " to specify a non-empty network name.\n";
        return true;
      }
    }
  } else if (networkName.getNumOccurrences()) {
    // -network-name without -emit-bundle has no effect; reject it.
    llvm::errs() << "Loader: -" << networkName.ArgStr
                 << " only makes sense when -" << emitBundle.ArgStr
                 << " is used.\n";
    return true;
  }
  return false;
}
576 | |
577 | /// Clear external storage for cmd args defined in Loader. |
578 | static void initCmdArgVars() { |
579 | llvm::cl::ResetAllOptionOccurrences(); |
580 | modelInputsOpt.clear(); |
581 | modelPathOpt.clear(); |
582 | } |
583 | |
584 | void glow::parseCommandLine(int argc, char **argv) { |
585 | |
586 | initCmdArgVars(); |
587 | |
588 | llvm::cl::SetVersionPrinter([](llvm::raw_ostream &os) { |
589 | #ifdef GLOW_VERSION |
590 | os << "Glow Tools version: " << GLOW_VERSION << "\n" ; |
591 | #endif |
592 | }); |
593 | // TODO - registered once to avoid error: |
594 | // "LLVM ERROR: too many signal callbacks already registered." |
595 | static bool stackTraceRegistered = false; |
596 | if (!stackTraceRegistered) { |
597 | stackTraceRegistered = true; |
598 | llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); |
599 | } |
600 | llvm::cl::ParseCommandLineOptions( |
601 | argc, argv, |
602 | " The Glow compiler\n\n" |
603 | "Glow is a compiler for neural network accelerators.\n" ); |
604 | |
605 | if (commandLineIsInvalid()) { |
606 | std::exit(1); |
607 | } |
608 | |
609 | if (modelPathOpt.size() > 2) { |
610 | llvm::errs() << "-model flag should have either 1 or 2 paths assigned. " |
611 | "Please see flag's description.\n" ; |
612 | std::exit(1); |
613 | } |
614 | } |
615 | |
616 | quantization::QuantizationConfiguration Loader::getQuantizationConfiguration() { |
617 | quantization::QuantizationConfiguration quantConfig; |
618 | quantConfig.precision = quantizationPrecision; |
619 | quantConfig.precisionBias = quantizationPrecisionBias; |
620 | quantConfig.schema = quantizationSchema; |
621 | quantConfig.calibration = quantizationCalibrationOpt; |
622 | quantConfig.calibrateConstants = calibrateConstantsOpt; |
623 | quantConfig.enableRowwise = enableRowwiseOpt; |
624 | quantConfig.enableChannelwise = enableChannelwiseOpt; |
625 | quantConfig.assertAllNodesQuantized = assertAllNodesQuantizedOpt; |
626 | if (!loadProfileFileOpt.empty()) { |
627 | auto fileExists = deserializeProfilingInfosFromYaml( |
628 | loadProfileFileOpt, quantConfig.graphPreLowerHash, quantConfig.infos); |
629 | CHECK(fileExists) << strFormat("Profile file \"%s\" does not exist!" , |
630 | loadProfileFileOpt.c_str()); |
631 | } |
632 | quantConfig.checkGraphPreLowerHash = true; |
633 | return quantConfig; |
634 | } |
635 | |
636 | CompilationContext Loader::getCompilationContext(QuantizationMode mode) { |
637 | |
638 | // Common configurations. |
639 | CompilationContext cctx; |
640 | cctx.loweredInfoMap = &loweredMap_; |
641 | PrecisionConfiguration &precConfig = cctx.precisionConfig; |
642 | precConfig.convertToFP16 = convertToFP16; |
643 | precConfig.float16Format = fp16Format; |
644 | |
645 | // Specific configurations. |
646 | precConfig.quantMode = mode; |
647 | if (mode == QuantizationMode::None) { |
648 | |
649 | // By default, when converting models, all nodes that can be converted are |
650 | // converted. However, some models may need to keep higher precision for |
651 | // some nodes to prevent high accuracy loss. Those nodes are gathered via |
652 | // the keepOriginalPrecisionForNodesOpt option and passed to the related |
653 | // conversion function. |
654 | for (llvm::StringRef kindName : keepOriginalPrecisionForNodesOpt) { |
655 | precConfig.precisionModeKindSet.insert(getKindFromNodeName(kindName)); |
656 | } |
657 | |
658 | } else if (mode == QuantizationMode::Quantize) { |
659 | |
660 | // By default, when converting models, all nodes that can be converted are |
661 | // converted. However, some models may need to keep higher precision for |
662 | // some nodes to prevent high accuracy loss. Those nodes are gathered via |
663 | // the keepOriginalPrecisionForNodesOpt option and passed to the related |
664 | // conversion function. |
665 | for (llvm::StringRef kindName : keepOriginalPrecisionForNodesOpt) { |
666 | precConfig.precisionModeKindSet.insert(getKindFromNodeName(kindName)); |
667 | } |
668 | precConfig.quantConfig = getQuantizationConfiguration(); |
669 | |
670 | } else if (mode == QuantizationMode::Profile) { |
671 | |
672 | // Profiling parameters. |
673 | precConfig.profConfig.numHistogramBins = numHistogramBinsOpt; |
674 | |
675 | // By default everything will be lowered for profiling. However this may |
676 | // cause performance issues for some models, e.g. if a model has group |
677 | // Convolutions which explode the size of the graph when lowered. Thus allow |
678 | // for disabling certain NodeKinds for profiling. This means that during |
679 | // quantization, these nodes should also not be lowered by the backend. |
680 | for (llvm::StringRef kindName : doNotLowerNodesForProfilingOpt) { |
681 | precConfig.precisionModeKindSet.insert(getKindFromNodeName(kindName)); |
682 | } |
683 | |
684 | } else { |
685 | LOG(FATAL) << "Quantization mode not supported" ; |
686 | } |
687 | |
688 | // When converting the model placeholders, if the placeholders are already |
689 | // allocated, we should also convert the backing tensors. Since this procedure |
690 | // is not yet in place, we only convert when emitting a bundle. |
691 | if (convertPlaceholdersOpt && !emittingBundle()) { |
692 | llvm::errs() << "The flag 'convert-placeholders' can only be used when " |
693 | "emitting a bundle!\n" ; |
694 | std::exit(1); |
695 | } |
696 | cctx.optimizationOpts.foldElemKindConversionIntoIO = convertPlaceholdersOpt; |
697 | |
698 | return cctx; |
699 | } |
700 | |
701 | CompilationContext Loader::getCompilationContext() { |
702 | if (!dumpProfileFileOpt.empty()) { |
703 | return Loader::getCompilationContext(QuantizationMode::Profile); |
704 | } else if (!loadProfileFileOpt.empty()) { |
705 | return Loader::getCompilationContext(QuantizationMode::Quantize); |
706 | } else { |
707 | return Loader::getCompilationContext(QuantizationMode::None); |
708 | } |
709 | } |
710 | |
711 | void Loader::compile(PlaceholderBindings &bindings) { |
712 | CompilationContext cctx = getCompilationContext(); |
713 | cctx.bindings = &bindings; |
714 | compile(cctx); |
715 | } |
716 | |
717 | void Loader::compile(CompilationContext &cctx) { |
718 | |
719 | // Dump the DAG before compilation if needed. |
720 | if (!dumpGraphDAGFileBeforeCompilationOpt.empty()) { |
721 | F_->dumpDAG(dumpGraphDAGFileBeforeCompilationOpt.c_str()); |
722 | } |
723 | |
724 | // Store a raw pointer to the Module, we pass the unique_ptr to HostManager |
725 | // but the Module is stored by Hostmanager so the pointer will remain valid. |
726 | auto module = M_.get(); |
727 | |
728 | if (emittingBundle()) { |
729 | // Create bundle directory if not exists. |
730 | if (!llvm::sys::fs::is_directory(emitBundle)) { |
731 | llvm::sys::fs::create_directory(emitBundle); |
732 | } |
733 | // Emit IR for the graph, compile it and save as a bundle. Replicate the |
734 | // same optimizations seen during normal execution inside addNetwork(). |
735 | EXIT_ON_ERR(::glow::optimizeFunctionBeforeLowering(F_, cctx)); |
736 | EXIT_ON_ERR(::glow::optimizeFunction(F_, *backend_, cctx)); |
737 | backend_->save(F_, emitBundle, networkName, |
738 | mainEntryName.empty() ? networkName : mainEntryName); |
739 | } else { |
740 | // Emit IR for the graph and compile it. |
741 | cctx.saturateHost = !runAllInputsOnAllDevices; |
742 | auto error = hostManager_->addNetwork(std::move(M_), cctx); |
743 | EXIT_ON_ERR(std::move(error)); |
744 | // After partitioning, the original function may be removed. Need to update |
745 | // F_. |
746 | F_ = module->getFunctions().front(); |
747 | } |
748 | if (dumpGraphOpt) { |
749 | for (auto function : module->getFunctions()) { |
750 | function->dump(); |
751 | } |
752 | } |
753 | if (!dumpGraphDAGFileOpt.empty()) { |
754 | for (auto function : module->getFunctions()) { |
755 | std::string filename = |
756 | function->getFilename() + "_" + dumpGraphDAGFileOpt; |
757 | if (module->getFunctions().size() == 1) { |
758 | filename = dumpGraphDAGFileOpt; |
759 | } |
760 | function->dumpDAG(filename.c_str()); |
761 | } |
762 | } |
763 | // Store compilation info in the Loader. |
764 | compilationInfo_ = cctx.info; |
765 | } |
766 | |
767 | void Loader::runInference(PlaceholderBindings &bindings, size_t batchSize) { |
768 | assert(!emittingBundle() && |
769 | "No inference is performed in the bundle generation mode." ); |
770 | unsigned iterations = iterationsOpt == 0 ? 1 : iterationsOpt; |
771 | llvm::Timer timer("Infer" , "Infer" ); |
772 | if (timeOpt) { |
773 | timer.startTimer(); |
774 | } |
775 | for (unsigned i = 0; i < iterations; i++) { |
776 | auto runErr = hostManager_->runNetworkBlocking(functionName_, bindings); |
777 | EXIT_ON_ERR(std::move(runErr)); |
778 | } |
779 | if (timeOpt) { |
780 | timer.stopTimer(); |
781 | llvm::outs() << llvm::formatv("Wall time per item (s): {0:f4}\n" , |
782 | |
783 | timer.getTotalTime().getWallTime() / |
784 | iterations / batchSize); |
785 | } |
786 | } |
787 | |
788 | void Loader::runInference(ExecutionContext *context, size_t batchSize) { |
789 | std::unique_ptr<ExecutionContext> contextP(context); |
790 | |
791 | unsigned iterations = iterationsOpt == 0 ? 1 : iterationsOpt; |
792 | llvm::Timer timer("Infer" , "Infer" ); |
793 | if (timeOpt) { |
794 | timer.startTimer(); |
795 | } |
796 | |
797 | for (unsigned i = 0; i < iterations; i++) { |
798 | std::promise<void> runPromise; |
799 | auto fut = runPromise.get_future(); |
800 | std::unique_ptr<Error> runErr; |
801 | hostManager_->runNetwork( |
802 | functionName_, std::move(contextP), |
803 | [&runPromise, &runErr](runtime::RunIdentifierTy, Error err, |
804 | std::unique_ptr<ExecutionContext> contextPtr) { |
805 | // Don't really delete context since we don't own it. |
806 | contextPtr.release(); |
807 | |
808 | runErr = glow::make_unique<Error>(std::move(err)); |
809 | runPromise.set_value(); |
810 | }); |
811 | fut.wait(); |
812 | EXIT_ON_ERR(std::move(*DCHECK_NOTNULL(runErr.get()))); |
813 | } |
814 | if (timeOpt) { |
815 | timer.stopTimer(); |
816 | llvm::outs() << llvm::formatv("Wall time per item (s): {0:f4}\n" , |
817 | timer.getTotalTime().getWallTime() / |
818 | iterations / batchSize); |
819 | } |
820 | } |
821 | |
822 | static bool comparePI(const NodeProfilingInfo &a, const NodeProfilingInfo &b) { |
823 | return (a.nodeOutputName_.compare(b.nodeOutputName_) < 0); |
824 | } |
825 | |
826 | void Loader::generateAndSerializeProfilingInfos(PlaceholderBindings &bindings) { |
827 | assert(!dumpProfileFileOpt.empty() && |
828 | "Filename to dump serialized profile to must not be empty." ); |
829 | std::vector<NodeProfilingInfo> PI; |
830 | for (auto F : getModule()->getFunctions()) { |
831 | std::vector<NodeProfilingInfo> tmp = |
832 | quantization::generateNodeProfilingInfos(bindings, F, loweredMap_); |
833 | PI.insert(PI.end(), tmp.begin(), tmp.end()); |
834 | } |
835 | std::sort(PI.begin(), PI.end(), comparePI); |
836 | serializeProfilingInfosToYaml(dumpProfileFileOpt, |
837 | compilationInfo_.graphPreLowerHash, PI); |
838 | } |
839 | |
840 | Loader &Loader::registerExtension(std::unique_ptr<LoaderExtension> extension) { |
841 | loaderExtensionList_.push_back(std::move(extension)); |
842 | return *this; |
843 | } |
844 | |
845 | void Loader::postModelLoad(PlaceholderBindings &bindings, |
846 | ProtobufLoader &protoLoader, |
847 | llvm::StringMap<Placeholder *> &placeholderMap, |
848 | llvm::ArrayRef<TypeRef> inputImageType) { |
849 | for (auto &&ext : loaderExtensionList_) { |
850 | ext->postModelLoad(*this, bindings, protoLoader, placeholderMap, |
851 | inputImageType); |
852 | } |
853 | } |
854 | |
855 | void Loader::postModelLoad(PlaceholderBindings &bindings, |
856 | TFLiteModelLoader &tfloader, |
857 | llvm::StringMap<Placeholder *> &placeholderMap, |
858 | llvm::ArrayRef<TypeRef> inputImageType) { |
859 | for (auto &&ext : loaderExtensionList_) { |
860 | ext->postModelLoad(*this, bindings, tfloader, placeholderMap, |
861 | inputImageType); |
862 | } |
863 | } |
864 | |
865 | void Loader::inferInitMiniBatch(PlaceholderBindings &bindings, |
866 | size_t minibatchIndex, size_t minibatchSize) { |
867 | for (auto &&ext : loaderExtensionList_) { |
868 | ext->inferInitMiniBatch(*this, bindings, minibatchIndex, minibatchSize); |
869 | } |
870 | } |
871 | |
872 | void Loader::inferEndMiniBatch(PlaceholderBindings &bindings, |
873 | size_t minibatchIndex, size_t minibatchSize) { |
874 | for (auto &&ext : loaderExtensionList_) { |
875 | ext->inferEndMiniBatch(*this, bindings, minibatchIndex, minibatchSize); |
876 | } |
877 | } |
878 | |
879 | Loader::Loader(llvm::ArrayRef<size_t> configDeviceIDs) { |
880 | if (modelPathOpt.size() == 1) { |
881 | if (llvm::sys::fs::is_directory(*modelPathOpt.begin())) { |
882 | caffe2NetDescFilename_ = modelPathOpt[0] + "/predict_net.pb" ; |
883 | caffe2NetWeightFilename_ = modelPathOpt[0] + "/init_net.pb" ; |
884 | } else { |
885 | llvm::StringRef modelPath = modelPathOpt[0]; |
886 | if (modelPath.endswith("tflite" )) { |
887 | tfliteModelFilename_ = modelPath.str(); |
888 | } else { |
889 | onnxModelFilename_ = modelPath.str(); |
890 | } |
891 | } |
892 | } else { |
893 | caffe2NetDescFilename_ = modelPathOpt[0]; |
894 | caffe2NetWeightFilename_ = modelPathOpt[1]; |
895 | } |
896 | M_.reset(new Module); |
897 | |
898 | std::vector<std::unique_ptr<runtime::DeviceConfig>> configs; |
899 | |
900 | if (configDeviceIDs.empty()) { |
901 | configs = runtime::generateDeviceConfigs(numDevices, ExecutionBackend); |
902 | } else { |
903 | for (size_t ID : configDeviceIDs) { |
904 | CHECK(ID < numDevices) << "IDs must be less than the number of devices" ; |
905 | auto config = glow::make_unique<runtime::DeviceConfig>(ExecutionBackend); |
906 | config->deviceID = ID; |
907 | configs.push_back(std::move(config)); |
908 | } |
909 | } |
910 | |
911 | hostManager_ = glow::make_unique<runtime::HostManager>(std::move(configs)); |
912 | backend_ = std::unique_ptr<Backend>(createBackend(ExecutionBackend)); |
913 | F_ = M_->createFunction(modelPathOpt[0]); |
914 | functionName_ = modelPathOpt[0]; |
915 | } |
916 | |