1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "Loader.h" |
18 | |
19 | #include "glow/Base/Image.h" |
20 | #include "glow/Base/Tensor.h" |
21 | #include "glow/Converter/TypeAToTypeBFunctionConverter.h" |
22 | #include "glow/IR/IR.h" |
23 | #include "glow/Importer/Caffe2ModelLoader.h" |
24 | #include "glow/Importer/ONNXModelLoader.h" |
25 | #include "glow/Importer/TFLiteModelLoader.h" |
26 | #include "glow/Optimizer/GraphOptimizer/CompilationContext.h" |
27 | #include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h" |
28 | #include "glow/Quantization/Serialization.h" |
29 | #include "glow/Runtime/RuntimeTypes.h" |
30 | |
31 | #include "llvm/Support/CommandLine.h" |
32 | #include "llvm/Support/FileSystem.h" |
33 | #include "llvm/Support/FormatVariadic.h" |
34 | #include "llvm/Support/Path.h" |
35 | #include "llvm/Support/Signals.h" |
36 | #include "llvm/Support/Timer.h" |
37 | #include "llvm/Support/raw_ostream.h" |
38 | |
39 | #include <algorithm> |
40 | #include <future> |
41 | #include <sstream> |
42 | |
43 | using namespace glow; |
44 | |
/// Category under which all generic Loader command line options are grouped.
llvm::cl::OptionCategory loaderCat("Loader Options");

/// External storage for the -model option values; a plain vector (rather than
/// the cl::list itself) so other tools linking Loader can read it directly.
std::vector<std::string> modelPathOpt;
static llvm::cl::list<std::string, std::vector<std::string>> modelPathOptF(
    "model",
    llvm::cl::desc(
        "Specify one of three:\n"
        "1. Path to ONNX model file.\n"
        "2. Two paths to Caffe2 model files: network structure and weight.\n"
        "3. Path to directory with the Caffe2 network structure "
        "<predict_net.pb> and weight <init_net.pb> files."),
    llvm::cl::value_desc("modelPath"), llvm::cl::Required, llvm::cl::OneOrMore,
    llvm::cl::cat(loaderCat), llvm::cl::location(modelPathOpt));
/// Short-form alias so users can write "-m" instead of "-model".
static llvm::cl::alias modelPathAOpt("m", llvm::cl::desc("Alias for -model"),
                                     llvm::cl::aliasopt(modelPathOptF),
                                     llvm::cl::cat(loaderCat));
61 | |
namespace {

/// Enable extra logging output while the loader runs.
llvm::cl::opt<bool>
    verbose("verbose",
            llvm::cl::desc("Specify whether to run with verbose output"),
            llvm::cl::Optional, llvm::cl::cat(loaderCat));

/// When non-empty, run in profiling mode and write the captured quantization
/// profile to this file.
llvm::cl::opt<std::string> dumpProfileFileOpt(
    "dump-profile",
    llvm::cl::desc("Perform quantization profiling for a given graph "
                   "and dump result to the file."),
    llvm::cl::value_desc("profile.yaml"), llvm::cl::Optional,
    llvm::cl::cat(loaderCat));

/// Quantization schema (asymmetric by default).
llvm::cl::opt<quantization::Schema> quantizationSchema(
    "quantization-schema",
    llvm::cl::desc("Specify which quantization schema to use"),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(quantization::Schema::Asymmetric, "asymmetric",
                   "Use asymmetric ranges"),
        clEnumValN(quantization::Schema::Symmetric, "symmetric",
                   "Use symmetric ranges"),
        clEnumValN(quantization::Schema::SymmetricWithUnsigned,
                   "symmetric_with_uint8",
                   "Use symmetric ranges with potentially uint8 ranges"),
        clEnumValN(quantization::Schema::SymmetricWithPower2Scale,
                   "symmetric_with_power2_scale",
                   "Use symmetric ranges with power of 2 scaling factor")),
    llvm::cl::init(quantization::Schema::Asymmetric), llvm::cl::cat(loaderCat));

/// Calibration method applied to the captured profile (none by default).
llvm::cl::opt<quantization::Calibration> quantizationCalibrationOpt(
    "quantization-calibration",
    llvm::cl::desc("Specify which quantization calibration method to use"),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(quantization::Calibration::None, "none", "No calibration"),
        clEnumValN(quantization::Calibration::KLMinimization, "KL",
                   "Quantization calibration method based on minimizing the "
                   "Kullback-Leibler divergence metric (relative entropy)")),
    llvm::cl::init(quantization::Calibration::None), llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> calibrateConstantsOpt(
    "calibrate-constants",
    llvm::cl::desc("Option to enable the quantization calibration for constant "
                   "weights which is disabled by default."),
    llvm::cl::init(false), llvm::cl::Optional, llvm::cl::cat(loaderCat));

/// Element type used when quantizing activations/weights (Int8 by default).
llvm::cl::opt<ElemKind> quantizationPrecision(
    "quantization-precision",
    llvm::cl::desc("Specify which quantization precision to use, e.g., Int8"),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(ElemKind::Int8QTy, "Int8", "Use Int8 quantization"),
        clEnumValN(ElemKind::Int16QTy, "Int16", "Use Int16 quantization")),
    llvm::cl::init(ElemKind::Int8QTy), llvm::cl::cat(loaderCat));

/// Element type used when quantizing Conv/FC bias operands (Int32 by default).
llvm::cl::opt<ElemKind> quantizationPrecisionBias(
    "quantization-precision-bias",
    llvm::cl::desc("Specify which quantization precision to use for bias "
                   "of Convolution and Fully Connected nodes."),
    llvm::cl::Optional,
    llvm::cl::values(
        clEnumValN(ElemKind::Int8QTy, "Int8", "Use Int8 bias quantization"),
        clEnumValN(ElemKind::Int16QTy, "Int16", "Use Int16 bias quantization"),
        clEnumValN(ElemKind::Int32QTy, "Int32", "Use Int32 bias quantization")),
    llvm::cl::init(ElemKind::Int32QTy), llvm::cl::cat(loaderCat));

llvm::cl::opt<bool>
    enableRowwiseOpt("enable-rowwise",
                     llvm::cl::desc("Enable rowwise quantized FullyConnected."),
                     llvm::cl::Optional, llvm::cl::init(false),
                     llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> enableChannelwiseOpt(
    "enable-channelwise",
    llvm::cl::desc("Enable channelwise quantized Convolution."),
    llvm::cl::Optional, llvm::cl::init(false), llvm::cl::cat(loaderCat));

/// When non-empty, load a previously dumped profile and quantize the graph.
llvm::cl::opt<std::string> loadProfileFileOpt(
    "load-profile",
    llvm::cl::desc("Load quantization profile file and quantize the graph"),
    llvm::cl::value_desc("profile.yaml"), llvm::cl::Optional,
    llvm::cl::cat(loaderCat));

/// Node kind names that must keep their original precision during
/// conversion/quantization.
llvm::cl::list<std::string> keepOriginalPrecisionForNodesOpt(
    "keep-original-precision-for-nodes",
    llvm::cl::desc(
        "Use to specify the name of nodes (e.g. Add, Div, etc.) that should "
        "be kept as is when conversion/quantization is requested. "
        "All nodes of the listed kinds will be kept as is;"
        "e.g. if Add is specified and there are multiple Add nodes "
        "in the input loaded model, none would be quantized/converted."),
    llvm::cl::value_desc("NodeNames (e.g. Add,Div)"), llvm::cl::ZeroOrMore,
    llvm::cl::CommaSeparated, llvm::cl::cat(loaderCat));

/// Node kind names that must not be lowered while profiling.
llvm::cl::list<std::string> doNotLowerNodesForProfilingOpt(
    "do-not-lower-nodes-for-profiling",
    llvm::cl::desc(
        "Use to specify the name of nodes (e.g. Convolution, FullyConnected, "
        "etc.) that should not be lowered during profiling. All nodes of the "
        "listed kinds will be kept as is; e.g. if Conv is specified and the "
        "model has group convolutions then the convolution will not be lowered "
        "for profiling. This means when using the profile for quantization, "
        "the node should not be lowered then either."),
    llvm::cl::value_desc("NodeNames (e.g. Convolution,FullyConnected)"),
    llvm::cl::ZeroOrMore, llvm::cl::CommaSeparated, llvm::cl::cat(loaderCat));

/// Name of the backend to compile for ("Interpreter" by default).
llvm::cl::opt<std::string> ExecutionBackend(
    "backend",
    llvm::cl::desc("Backend to use, e.g., Interpreter, CPU, OpenCL:"),
    llvm::cl::init("Interpreter"), llvm::cl::cat(loaderCat));

/// Debugging options.
llvm::cl::OptionCategory
    modelExportCat("How to export the Glow Intermediate Representation/Graphs",
                   "These options are for debugging the "
                   "graphs by writing the IR/Graphs to "
                   "given files/stdout");

/// DOT file to dump the graph into before compilation starts.
llvm::cl::opt<std::string> dumpGraphDAGFileBeforeCompilationOpt(
    "dump-graph-DAG-before-compile",
    llvm::cl::desc("Specify the file to export the Graph in DOT format"),
    llvm::cl::value_desc("file.dot"), llvm::cl::cat(modelExportCat));

/// DOT file to dump the (compiled) graph into.
llvm::cl::opt<std::string> dumpGraphDAGFileOpt(
    "dump-graph-DAG",
    llvm::cl::desc("Specify the file to export the Graph in DOT format"),
    llvm::cl::value_desc("file.dot"), llvm::cl::cat(modelExportCat));

llvm::cl::opt<bool> dumpGraphOpt("dump-graph",
                                 llvm::cl::desc("Prints Graph to stdout"),
                                 llvm::cl::cat(modelExportCat));

llvm::cl::opt<bool>
    convertToFP16("convert-to-fp16",
                  llvm::cl::desc("Run all floating-point computation in fp16."),
                  llvm::cl::init(false), llvm::cl::cat(loaderCat));

/// Which 16-bit float representation -convert-to-fp16 should target.
llvm::cl::opt<PrecisionConfiguration::Float16Format> fp16Format(
    "fp16-format", llvm::cl::desc("fp16 format to use."),
    llvm::cl::values(clEnumValN(PrecisionConfiguration::Float16Format::FP16,
                                "fp16", "Use fp16"),
                     clEnumValN(PrecisionConfiguration::Float16Format::BFloat16,
                                "bfloat16", "Use bfloat16")),
    llvm::cl::init(PrecisionConfiguration::Float16Format::FP16),
    llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> convertPlaceholdersOpt(
    "convert-placeholders",
    llvm::cl::desc("Convert model placeholders by merging ConvertTo, Quantize "
                   "and Dequantize nodes into the model inputs and outputs."),
    llvm::cl::init(false), llvm::cl::cat(loaderCat));

/// Emit a bundle into the specified output directory.
llvm::cl::opt<std::string>
    emitBundle("emit-bundle",
               llvm::cl::desc("Output directory for the bundle serialization"),
               llvm::cl::cat(loaderCat));

llvm::cl::opt<bool> assertAllNodesQuantizedOpt(
    "assert-all-nodes-quantized",
    llvm::cl::desc(
        "Debugging tool, used to assert the quantizer quantizes all nodes in "
        "the model, or abort otherwise. When false, nodes that are unsupported "
        "as quantized by the backend will be left unquantized, and may have "
        "their inputs dequantized/outputs quantized as necessary. Can be used "
        "in conjunction with -keep-original-precision-for-nodes to explicitly "
        "whitelist node kinds that are allowed to be left unquantized."),
    llvm::cl::init(false), llvm::cl::cat(loaderCat));

/// Histogram resolution used while profiling (see desc for calibration note).
llvm::cl::opt<unsigned> numHistogramBinsOpt(
    "num-histogram-bins",
    llvm::cl::desc("Number of bins used for histogram during profiling. If "
                   "histogram based calibration is used then the number of "
                   "histogram bins must be greater than 255 in order for any "
                   "calibration to take place (in the order of 1000's)."),
    llvm::cl::init(10), llvm::cl::value_desc("N"), llvm::cl::cat(loaderCat));

/// Name of the network being bundled.
llvm::cl::opt<std::string> networkName(
    "network-name",
    llvm::cl::desc("Name of the network being bundled. This name is used as a "
                   "prefix for all the files that are generated."),
    llvm::cl::cat(loaderCat));

/// Name of the main entry of the bundle.
llvm::cl::opt<std::string>
    mainEntryName("main-entry-name",
                  llvm::cl::desc("Name of the main entry in the bundle. "
                                 "This name is used as the function name "
                                 "of the entry point to the network."),
                  llvm::cl::cat(loaderCat));

} // namespace
257 | |
258 | // These are outside the namespace so they can be used by the image-classifier. |
259 | std::vector<std::string> modelInputsOpt; |
260 | static llvm::cl::list<std::string, std::vector<std::string>> modelInputsOptF( |
261 | "model-input" , llvm::cl::ZeroOrMore, llvm::cl::location(modelInputsOpt), |
262 | llvm::cl::desc( |
263 | " For ONNX models the inputs of the graph can be inferred \n" |
264 | " automatically and hence this option is not mandatory. \n" |
265 | " For Caffe2 models the graph definition does not contain \n" |
266 | " the description of the inputs and hence must be provided \n" |
267 | " explicitly using this option. One or more model inputs \n" |
268 | " are provided using the following format: \n" |
269 | " -model-input=<inputName1>,<inputType1>,<inputShape1> \n" |
270 | " -model-input=<inputName2>,<inputType2>,<inputShape2> \n" |
271 | " .................................................... \n" |
272 | " For quantized types the format is slightly different since\n" |
273 | " the scale and offset parameters should also be provided: \n" |
274 | " -model-input=<name>,<type>,<scale>,<offset>,<shape> \n" |
275 | " For example we can can provide one or more inputs: \n" |
276 | " -model-input=input_03_data,float,[1] \n" |
277 | " -model-input=data_bias,int32,[1,32,32] \n" |
278 | " -model-input=data,int8q,0.123,-13,[1,10] \n" |
279 | " If only the name is provided, the default type is 'float' \n" |
280 | " and the default shape is '[1]': \n" |
281 | " -model-input=<inputName1> \n" |
282 | " The supported types are: \n" |
283 | " - float, float16 (floating point types) \n" |
284 | " - int32, int64 (integer types) \n" |
285 | " - int8q, int16q, int32q (integer quantized types) \n" |
286 | " - bool (logic type)\n" ), |
287 | llvm::cl::value_desc("name,[type,[scale,offset],shape]" ), |
288 | llvm::cl::cat(loaderCat)); |
289 | |
290 | llvm::cl::alias modelInputName("model-input-name" , |
291 | llvm::cl::desc("Alias for -model-input" ), |
292 | llvm::cl::aliasopt(modelInputsOptF), |
293 | llvm::cl::cat(loaderCat)); |
294 | |
295 | llvm::cl::opt<unsigned> numDevices("num-devices" , |
296 | llvm::cl::desc("Number of Devices to use" ), |
297 | llvm::cl::init(1), llvm::cl::value_desc("N" ), |
298 | llvm::cl::cat(loaderCat)); |
299 | |
300 | llvm::cl::opt<bool> runAllInputsOnAllDevices( |
301 | "run-all-inputs-on-all-devices" , |
302 | llvm::cl::desc("Run all inputs on all devices. Used for testing purposes." ), |
303 | llvm::cl::init(false), llvm::cl::cat(loaderCat)); |
304 | |
305 | llvm::cl::opt<bool> |
306 | timeOpt("time" , |
307 | llvm::cl::desc("Print timer output to stderr detailing how long it " |
308 | "takes for the program to execute" ), |
309 | llvm::cl::Optional, llvm::cl::cat(loaderCat)); |
310 | |
311 | llvm::cl::opt<unsigned> iterationsOpt( |
312 | "iterations" , llvm::cl::desc("Number of iterations to perform" ), |
313 | llvm::cl::Optional, llvm::cl::init(0), llvm::cl::cat(loaderCat)); |
314 | |
315 | std::string Loader::getModelOptPath() { |
316 | // If given a single path, return it. |
317 | if (modelPathOpt.size() == 1 && |
318 | llvm::sys::fs::is_directory(*modelPathOpt.begin())) { |
319 | return *modelPathOpt.begin(); |
320 | } |
321 | |
322 | // Model path must be to one or more files. Use the path of the first file. |
323 | size_t found = modelPathOpt[0].find_last_of("/" ); |
324 | return found == std::string::npos ? "." : modelPathOpt[0].substr(0, found); |
325 | } |
326 | |
327 | llvm::StringRef Loader::getModelOptDir() { |
328 | assert(modelPathOpt.size() == 1 && |
329 | llvm::sys::fs::is_directory(*modelPathOpt.begin()) && |
330 | "Model path must be a single directory." ); |
331 | return modelPathOpt[0]; |
332 | } |
333 | |
334 | bool glow::emittingBundle() { return !emitBundle.empty(); } |
335 | |
336 | bool glow::profilingGraph() { return !dumpProfileFileOpt.empty(); } |
337 | |
338 | /// Parse the 'modelInputsOpt' option and get the model input names and types. |
339 | /// The expected format is one of the following: |
340 | /// - <name> (default type is 'float', default shape is '[1]') |
341 | /// - <name>,<type>,<shape> for non-quantized types. |
342 | /// - <name>,<type>,<scale>,<offset>,<shape> for quantized types. |
343 | static void getModelInputs(std::vector<std::string> &inputNames, |
344 | std::vector<Type> *inputTypes) { |
345 | for (const auto &str : modelInputsOpt) { |
346 | // Parse name. |
347 | auto strPair = llvm::StringRef(str).split(','); |
348 | llvm::StringRef name = strPair.first; |
349 | CHECK(name.size()) << "Model input name empty" ; |
350 | |
351 | // Verify name is unique and add to vector. |
352 | for (const auto &nameIter : inputNames) { |
353 | if (name.equals(nameIter)) { |
354 | LOG(FATAL) << strFormat("Model input name \"%s\" is not unique. Check " |
355 | "the graph definition for the input names." , |
356 | std::string(name).c_str()); |
357 | } |
358 | } |
359 | inputNames.push_back(name.str()); |
360 | |
361 | if (!inputTypes) { |
362 | continue; |
363 | } |
364 | |
365 | // If only the name is provided, use the default type and shape. |
366 | if (strPair.second.size() == 0) { |
367 | inputTypes->push_back(Type(ElemKind::FloatTy, {1})); |
368 | continue; |
369 | } |
370 | |
371 | // Parse type. |
372 | strPair = strPair.second.split(','); |
373 | llvm::StringRef type = strPair.first; |
374 | CHECK(type.size()) << "Model input type empty" ; |
375 | ElemKind kind; |
376 | if (type.equals("float" )) { |
377 | kind = ElemKind::FloatTy; |
378 | } else if (type.equals("float16" )) { |
379 | kind = ElemKind::Float16Ty; |
380 | } else if (type.equals("bfloat16" )) { |
381 | kind = ElemKind::BFloat16Ty; |
382 | } else if (type.equals("int8q" )) { |
383 | kind = ElemKind::Int8QTy; |
384 | } else if (type.equals("int16q" )) { |
385 | kind = ElemKind::Int16QTy; |
386 | } else if (type.equals("int32q" )) { |
387 | kind = ElemKind::Int32QTy; |
388 | } else if (type.equals("int32" )) { |
389 | kind = ElemKind::Int32ITy; |
390 | } else if (type.equals("int64" )) { |
391 | kind = ElemKind::Int64ITy; |
392 | } else if (type.equals("bool" )) { |
393 | kind = ElemKind::BoolTy; |
394 | } else { |
395 | LOG(FATAL) << strFormat("Model input type \"%s\" not supported" , |
396 | std::string(type).c_str()); |
397 | } |
398 | |
399 | // For quantized type get scale and offset. |
400 | double scale; |
401 | int32_t offset; |
402 | if (isQuantizedElemKind(kind)) { |
403 | strPair = strPair.second.split(','); |
404 | CHECK(strPair.first.size()) << "Model input scale empty" ; |
405 | CHECK(!strPair.first.getAsDouble(scale)) |
406 | << "Model input scale parameter invalid" ; |
407 | strPair = strPair.second.split(','); |
408 | CHECK(strPair.first.size()) << "Model input offset empty" ; |
409 | CHECK(!strPair.first.getAsInteger(0, offset)) |
410 | << "Model input offset parameter invalid" ; |
411 | } |
412 | |
413 | // Parse shape string. |
414 | llvm::StringRef shape = strPair.second; |
415 | CHECK(shape.size()) << "Model input shape empty" ; |
416 | ShapeVector dims; |
417 | CHECK_EQ(shape.front(), '[') << "First shape char should be [" ; |
418 | shape = shape.drop_front(); |
419 | CHECK_EQ(shape.back(), ']') << "First shape char should be ]" ; |
420 | shape = shape.drop_back(); |
421 | CHECK(shape.size()) << "Model input shape empty" ; |
422 | size_t val; |
423 | while (shape.contains(',')) { |
424 | auto splitRes = shape.split(','); |
425 | CHECK(!splitRes.first.getAsInteger(0, val)) |
426 | << "Model input shape integer invalid" ; |
427 | dims.push_back(val); |
428 | shape = splitRes.second; |
429 | } |
430 | CHECK(!shape.getAsInteger(0, val)) << "Model input shape integer invalid" ; |
431 | dims.push_back(val); |
432 | |
433 | // Build type and add to vector. |
434 | if (isQuantizedElemKind(kind)) { |
435 | inputTypes->push_back(Type(kind, dims, (float)scale, offset)); |
436 | } else { |
437 | inputTypes->push_back(Type(kind, dims)); |
438 | } |
439 | } |
440 | } |
441 | |
/// Load the network into the Loader's Function from the model file(s) given on
/// the command line (Caffe2, TensorFlowLite or ONNX format, selected by which
/// filename accessor is non-empty).
/// \param bindings when non-null, postModelLoad is invoked with it after the
///        model is imported.
/// \param inputType when non-empty, overrides the input types parsed from the
///        -model-input options (used e.g. to change the batch size).
void Loader::loadModel(PlaceholderBindings *bindings,
                       llvm::ArrayRef<TypeRef> inputType) {

  // Get model input names and types.
  std::vector<std::string> inputNames;
  std::vector<Type> inputTypes;
  getModelInputs(inputNames, &inputTypes);
  // The importers consume parallel arrays of raw name/type pointers.
  std::vector<const char *> inputNameRefs;
  std::vector<TypeRef> inputTypeRefs;
  for (size_t idx = 0, e = inputNames.size(); idx < e; idx++) {
    inputNameRefs.push_back(inputNames[idx].c_str());
    inputTypeRefs.push_back(&inputTypes[idx]);
  }

  // Use explicit input type if given.
  if (inputType.size()) {
    inputTypeRefs = inputType;
  }

  // Load the model based on the model format.
  if (!getCaffe2NetDescFilename().empty()) {
    // For Caffe2 format the input placeholder names/types must be provided
    // explicitly (mandatory).
    std::unique_ptr<ProtobufLoader> protoLoader;
    protoLoader.reset(new Caffe2ModelLoader(
        getCaffe2NetDescFilename().str(), getCaffe2NetWeightFilename().str(),
        inputNameRefs, inputTypeRefs, *getFunction()));
    // Load the maps between original model names and the placeholders.
    inputPlaceholderByName_ = protoLoader->getInputVarsMapping();
    outputPlaceholderByName_ = protoLoader->getOutputVarsMapping();
    if (bindings) {
      postModelLoad(*bindings, *protoLoader.get(), outputPlaceholderByName_,
                    inputType);
    }
  } else if (!getTFLiteModelFilename().empty()) {
    // For TensorFlowLite format the input placeholder names/types are not
    // provided since are used directly from the model.
    auto tfliteLoader = glow::make_unique<TFLiteModelLoader>(
        getTFLiteModelFilename().str(), getFunction());
    // Load the maps between original model names and the placeholders.
    inputPlaceholderByName_ = tfliteLoader->getInputPlaceholderMap();
    outputPlaceholderByName_ = tfliteLoader->getOutputPlaceholderMap();
    // Since TensorFlowLite loader currently does not have the capability to
    // enforce the input type (for batching) we must validate that when the
    // input type is explicitly given it actually matches the model input type.
    if (bindings) {
      postModelLoad(*bindings, *tfliteLoader, outputPlaceholderByName_,
                    inputType);
    }
    if (inputType.size()) {
      CHECK(inputPlaceholderByName_.size() == 1)
          << "Model is expected to have only 1 input!";
      Placeholder *inpPH = inputPlaceholderByName_.begin()->second;
      // Batch size is dimension 0 of both the model input and the override.
      auto modelBatchSize = inpPH->getType()->dims()[0];
      auto inputBatchSize = inputType[0]->dims()[0];
      CHECK(inputBatchSize == modelBatchSize)
          << "Mismatch between the model batch size (" << modelBatchSize
          << ") and the dataset batch size (" << inputBatchSize << ")! "
          << "If you are using the 'image-classifier' tool set the "
          << "dataset batch size with the option '-minibatch=" << modelBatchSize
          << "'!";
    }
  } else {
    // For ONNX format the input placeholders names/types can be optionally
    // provided but is not mandatory. If not provided (the arrays are empty)
    // they are derived automatically. One might want to provide explicitly
    // the input placeholder types in order to override the placeholder sizes
    // (one such example is the batch size).
    std::unique_ptr<ProtobufLoader> protoLoader;
    protoLoader.reset(new ONNXModelLoader(getOnnxModelFilename().str(),
                                          inputNameRefs, inputTypeRefs,
                                          *getFunction()));
    // Load the maps between original model names and the placeholders.
    inputPlaceholderByName_ = protoLoader->getInputVarsMapping();
    outputPlaceholderByName_ = protoLoader->getOutputVarsMapping();
    if (bindings) {
      postModelLoad(*bindings, *protoLoader.get(), outputPlaceholderByName_,
                    inputType);
    }
  }
}
523 | |
/// Validate cross-option constraints of the command line.
/// Side effect: when -emit-bundle is given without an explicit -network-name,
/// assigns networkName a default derived from the (single) model path.
/// \returns true when the command line is invalid (an error was printed).
static bool commandLineIsInvalid() {
  // Profiling is mutually exclusive with loading a profile or fp16 conversion.
  if (!dumpProfileFileOpt.empty() &&
      (!loadProfileFileOpt.empty() || convertToFP16)) {
    llvm::errs() << "Loader: the -" << dumpProfileFileOpt.ArgStr
                 << " option cannot be specified at the same time as either -"
                 << loadProfileFileOpt.ArgStr << " or -" << convertToFP16.ArgStr
                 << ".\n";
    return true;
  }

  if (emitBundle.getNumOccurrences()) {
    if (networkName.getNumOccurrences()) {
      if (networkName.empty()) {
        llvm::errs() << "Loader: -" << networkName.ArgStr
                     << " must not be empty.\n";
        return true;
      } // FIXME: else make sure networkName does not have any sequence of
        // characters that could turn into evil stuff in the assembler.
    } else {
      // By default, use the last directory in the model path
      // as the name of the network.
      // Only do that when there is just one path specified.
      if (modelPathOpt.size() == 1) {
        // Walk the path components from the end until a usable name is found.
        for (auto it = llvm::sys::path::rbegin(modelPathOpt[0]),
                  end = llvm::sys::path::rend(modelPathOpt[0]);
             it != end; ++it) {
          networkName = std::string(*it);
          // Strip extension (if any).
          size_t lastDotPos = networkName.find_last_of(".");
          if (lastDotPos != std::string::npos) {
            networkName = networkName.substr(0, lastDotPos);
          }
          // Empty names are replaced by '.' (see Path.h in LLVM).
          if (!networkName.empty() && networkName != ".") {
            break;
          }
        }
      }
      // No usable default could be derived; the user must supply one.
      if (networkName.empty()) {
        llvm::errs() << "Loader: Use -" << networkName.ArgStr
                     << " to specify a non-empty network name.\n";
        return true;
      }
    }
  } else if (networkName.getNumOccurrences()) {
    // -network-name without -emit-bundle has no effect; reject it.
    llvm::errs() << "Loader: -" << networkName.ArgStr
                 << " only makes sense when -" << emitBundle.ArgStr
                 << " is used.\n";
    return true;
  }
  return false;
}
576 | |
577 | /// Clear external storage for cmd args defined in Loader. |
578 | static void initCmdArgVars() { |
579 | llvm::cl::ResetAllOptionOccurrences(); |
580 | modelInputsOpt.clear(); |
581 | modelPathOpt.clear(); |
582 | } |
583 | |
584 | void glow::parseCommandLine(int argc, char **argv) { |
585 | |
586 | initCmdArgVars(); |
587 | |
588 | llvm::cl::SetVersionPrinter([](llvm::raw_ostream &os) { |
589 | #ifdef GLOW_VERSION |
590 | os << "Glow Tools version: " << GLOW_VERSION << "\n" ; |
591 | #endif |
592 | }); |
593 | // TODO - registered once to avoid error: |
594 | // "LLVM ERROR: too many signal callbacks already registered." |
595 | static bool stackTraceRegistered = false; |
596 | if (!stackTraceRegistered) { |
597 | stackTraceRegistered = true; |
598 | llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); |
599 | } |
600 | llvm::cl::ParseCommandLineOptions( |
601 | argc, argv, |
602 | " The Glow compiler\n\n" |
603 | "Glow is a compiler for neural network accelerators.\n" ); |
604 | |
605 | if (commandLineIsInvalid()) { |
606 | std::exit(1); |
607 | } |
608 | |
609 | if (modelPathOpt.size() > 2) { |
610 | llvm::errs() << "-model flag should have either 1 or 2 paths assigned. " |
611 | "Please see flag's description.\n" ; |
612 | std::exit(1); |
613 | } |
614 | } |
615 | |
616 | quantization::QuantizationConfiguration Loader::getQuantizationConfiguration() { |
617 | quantization::QuantizationConfiguration quantConfig; |
618 | quantConfig.precision = quantizationPrecision; |
619 | quantConfig.precisionBias = quantizationPrecisionBias; |
620 | quantConfig.schema = quantizationSchema; |
621 | quantConfig.calibration = quantizationCalibrationOpt; |
622 | quantConfig.calibrateConstants = calibrateConstantsOpt; |
623 | quantConfig.enableRowwise = enableRowwiseOpt; |
624 | quantConfig.enableChannelwise = enableChannelwiseOpt; |
625 | quantConfig.assertAllNodesQuantized = assertAllNodesQuantizedOpt; |
626 | if (!loadProfileFileOpt.empty()) { |
627 | auto fileExists = deserializeProfilingInfosFromYaml( |
628 | loadProfileFileOpt, quantConfig.graphPreLowerHash, quantConfig.infos); |
629 | CHECK(fileExists) << strFormat("Profile file \"%s\" does not exist!" , |
630 | loadProfileFileOpt.c_str()); |
631 | } |
632 | quantConfig.checkGraphPreLowerHash = true; |
633 | return quantConfig; |
634 | } |
635 | |
636 | CompilationContext Loader::getCompilationContext(QuantizationMode mode) { |
637 | |
638 | // Common configurations. |
639 | CompilationContext cctx; |
640 | cctx.loweredInfoMap = &loweredMap_; |
641 | PrecisionConfiguration &precConfig = cctx.precisionConfig; |
642 | precConfig.convertToFP16 = convertToFP16; |
643 | precConfig.float16Format = fp16Format; |
644 | |
645 | // Specific configurations. |
646 | precConfig.quantMode = mode; |
647 | if (mode == QuantizationMode::None) { |
648 | |
649 | // By default, when converting models, all nodes that can be converted are |
650 | // converted. However, some models may need to keep higher precision for |
651 | // some nodes to prevent high accuracy loss. Those nodes are gathered via |
652 | // the keepOriginalPrecisionForNodesOpt option and passed to the related |
653 | // conversion function. |
654 | for (llvm::StringRef kindName : keepOriginalPrecisionForNodesOpt) { |
655 | precConfig.precisionModeKindSet.insert(getKindFromNodeName(kindName)); |
656 | } |
657 | |
658 | } else if (mode == QuantizationMode::Quantize) { |
659 | |
660 | // By default, when converting models, all nodes that can be converted are |
661 | // converted. However, some models may need to keep higher precision for |
662 | // some nodes to prevent high accuracy loss. Those nodes are gathered via |
663 | // the keepOriginalPrecisionForNodesOpt option and passed to the related |
664 | // conversion function. |
665 | for (llvm::StringRef kindName : keepOriginalPrecisionForNodesOpt) { |
666 | precConfig.precisionModeKindSet.insert(getKindFromNodeName(kindName)); |
667 | } |
668 | precConfig.quantConfig = getQuantizationConfiguration(); |
669 | |
670 | } else if (mode == QuantizationMode::Profile) { |
671 | |
672 | // Profiling parameters. |
673 | precConfig.profConfig.numHistogramBins = numHistogramBinsOpt; |
674 | |
675 | // By default everything will be lowered for profiling. However this may |
676 | // cause performance issues for some models, e.g. if a model has group |
677 | // Convolutions which explode the size of the graph when lowered. Thus allow |
678 | // for disabling certain NodeKinds for profiling. This means that during |
679 | // quantization, these nodes should also not be lowered by the backend. |
680 | for (llvm::StringRef kindName : doNotLowerNodesForProfilingOpt) { |
681 | precConfig.precisionModeKindSet.insert(getKindFromNodeName(kindName)); |
682 | } |
683 | |
684 | } else { |
685 | LOG(FATAL) << "Quantization mode not supported" ; |
686 | } |
687 | |
688 | // When converting the model placeholders, if the placeholders are already |
689 | // allocated, we should also convert the backing tensors. Since this procedure |
690 | // is not yet in place, we only convert when emitting a bundle. |
691 | if (convertPlaceholdersOpt && !emittingBundle()) { |
692 | llvm::errs() << "The flag 'convert-placeholders' can only be used when " |
693 | "emitting a bundle!\n" ; |
694 | std::exit(1); |
695 | } |
696 | cctx.optimizationOpts.foldElemKindConversionIntoIO = convertPlaceholdersOpt; |
697 | |
698 | return cctx; |
699 | } |
700 | |
701 | CompilationContext Loader::getCompilationContext() { |
702 | if (!dumpProfileFileOpt.empty()) { |
703 | return Loader::getCompilationContext(QuantizationMode::Profile); |
704 | } else if (!loadProfileFileOpt.empty()) { |
705 | return Loader::getCompilationContext(QuantizationMode::Quantize); |
706 | } else { |
707 | return Loader::getCompilationContext(QuantizationMode::None); |
708 | } |
709 | } |
710 | |
711 | void Loader::compile(PlaceholderBindings &bindings) { |
712 | CompilationContext cctx = getCompilationContext(); |
713 | cctx.bindings = &bindings; |
714 | compile(cctx); |
715 | } |
716 | |
717 | void Loader::compile(CompilationContext &cctx) { |
718 | |
719 | // Dump the DAG before compilation if needed. |
720 | if (!dumpGraphDAGFileBeforeCompilationOpt.empty()) { |
721 | F_->dumpDAG(dumpGraphDAGFileBeforeCompilationOpt.c_str()); |
722 | } |
723 | |
724 | // Store a raw pointer to the Module, we pass the unique_ptr to HostManager |
725 | // but the Module is stored by Hostmanager so the pointer will remain valid. |
726 | auto module = M_.get(); |
727 | |
728 | if (emittingBundle()) { |
729 | // Create bundle directory if not exists. |
730 | if (!llvm::sys::fs::is_directory(emitBundle)) { |
731 | llvm::sys::fs::create_directory(emitBundle); |
732 | } |
733 | // Emit IR for the graph, compile it and save as a bundle. Replicate the |
734 | // same optimizations seen during normal execution inside addNetwork(). |
735 | EXIT_ON_ERR(::glow::optimizeFunctionBeforeLowering(F_, cctx)); |
736 | EXIT_ON_ERR(::glow::optimizeFunction(F_, *backend_, cctx)); |
737 | backend_->save(F_, emitBundle, networkName, |
738 | mainEntryName.empty() ? networkName : mainEntryName); |
739 | } else { |
740 | // Emit IR for the graph and compile it. |
741 | cctx.saturateHost = !runAllInputsOnAllDevices; |
742 | auto error = hostManager_->addNetwork(std::move(M_), cctx); |
743 | EXIT_ON_ERR(std::move(error)); |
744 | // After partitioning, the original function may be removed. Need to update |
745 | // F_. |
746 | F_ = module->getFunctions().front(); |
747 | } |
748 | if (dumpGraphOpt) { |
749 | for (auto function : module->getFunctions()) { |
750 | function->dump(); |
751 | } |
752 | } |
753 | if (!dumpGraphDAGFileOpt.empty()) { |
754 | for (auto function : module->getFunctions()) { |
755 | std::string filename = |
756 | function->getFilename() + "_" + dumpGraphDAGFileOpt; |
757 | if (module->getFunctions().size() == 1) { |
758 | filename = dumpGraphDAGFileOpt; |
759 | } |
760 | function->dumpDAG(filename.c_str()); |
761 | } |
762 | } |
763 | // Store compilation info in the Loader. |
764 | compilationInfo_ = cctx.info; |
765 | } |
766 | |
767 | void Loader::runInference(PlaceholderBindings &bindings, size_t batchSize) { |
768 | assert(!emittingBundle() && |
769 | "No inference is performed in the bundle generation mode." ); |
770 | unsigned iterations = iterationsOpt == 0 ? 1 : iterationsOpt; |
771 | llvm::Timer timer("Infer" , "Infer" ); |
772 | if (timeOpt) { |
773 | timer.startTimer(); |
774 | } |
775 | for (unsigned i = 0; i < iterations; i++) { |
776 | auto runErr = hostManager_->runNetworkBlocking(functionName_, bindings); |
777 | EXIT_ON_ERR(std::move(runErr)); |
778 | } |
779 | if (timeOpt) { |
780 | timer.stopTimer(); |
781 | llvm::outs() << llvm::formatv("Wall time per item (s): {0:f4}\n" , |
782 | |
783 | timer.getTotalTime().getWallTime() / |
784 | iterations / batchSize); |
785 | } |
786 | } |
787 | |
788 | void Loader::runInference(ExecutionContext *context, size_t batchSize) { |
789 | std::unique_ptr<ExecutionContext> contextP(context); |
790 | |
791 | unsigned iterations = iterationsOpt == 0 ? 1 : iterationsOpt; |
792 | llvm::Timer timer("Infer" , "Infer" ); |
793 | if (timeOpt) { |
794 | timer.startTimer(); |
795 | } |
796 | |
797 | for (unsigned i = 0; i < iterations; i++) { |
798 | std::promise<void> runPromise; |
799 | auto fut = runPromise.get_future(); |
800 | std::unique_ptr<Error> runErr; |
801 | hostManager_->runNetwork( |
802 | functionName_, std::move(contextP), |
803 | [&runPromise, &runErr](runtime::RunIdentifierTy, Error err, |
804 | std::unique_ptr<ExecutionContext> contextPtr) { |
805 | // Don't really delete context since we don't own it. |
806 | contextPtr.release(); |
807 | |
808 | runErr = glow::make_unique<Error>(std::move(err)); |
809 | runPromise.set_value(); |
810 | }); |
811 | fut.wait(); |
812 | EXIT_ON_ERR(std::move(*DCHECK_NOTNULL(runErr.get()))); |
813 | } |
814 | if (timeOpt) { |
815 | timer.stopTimer(); |
816 | llvm::outs() << llvm::formatv("Wall time per item (s): {0:f4}\n" , |
817 | timer.getTotalTime().getWallTime() / |
818 | iterations / batchSize); |
819 | } |
820 | } |
821 | |
822 | static bool comparePI(const NodeProfilingInfo &a, const NodeProfilingInfo &b) { |
823 | return (a.nodeOutputName_.compare(b.nodeOutputName_) < 0); |
824 | } |
825 | |
826 | void Loader::generateAndSerializeProfilingInfos(PlaceholderBindings &bindings) { |
827 | assert(!dumpProfileFileOpt.empty() && |
828 | "Filename to dump serialized profile to must not be empty." ); |
829 | std::vector<NodeProfilingInfo> PI; |
830 | for (auto F : getModule()->getFunctions()) { |
831 | std::vector<NodeProfilingInfo> tmp = |
832 | quantization::generateNodeProfilingInfos(bindings, F, loweredMap_); |
833 | PI.insert(PI.end(), tmp.begin(), tmp.end()); |
834 | } |
835 | std::sort(PI.begin(), PI.end(), comparePI); |
836 | serializeProfilingInfosToYaml(dumpProfileFileOpt, |
837 | compilationInfo_.graphPreLowerHash, PI); |
838 | } |
839 | |
840 | Loader &Loader::registerExtension(std::unique_ptr<LoaderExtension> extension) { |
841 | loaderExtensionList_.push_back(std::move(extension)); |
842 | return *this; |
843 | } |
844 | |
845 | void Loader::postModelLoad(PlaceholderBindings &bindings, |
846 | ProtobufLoader &protoLoader, |
847 | llvm::StringMap<Placeholder *> &placeholderMap, |
848 | llvm::ArrayRef<TypeRef> inputImageType) { |
849 | for (auto &&ext : loaderExtensionList_) { |
850 | ext->postModelLoad(*this, bindings, protoLoader, placeholderMap, |
851 | inputImageType); |
852 | } |
853 | } |
854 | |
855 | void Loader::postModelLoad(PlaceholderBindings &bindings, |
856 | TFLiteModelLoader &tfloader, |
857 | llvm::StringMap<Placeholder *> &placeholderMap, |
858 | llvm::ArrayRef<TypeRef> inputImageType) { |
859 | for (auto &&ext : loaderExtensionList_) { |
860 | ext->postModelLoad(*this, bindings, tfloader, placeholderMap, |
861 | inputImageType); |
862 | } |
863 | } |
864 | |
865 | void Loader::inferInitMiniBatch(PlaceholderBindings &bindings, |
866 | size_t minibatchIndex, size_t minibatchSize) { |
867 | for (auto &&ext : loaderExtensionList_) { |
868 | ext->inferInitMiniBatch(*this, bindings, minibatchIndex, minibatchSize); |
869 | } |
870 | } |
871 | |
872 | void Loader::inferEndMiniBatch(PlaceholderBindings &bindings, |
873 | size_t minibatchIndex, size_t minibatchSize) { |
874 | for (auto &&ext : loaderExtensionList_) { |
875 | ext->inferEndMiniBatch(*this, bindings, minibatchIndex, minibatchSize); |
876 | } |
877 | } |
878 | |
879 | Loader::Loader(llvm::ArrayRef<size_t> configDeviceIDs) { |
880 | if (modelPathOpt.size() == 1) { |
881 | if (llvm::sys::fs::is_directory(*modelPathOpt.begin())) { |
882 | caffe2NetDescFilename_ = modelPathOpt[0] + "/predict_net.pb" ; |
883 | caffe2NetWeightFilename_ = modelPathOpt[0] + "/init_net.pb" ; |
884 | } else { |
885 | llvm::StringRef modelPath = modelPathOpt[0]; |
886 | if (modelPath.endswith("tflite" )) { |
887 | tfliteModelFilename_ = modelPath.str(); |
888 | } else { |
889 | onnxModelFilename_ = modelPath.str(); |
890 | } |
891 | } |
892 | } else { |
893 | caffe2NetDescFilename_ = modelPathOpt[0]; |
894 | caffe2NetWeightFilename_ = modelPathOpt[1]; |
895 | } |
896 | M_.reset(new Module); |
897 | |
898 | std::vector<std::unique_ptr<runtime::DeviceConfig>> configs; |
899 | |
900 | if (configDeviceIDs.empty()) { |
901 | configs = runtime::generateDeviceConfigs(numDevices, ExecutionBackend); |
902 | } else { |
903 | for (size_t ID : configDeviceIDs) { |
904 | CHECK(ID < numDevices) << "IDs must be less than the number of devices" ; |
905 | auto config = glow::make_unique<runtime::DeviceConfig>(ExecutionBackend); |
906 | config->deviceID = ID; |
907 | configs.push_back(std::move(config)); |
908 | } |
909 | } |
910 | |
911 | hostManager_ = glow::make_unique<runtime::HostManager>(std::move(configs)); |
912 | backend_ = std::unique_ptr<Backend>(createBackend(ExecutionBackend)); |
913 | F_ = M_->createFunction(modelPathOpt[0]); |
914 | functionName_ = modelPathOpt[0]; |
915 | } |
916 | |