1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | /** |
18 | * Contributed by Xperi Corporation on August 13, 2019 |
19 | */ |
20 | |
21 | #include "Loader.h" |
22 | |
23 | #include "glow/Base/Tensor.h" |
24 | #include "glow/Converter/TypeAToTypeBFunctionConverter.h" |
25 | #include "glow/Graph/Node.h" |
26 | #include "glow/Graph/Nodes.h" |
27 | #include "glow/Importer/Caffe2ModelLoader.h" |
28 | #include "glow/Importer/ONNXModelLoader.h" |
29 | |
30 | #include "llvm/ADT/StringSwitch.h" |
31 | #include "llvm/Support/CommandLine.h" |
32 | #include "llvm/Support/Format.h" |
33 | #include "llvm/Support/raw_ostream.h" |
34 | |
35 | #include <fstream> |
36 | #include <iomanip> |
37 | #include <iostream> |
38 | #include <memory> |
39 | #include <queue> |
40 | #include <sstream> |
41 | |
42 | using namespace glow; |
43 | |
44 | namespace { |
45 | llvm::cl::OptionCategory inputLoaderCat("Input Loader Options" ); |
46 | |
47 | llvm::cl::opt<std::string> modelInputName( |
48 | "model-input-name" , |
49 | llvm::cl::desc("The name of the variable for the model's input data." ), |
50 | llvm::cl::value_desc("string" ), llvm::cl::Required, |
51 | llvm::cl::cat(inputLoaderCat)); |
52 | |
53 | llvm::cl::list<std::string> inputFilenames( |
54 | llvm::cl::Positional, |
55 | llvm::cl::desc( |
56 | "<input file(s)> Input file name(s) from which input is read. " |
57 | "Input is read byte-wise, so the file is assumed to be " |
58 | "a byte-stream. For instance, if the input tensor is a " |
59 | "2x3 matrix of 32-bit floats, then the file is expected " |
60 | "to contain 4x2x3 = 48 bytes. The values are loaded into " |
61 | "the tensor column-wise: (1, 1), (1, 2), (1, 3), (2, 1), ..., (3, 3)" ), |
62 | llvm::cl::value_desc("space-separated strings" ), llvm::cl::ZeroOrMore); |
63 | |
64 | llvm::cl::list<unsigned> inputTensorDimensions( |
65 | "input-tensor-dims" , |
66 | llvm::cl::desc("Comma-separated list of input tensor dimensions" ), |
67 | llvm::cl::value_desc("unsigned int" ), llvm::cl::OneOrMore, |
68 | llvm::cl::CommaSeparated, llvm::cl::cat(inputLoaderCat)); |
69 | |
70 | llvm::cl::list<std::string> outputTensorNames( |
71 | "output-tensor-names" , |
72 | llvm::cl::desc("Comma-separated list of output tensor names" ), |
73 | llvm::cl::value_desc("list of strings" ), llvm::cl::OneOrMore, |
74 | llvm::cl::CommaSeparated, llvm::cl::cat(inputLoaderCat)); |
75 | |
76 | llvm::cl::opt<std::string> inputFileList( |
77 | "input-file-list" , |
78 | llvm::cl::desc("Name of the file containing list of files (one per line) " |
79 | "to process. This " |
80 | "is equivalent to passing each file name individually. " ), |
81 | llvm::cl::value_desc("string" ), llvm::cl::Optional, |
82 | llvm::cl::cat(inputLoaderCat)); |
83 | |
84 | llvm::cl::opt<bool> convertInAndOutToFp16( |
85 | "convert-input-to-fp16" , |
86 | llvm::cl::desc( |
87 | "Convert the input and output tensors of the network to fp16" ), |
88 | llvm::cl::cat(inputLoaderCat)); |
89 | |
90 | llvm::cl::opt<bool> |
91 | writeOutput("write-output" , |
92 | llvm::cl::desc("Write output of the inference (only applicable " |
93 | "when not building a bundle." ), |
94 | llvm::cl::cat(inputLoaderCat)); |
95 | } // unnamed namespace |
96 | |
97 | /// Creates and \returns the ProtobufLoader given \p loader and the |
98 | /// \p inputType. |
99 | static std::unique_ptr<ProtobufLoader> |
100 | createProtobufLoader(Loader &loader, const TypeRef inputType) { |
101 | std::unique_ptr<ProtobufLoader> ptbLoader; |
102 | const bool caffe2Model{!loader.getCaffe2NetDescFilename().empty()}; |
103 | |
104 | if (caffe2Model) { |
105 | ptbLoader.reset(new Caffe2ModelLoader( |
106 | loader.getCaffe2NetDescFilename().str(), |
107 | loader.getCaffe2NetWeightFilename().str(), {modelInputName.c_str()}, |
108 | {inputType}, *loader.getFunction())); |
109 | } else { |
110 | ptbLoader.reset(new ONNXModelLoader(loader.getOnnxModelFilename().str(), |
111 | {modelInputName.c_str()}, {inputType}, |
112 | *loader.getFunction())); |
113 | } |
114 | |
115 | return ptbLoader; |
116 | } |
117 | |
118 | /// Builds the network and \returns a pair of the form (input placeholder ptr, |
119 | /// (output name, output tensor ptr)). \p loader - the loader \p inputType - the |
120 | /// input type \p ioBindings - a reference to the placeholder bindings |
121 | /// (allocated in this function) |
122 | static std::pair<Placeholder *, std::unordered_map<std::string, Tensor *>> |
123 | buildNetwork(Loader &loader, const TypeRef inputType, |
124 | PlaceholderBindings &ioBindings) { |
125 | std::unique_ptr<ProtobufLoader> LD; |
126 | const char *inputName{modelInputName.c_str()}; |
127 | Placeholder *inputPH; |
128 | Placeholder *outputPH; |
129 | Tensor *outputTensor; |
130 | std::pair<Placeholder *, std::unordered_map<std::string, Tensor *>> ret; |
131 | |
132 | // Create the protobuf loader and allocate io bindings. |
133 | LD = createProtobufLoader(loader, inputType); |
134 | (void)ioBindings.allocate(loader.getModule()->getPlaceholders()); |
135 | |
136 | // Convert to Fp16 if required. |
137 | if (convertInAndOutToFp16) { |
138 | PrecisionConfiguration precConfig; |
139 | TypeAToTypeBFunctionConverter converter(*loader.getFunction(), |
140 | ElemKind::FloatTy, |
141 | ElemKind::Float16Ty, precConfig); |
142 | for (auto *placeholder : loader.getModule()->getPlaceholders()) |
143 | converter.convertPlaceholder(*placeholder, &ioBindings); |
144 | } |
145 | |
146 | // Compile the network |
147 | loader.compile(ioBindings); |
148 | |
149 | // Grab the input placeholder |
150 | inputPH = |
151 | llvm::cast<Placeholder>(EXIT_ON_ERR(LD->getNodeValueByName(inputName))); |
152 | ret.first = inputPH; |
153 | |
154 | // Grab all output placeholders by name/tensor |
155 | for (const std::string &name : outputTensorNames) { |
156 | outputPH = EXIT_ON_ERR(LD->getOutputByName(name)); |
157 | outputTensor = ioBindings.get(outputPH); |
158 | ret.second.insert(std::make_pair(name, outputTensor)); |
159 | } |
160 | |
161 | return ret; |
162 | } |
163 | |
164 | /// Gathers input from the files specified (either a single file containing one |
165 | /// input file name per line, or multiple input files) \p files - a reference of |
166 | /// type std::vector<std::string> that contains the gathered input filenames. |
167 | static void gatherFiles(std::vector<std::string> &files) { |
168 | // Grab any files specified on the command line as positional arguments. |
169 | for (auto file : inputFilenames) { |
170 | files.push_back(file); |
171 | } |
172 | // If a file with input file names was specified, read the input file names |
173 | // from the specified file. |
174 | if (inputFileList.size() != 0) { |
175 | std::ifstream fstrm{inputFileList}; |
176 | if (!fstrm) { |
177 | llvm::errs() << "Error processing input file list " << inputFileList |
178 | << "\n" ; |
179 | exit(1); |
180 | } |
181 | |
182 | std::string file; |
183 | while (std::getline(fstrm, file)) { |
184 | files.push_back(file); |
185 | std::ifstream check{file}; |
186 | if (!check) { |
187 | llvm::errs() << "Error processing input file " << file << "\n" ; |
188 | exit(1); |
189 | } |
190 | } |
191 | } |
192 | } |
193 | |
194 | /// Loads input data of size \p size from a given \p file into \p inputData. |
195 | static void loadInputData(const std::string &file, std::vector<char> &inputData, |
196 | std::size_t size) { |
197 | std::ifstream inputFile(file.c_str(), std::ios::binary); |
198 | inputFile.seekg(0, std::ios::end); |
199 | |
200 | if (inputFile.tellg() != long(size)) { |
201 | llvm::errs() << "Size of " << file << " does not match expected size " |
202 | << size << "\n" ; |
203 | exit(1); |
204 | } |
205 | |
206 | inputFile.seekg(0, std::ios::beg); |
207 | inputFile.read(inputData.data(), size); |
208 | } |
209 | |
210 | /// Run inference given the created \p loader, \p ioBindings, and \p |
211 | /// ioPlaceholders. \p inputData - the vector containing our input data (as raw |
212 | /// bytes) \p outputData - a pair of the form (output tensor name, (output |
213 | /// bytes, output size)) |
214 | static void runInference( |
215 | Loader &loader, PlaceholderBindings &ioBindings, |
216 | std::pair<Placeholder *, std::unordered_map<std::string, Tensor *>> |
217 | &ioPlaceholders, |
218 | const std::vector<char> &inputData, |
219 | std::unordered_map<std::string, std::pair<std::vector<char>, dim_t>> |
220 | &outputData) { |
221 | // Grab a pointer to the input tensor from the placeholders |
222 | Tensor *inputT = ioBindings.get(ioPlaceholders.first); |
223 | |
224 | // Copy the raw input data from inputData into the input tensor. |
225 | std::memcpy(inputT->getUnsafePtr(), inputData.data(), inputData.size()); |
226 | |
227 | // If we must first convert to Fp16, do so. |
228 | if (convertInAndOutToFp16) { |
229 | inputT->convertToType(ElemKind::Float16Ty); |
230 | } |
231 | |
232 | // Finally, run inference. The input data is already stored inside the input |
233 | // tensor, inside the ioBindings. The batch size is 1. |
234 | loader.runInference(ioBindings, 1); |
235 | |
236 | // Finally, store our output - we may have multiple output tensors, so sort |
237 | // the output into the correct named output bins. |
238 | for (auto &keyval : ioPlaceholders.second) { |
239 | outputData.insert( |
240 | std::make_pair(keyval.first, std::make_pair(std::vector<char>{}, 0))); |
241 | outputData[keyval.first].first.reserve( |
242 | ioPlaceholders.second[keyval.first]->getSizeInBytes()); |
243 | outputData[keyval.first].second = |
244 | ioPlaceholders.second[keyval.first]->getSizeInBytes(); |
245 | std::memcpy(outputData[keyval.first].first.data(), |
246 | ioPlaceholders.second[keyval.first]->getUnsafePtr(), |
247 | ioPlaceholders.second[keyval.first]->getSizeInBytes()); |
248 | } |
249 | } |
250 | |
251 | /// Write out \p outputData into \p file. |
252 | static void writeOutputData( |
253 | const std::unordered_map<std::string, std::pair<std::vector<char>, dim_t>> |
254 | &outputData, |
255 | const std::string &file) { |
256 | if (writeOutput) { |
257 | std::ofstream outputFile; |
258 | std::string name; |
259 | |
260 | // The output file is formated as [input file name].[output tensor |
261 | // name].out.dat |
262 | for (auto &keyval : outputData) { |
263 | name = file; |
264 | name += "." ; |
265 | name += keyval.first; |
266 | name += ".out.dat" ; |
267 | |
268 | outputFile.open(name.c_str(), std::ios::out | std::ios::binary); |
269 | if (!outputFile) { |
270 | std::cerr << "Unable to open output file: " << name << std::endl; |
271 | return; |
272 | } |
273 | outputFile.write(keyval.second.first.data(), keyval.second.second); |
274 | outputFile.close(); |
275 | } |
276 | } |
277 | } |
278 | |
279 | int main(int argc, char **argv) { |
280 | PlaceholderBindings ioBindings; // IO Bindings |
281 | std::pair<Placeholder *, std::unordered_map<std::string, Tensor *>> |
282 | ioPlaceholders; // first = input placeholder, |
283 | // second = <output name, output tensor> |
284 | |
285 | // This must be called before a loader instance is created. |
286 | parseCommandLine(argc, argv); |
287 | Loader loader; |
288 | |
289 | std::vector<dim_t> dims; |
290 | std::vector<char> inputData; |
291 | std::vector<std::string> files; |
292 | Tensor inputT; |
293 | |
294 | for (auto dim : inputTensorDimensions) { |
295 | dims.push_back(dim); |
296 | } |
297 | |
298 | inputT.reset(ElemKind::FloatTy, dims); |
299 | ioPlaceholders = buildNetwork(loader, &inputT.getType(), ioBindings); |
300 | |
301 | if (emittingBundle()) { |
302 | if (!inputFileList.empty() || inputFilenames.size() != 0) { |
303 | llvm::errs() << "WARNING: input files specification has no effect when " |
304 | "emitting bundle.\n" ; |
305 | } |
306 | return 0; |
307 | } |
308 | |
309 | if (inputFileList.empty() && inputFilenames.size() == 0) { |
310 | llvm::errs() |
311 | << "Args: Either positional <input file(s)> or -input-file-list " |
312 | "must be used to specify input data when not outputting bundle.\n" ; |
313 | std::exit(1); |
314 | } |
315 | |
316 | if (!inputFileList.empty() && inputFilenames.size() != 0) { |
317 | llvm::errs() << "Args: Either positional <input file(s)> or " |
318 | "-input-file-list (but not both) " |
319 | "must be used to specify input data.\n" ; |
320 | std::exit(1); |
321 | } |
322 | |
323 | // Stores the list of files containing input in "files". |
324 | gatherFiles(files); |
325 | for (auto &file : files) { |
326 | inputData.clear(); |
327 | // The size of input is computed from input dimensions, known from command |
328 | // line arguments, and the size of float. |
329 | inputData.reserve(inputT.getSizeInBytes()); |
330 | // Every output is identified by its name (std::string), and is stored in |
331 | // a byte array; it also carries information about its size. So |
332 | // first = name |
333 | // second = <byte array, array size>. |
334 | std::unordered_map<std::string, std::pair<std::vector<char>, dim_t>> |
335 | outputData; |
336 | |
337 | // Reads input from file to the inputData vector, of max size = the capacity |
338 | // of the input tensor. |
339 | loadInputData(file, inputData, inputT.getSizeInBytes()); |
340 | // Output data is stored in outputData. |
341 | runInference(loader, ioBindings, ioPlaceholders, inputData, outputData); |
342 | // Writes output to a file whose base name is given by "file". |
343 | writeOutputData(outputData, file); |
344 | } |
345 | |
346 | // Are we profiling? If so, spit out the profile. |
347 | if (profilingGraph()) { |
348 | loader.generateAndSerializeProfilingInfos(ioBindings); |
349 | } |
350 | |
351 | return 0; |
352 | } |
353 | |