/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Loader.h"
#include "LoaderUtils.h"
#include "glow/Base/TensorSerialization.h"
#include "llvm/Support/CommandLine.h"

using namespace glow;

llvm::cl::OptionCategory modelProfilerCat("Model Profiler Options");

namespace {
llvm::cl::list<std::string> inputDatasetOpts(
    "input-dataset", llvm::cl::ZeroOrMore,
    llvm::cl::desc(
        "Provide a dataset for a model input as a set of file paths by using\n"
        "this option with the following format:\n"
        "  -input-dataset=<name>,<format>,<source>,<opts>\n"
        "<name> the name of the model input placeholder (tensor) where the\n"
        "  dataset files will be loaded during run-time.\n"
        "<format> the format of all the files from the given dataset:\n"
        "  - 'rawbin': raw binary format. Each binary file corresponds to a\n"
        "    tensor and contains the data serialized as a binary blob without\n"
        "    extra meta information (tensor data type or shape) because the\n"
        "    tensor is statically configured before loading the data. The\n"
        "    data is expected to be serialized with the same size and layout\n"
        "    as the tensor into which it will be loaded. For example, for a\n"
        "    float32 tensor with shape [2,3], the binary file is expected to\n"
        "    have the size 2 x 3 x 4 (float32) = 24 bytes.\n"
        "  - 'rawtxt': raw text format. Each text file corresponds to a\n"
        "    tensor and contains the data serialized as a linear list of\n"
        "    comma separated values in text format, without extra meta\n"
        "    information (tensor data type or shape) because the tensor is\n"
        "    statically configured before loading the data. The data is\n"
        "    expected to be serialized with the same size and layout as the\n"
        "    tensor into which it will be loaded. For example, for a float32\n"
        "    tensor with shape [2,3], the text file is expected to contain a\n"
        "    list of 6 comma separated values (extra spaces and newlines are\n"
        "    allowed):\n"
        "      1.0, 2.0, 3.0, 4.0, 5.0, 6.0,\n"
        "<source> specifies the dataset source:\n"
        "  - 'file': the dataset is specified as a text file which contains\n"
        "    the relative or absolute paths of all the files in the dataset,\n"
        "    listed one per line, with or without a trailing comma. The path\n"
        "    of the dataset file is given as the first argument in the <opts>\n"
        "    list. If a second argument is given in the <opts> list\n"
        "    (optional), it will be prepended as a base directory to all the\n"
        "    paths from the file. The dataset file must contain only ONE PATH\n"
        "    PER LINE; after the first comma or space character, the rest of\n"
        "    the line is ignored. All the examples below are valid:\n"
        "      data0.bin\n"
        "      data1.bin,\n"
        "      data2.bin 'cat'\n"
        "      data3.bin,dog\n"
        "      data4.bin ,2\n"
        "      data5.bin,1\n"
        "    Do NOT use file paths which contain spaces.\n"
        "  - 'dir': the dataset is specified as all the files from a given\n"
        "    directory, listed alphabetically. The directory path is given as\n"
        "    the first argument in the <opts> list. Make sure the directory\n"
        "    does not contain items other than the dataset files (folders,\n"
        "    symlinks, etc.).\n"
        "<opts> extra options dependent on the <source> field.\n"
        "This option must be used once for each of the model inputs.\n"
        "\nExample 1:\n"
        "  -input-dataset=input1,rawbin,file,dataset.csv\n"
        "  The dataset paths for the 'input1' model input are read from the\n"
        "  'dataset.csv' file, which could have the following content:\n"
        "    /data_folder/data0.dat,\n"
        "    /data_folder/data1.dat,\n"
        "    .......................\n"
        "  All the files listed are assumed to be in raw binary format.\n"
        "\nExample 2:\n"
        "  -input-dataset=input2,rawbin,file,dataset.csv,/data_folder\n"
        "  The dataset files for the 'input2' model input are read from the\n"
        "  'dataset.csv' file, which could have the following content:\n"
        "    data0.dat,\n"
        "    data1.dat,\n"
        "    ..........\n"
        "  All the file paths listed will be prepended with the\n"
        "  '/data_folder' base directory path when loading. All the files\n"
        "  listed are assumed to be in raw binary format.\n"
        "\nExample 3:\n"
        "  -input-dataset=input3,rawtxt,dir,/data_folder\n"
        "  The dataset files for the 'input3' model input are all the files\n"
        "  from the '/data_folder' directory, listed alphabetically. The\n"
        "  files are assumed to be in raw text format.\n"),
    llvm::cl::value_desc("name,format,source,opts"),
    llvm::cl::cat(modelProfilerCat));
} // namespace

/// Parse the 'input-dataset' options and retrieve, for each model input, the
/// input name, the dataset format, the dataset source and the source options.
static void
getInputDatasets(std::vector<std::string> &inputNames,
                 std::vector<std::string> &inputFormats,
                 std::vector<std::string> &inputSources,
                 std::vector<std::vector<std::string>> &inputOptions) {
  for (const auto &str : inputDatasetOpts) {
    // Parse the input name.
    auto strPair = llvm::StringRef(str).split(',');
    llvm::StringRef name = strPair.first;
    checkCond(name.size(), "Model input name for dataset is empty!");
    inputNames.push_back(name.str());
    // Parse the dataset format. Note that the StringRef halves produced by
    // split() are not null terminated, hence the conversion to std::string
    // before formatting with '%s'.
    strPair = strPair.second.split(',');
    llvm::StringRef format = strPair.first;
    checkCond(format.size(),
              strFormat("Model input dataset format is empty for '%s'!",
                        name.str().c_str()));
    inputFormats.push_back(format.str());
    // Parse the dataset source.
    strPair = strPair.second.split(',');
    llvm::StringRef source = strPair.first;
    checkCond(source.size(),
              strFormat("Model input dataset source is empty for '%s'!",
                        name.str().c_str()));
    inputSources.push_back(source.str());
    // Parse the source options (optional).
    std::vector<std::string> options;
    while (strPair.second.size() != 0) {
      strPair = strPair.second.split(',');
      llvm::StringRef opt = strPair.first;
      checkCond(opt.size(),
                strFormat("Model input dataset option is empty for '%s'!",
                          name.str().c_str()));
      options.push_back(opt.str());
    }
    inputOptions.push_back(options);
  }
}

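// An illustrative invocation of this tool (the model and dataset paths below
// are hypothetical; '-model' and '-dump-profile' are common Loader options):
//
//   model-profiler -model=model.onnx -dump-profile=profile.yaml \
//       -input-dataset=input1,rawbin,file,dataset.csv,/data_folder
//
// For the invocation above, getInputDatasets() yields:
//   inputNames[0]   = "input1"
//   inputFormats[0] = "rawbin"
//   inputSources[0] = "file"
//   inputOptions[0] = {"dataset.csv", "/data_folder"}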
int main(int argc, char **argv) {

  // Parse the command line parameters. All the options are made available as
  // part of the loader object.
  parseCommandLine(argc, argv);

  // The 'dump-profile' option must be set.
  checkCond(profilingGraph(),
            "Use the 'dump-profile' option to specify the dump profile path!");

  // Get the input dataset options.
  std::vector<std::string> inputNames;
  std::vector<std::string> inputFormats;
  std::vector<std::string> inputSources;
  std::vector<std::vector<std::string>> inputOptions;
  getInputDatasets(inputNames, inputFormats, inputSources, inputOptions);
  auto numInputDatasets = inputNames.size();
  checkCond(numInputDatasets >= 1,
            "At least one input dataset must be specified using the "
            "'input-dataset' option!");

  // Load the profiling datasets (the lists of file paths) for all the inputs.
  std::vector<UnlabeledDataSet> inputDatasets(numInputDatasets);
  for (size_t idx = 0; idx < numInputDatasets; idx++) {
    auto inputName = inputNames[idx];
    auto inputSrc = inputSources[idx];
    auto inputOpts = inputOptions[idx];
    if (inputSrc == "file") {
      // Get the dataset paths from a dataset file.
      if (inputOpts.size() == 1) {
        inputDatasets[idx] = readUnlabeledDataSetFromFile(inputOpts[0], "");
      } else if (inputOpts.size() == 2) {
        inputDatasets[idx] =
            readUnlabeledDataSetFromFile(inputOpts[0], inputOpts[1]);
      } else {
        exitWithErr(strFormat("Invalid number of parameters provided for the "
                              "dataset 'file' of the '%s' input!",
                              inputName.c_str()));
      }
    } else if (inputSrc == "dir") {
      // Get the dataset paths from a directory.
      if (inputOpts.size() == 1) {
        inputDatasets[idx] = readUnlabeledDataSetFromDir(inputOpts[0]);
      } else {
        exitWithErr(strFormat("Invalid number of parameters provided for the "
                              "dataset 'dir' of the '%s' input!",
                              inputName.c_str()));
      }
    } else {
      exitWithErr(strFormat("Input dataset source '%s' is not supported!",
                            inputSrc.c_str()));
    }
  }

  // Verify we have the same number of entries for all the datasets.
  size_t entryNum = inputDatasets[0].size();
  for (size_t idx = 1; idx < numInputDatasets; idx++) {
    checkCond(inputDatasets[idx].size() == entryNum,
              strFormat("The profiling dataset for the input '%s' does not "
                        "have the same number of entries as the other inputs!",
                        inputNames[idx].c_str()));
  }
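
  // Each entry index now selects one file from every input dataset; together
  // these files form the complete set of model inputs for a single inference.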

  // Initialize the loader object.
  Loader loader;

  // Load the model.
  loader.loadModel();

  // Get the model input placeholders in the same order as the input dataset
  // options.
  auto inputVarsMapping = loader.getInputPlaceholderMap();
  auto modelNumInputs = inputVarsMapping.size();
  checkCond(modelNumInputs == numInputDatasets,
            "Not all the model inputs were provided with the 'input-dataset' "
            "parameter!");
  std::vector<Placeholder *> inputPlaceholders;
  for (const auto &name : inputNames) {
    auto it = inputVarsMapping.find(name);
    checkCond(
        it != inputVarsMapping.end(),
        strFormat("Name '%s' is not a model input placeholder!", name.c_str()));
    inputPlaceholders.push_back(it->second);
  }

  // Allocate tensors for all the placeholders.
  PlaceholderBindings bindings;
  bindings.allocate(loader.getModule()->getPlaceholders());

  // Get the compilation options for profiling.
  CompilationContext cctx =
      loader.getCompilationContext(QuantizationMode::Profile);
  cctx.bindings = &bindings;

  // Compile the function. In profiling mode the compiled graph is instrumented
  // with profiling nodes which record the dynamic ranges of the tensors during
  // inference.
  loader.compile(cctx);

  // Run the profiling for all the dataset entries. The profiling information
  // is automatically aggregated over all the inference runs.
  for (size_t entryIdx = 0; entryIdx < entryNum; entryIdx++) {

    // Load the tensor data for all the model inputs.
    for (size_t inputIdx = 0; inputIdx < modelNumInputs; inputIdx++) {
      Tensor *inputTensor = bindings.get(inputPlaceholders[inputIdx]);
      std::string filePath = inputDatasets[inputIdx][entryIdx];
      std::string fileFormat = inputFormats[inputIdx];
      if (fileFormat == "rawbin") {
        // The raw files carry no type information: the tensor type comes from
        // the statically configured placeholder.
        TensorSerializationOptions opts;
        opts.withType = false;
        glow::loadTensorFromBinaryFile(*inputTensor, filePath.c_str(), opts);
      } else if (fileFormat == "rawtxt") {
        TensorSerializationOptions opts;
        opts.withType = false;
        glow::loadTensorFromTextFile(*inputTensor, filePath.c_str(), opts);
      } else {
        exitWithErr(strFormat("Input dataset format '%s' is invalid!",
                              fileFormat.c_str()));
      }
    }

    // Run the inference.
    loader.runInference(bindings, 1 /*batchSize*/);
  }

  // Dump the final profile.
  loader.generateAndSerializeProfilingInfos(bindings);

  return 0;
}