1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "Loader.h" |
18 | #include "LoaderUtils.h" |
19 | #include "glow/Base/TensorSerialization.h" |
20 | #include "llvm/Support/CommandLine.h" |
21 | |
22 | using namespace glow; |
23 | |
/// Command line category grouping all the options of the model profiler tool.
llvm::cl::OptionCategory modelProfilerCat("Model Profiler Options" );

namespace {
/// Per-input dataset specification of the form "<name>,<format>,<source>,<opts...>".
/// May be given once per model input (ZeroOrMore); the instances are parsed by
/// getInputDatasets() below. The long description doubles as the user manual
/// printed for '-help'.
llvm::cl::list<std::string> inputDatasetOpts(
    "input-dataset" , llvm::cl::ZeroOrMore,
    llvm::cl::desc(
        "Provide a dataset for a model input as a set of file paths by using \n"
        "this option with the following format: \n"
        "    -input-dataset=<name>,<format>,<source>,<opts> \n"
        "<name> the name of the model input placeholder (tensor) where the \n"
        "       dataset files will be loaded during run-time. \n"
        "<format> the format of all the files from the given dataset: \n"
        "       - 'rawbin': raw binary format. Each binary file corresponds\n"
        "         to a tensor and contains the data serialized as a binary \n"
        "         blob without extra meta information (tensor data type or \n"
        "         shape) because the tensor is statically configured before\n"
        "         loading the data. The data is expected to be serialized \n"
        "         with the correct size and layout as the tensor in which \n"
        "         it will be loaded. For example, for a float32 tensor with\n"
        "         shape [2,3], the binary file is expected to have the size\n"
        "         2 x 3 x 4 (float32) = 24 bytes. \n"
        "       - 'rawtxt': raw text format. Each text file corresponds to \n"
        "         a tensor and contains data serialized as a linear list \n"
        "         of comma separated values in text format without extra \n"
        "         meta information (tensor data type or shape) because the \n"
        "         tensor is statically configured before loading the data. \n"
        "         The data is expected to be serialized with the correct \n"
        "         size and layout as the tensor in which it will be loaded.\n"
        "         For example, for a float32 tensor with shape [2,3], the \n"
        "         text file is expected to contain a list of 6 values \n"
        "         separated by comma like this (extra spaces and newlines \n"
        "         are allowed): \n"
        "           1.0, 2.0, 3.0, 4.0, 5.0, 6.0, \n"
        "<source> specifies the dataset source: \n"
        "       - 'file': the dataset is specified as a text file which \n"
        "         contains the relative or absolute paths of all the files \n"
        "         in the dataset, listed one per line, separated by comma \n"
        "         or not. The path of the dataset file is given as the \n"
        "         first argument in the <opts> list. If a second argument \n"
        "         is given in the <opts> list (optional), that will be \n"
        "         concatenated (prepended) to all the paths from the file. \n"
        "         The dataset file must contain only ONE PATH PER LINE. \n"
        "         After the first comma or space character, the rest of the\n"
        "         line is ignored. All the examples below are valid: \n"
        "           data0.bin \n"
        "           data1.bin, \n"
        "           data2.bin 'cat' \n"
        "           data3.bin,dog \n"
        "           data4.bin ,2 \n"
        "           data5.bin,1 \n"
        "         Do NOT use file paths which contain spaces. \n"
        "       - 'dir': the dataset is specified as all the files from a \n"
        "         given directory listed alphabetically. The directory path\n"
        "         is specified with the first argument in the <opts> list. \n"
        "         Make sure the directory does not contain other items than\n"
        "         the dataset files (folders, symlinks, etc). \n"
        "<opts> extra options dependent on the <source> field. \n"
        "This option will be used for each of the model inputs. \n"
        "\nExample 1: \n"
        "  -input-dataset=input1,rawbin,file,dataset.csv \n"
        "  The dataset paths for the 'input1' model input are read from the\n"
        "  'dataset.csv' file which could have the following content: \n"
        "    /data_folder/data0.dat, \n"
        "    /data_folder/data1.dat, \n"
        "    ....................... \n"
        "  All the files listed are assumed to be in raw binary format. \n"
        "\nExample 2: \n"
        "  -input-dataset=input2,rawbin,file,dataset.csv,/data_folder \n"
        "  The dataset files for the 'input2' model input are read from the\n"
        "  'dataset.csv' file which could have the following content: \n"
        "    data0.dat, \n"
        "    data1.dat, \n"
        "    .......... \n"
        "  All the file paths listed will be concatenated (prepended) with \n"
        "  the '/data_folder' base directory path when loading. All the \n"
        "  files listed are assumed to be in raw binary format. \n"
        "\nExample 3: \n"
        "  -input-dataset=input3,rawtxt,dir,/data_folder \n"
        "  The dataset files for the 'input3' model input are all the files\n"
        "  from the '/data_folder' directory listed alphabetically. The \n"
        "  files are assumed to be in raw text format.\n" ),
    llvm::cl::value_desc("name,format,source,opts" ),
    llvm::cl::cat(modelProfilerCat));
} // namespace
108 | |
109 | /// Parse the 'input-dataset' option and get the arguments. |
110 | static void |
111 | getInputDatasets(std::vector<std::string> &inputNames, |
112 | std::vector<std::string> &inputFormats, |
113 | std::vector<std::string> &inputSources, |
114 | std::vector<std::vector<std::string>> &inputOptions) { |
115 | for (const auto &str : inputDatasetOpts) { |
116 | // Parse name. |
117 | auto strPair = llvm::StringRef(str).split(','); |
118 | llvm::StringRef name = strPair.first; |
119 | checkCond(name.size(), "Model input name for dataset is empty!" ); |
120 | inputNames.push_back(name.str()); |
121 | // Parse format. |
122 | strPair = strPair.second.split(','); |
123 | llvm::StringRef format = strPair.first; |
124 | checkCond(format.size(), |
125 | strFormat("Model input dataset format is empty for '%s'!" , |
126 | name.data())); |
127 | inputFormats.push_back(format.str()); |
128 | // Parse source. |
129 | strPair = strPair.second.split(','); |
130 | llvm::StringRef source = strPair.first; |
131 | checkCond(source.size(), |
132 | strFormat("Model input dataset source is empty for '%s'!" , |
133 | name.data())); |
134 | inputSources.push_back(source.str()); |
135 | // Parse options (optional). |
136 | std::vector<std::string> options; |
137 | while (strPair.second.size() != 0) { |
138 | strPair = strPair.second.split(','); |
139 | llvm::StringRef opt = strPair.first; |
140 | checkCond(opt.size(), |
141 | strFormat("Model input dataset options is empty for '%s'!" , |
142 | name.data())); |
143 | options.push_back(opt.str()); |
144 | } |
145 | inputOptions.push_back(options); |
146 | } |
147 | } |
148 | |
149 | int main(int argc, char **argv) { |
150 | |
151 | // Parse command line parameters. All the options will be available as part of |
152 | // the loader object. |
153 | parseCommandLine(argc, argv); |
154 | |
155 | // Dump profile option should be set. |
156 | checkCond(profilingGraph(), |
157 | "Use the 'dump-profile' option to specify the dump profile path!" ); |
158 | |
159 | // Get the input dataset options. |
160 | std::vector<std::string> inputNames; |
161 | std::vector<std::string> inputFormats; |
162 | std::vector<std::string> inputSources; |
163 | std::vector<std::vector<std::string>> inputOptions; |
164 | getInputDatasets(inputNames, inputFormats, inputSources, inputOptions); |
165 | auto numInputDatasets = inputNames.size(); |
166 | checkCond(numInputDatasets >= 1, |
167 | "At least one input dataset must be specified using the " |
168 | "'input-dataset' option!" ); |
169 | |
170 | // Get profiling dataset. |
171 | std::vector<UnlabeledDataSet> inputDatasets(numInputDatasets); |
172 | for (size_t idx = 0; idx < numInputDatasets; idx++) { |
173 | auto inputName = inputNames[idx]; |
174 | auto inputSrc = inputSources[idx]; |
175 | auto inputOpts = inputOptions[idx]; |
176 | if (inputSrc == "file" ) { |
177 | // Get dataset paths from file. |
178 | if (inputOpts.size() == 1) { |
179 | inputDatasets[idx] = readUnlabeledDataSetFromFile(inputOpts[0], "" ); |
180 | } else if (inputOpts.size() == 2) { |
181 | inputDatasets[idx] = |
182 | readUnlabeledDataSetFromFile(inputOpts[0], inputOpts[1]); |
183 | } else { |
184 | exitWithErr(strFormat("Invalid number of parameters provided for the " |
185 | "dataset 'file' of the '%s' input!" , |
186 | inputName.c_str())); |
187 | } |
188 | } else if (inputSrc == "dir" ) { |
189 | // Get dataset paths from directory. |
190 | if (inputOpts.size() == 1) { |
191 | inputDatasets[idx] = readUnlabeledDataSetFromDir(inputOpts[0]); |
192 | } else { |
193 | exitWithErr(strFormat("Invalid number of parameters provided for the " |
194 | "dataset 'dir' of the '%s' input!" , |
195 | inputName.c_str())); |
196 | } |
197 | } else { |
198 | exitWithErr(strFormat("Input dataset source '%s' is not supported!" , |
199 | inputSrc.c_str())); |
200 | } |
201 | } |
202 | |
203 | // Verify we have the same number of entries for all the datasets. |
204 | size_t entryNum = inputDatasets[0].size(); |
205 | for (size_t idx = 1; idx < numInputDatasets; idx++) { |
206 | checkCond(inputDatasets[idx].size() == entryNum, |
207 | strFormat("The profiling dataset for the input '%s' does not " |
208 | "have the same number of entries as the other inputs!" , |
209 | inputNames[idx].c_str())); |
210 | } |
211 | |
212 | // Initialize the loader object. |
213 | Loader loader; |
214 | |
215 | // Load the model. |
216 | loader.loadModel(); |
217 | |
218 | // Get the model input placeholders in the same order as the input dataset |
219 | // options. |
220 | auto inputVarsMapping = loader.getInputPlaceholderMap(); |
221 | auto modelNumInputs = inputVarsMapping.size(); |
222 | checkCond(modelNumInputs == numInputDatasets, |
223 | "Not all the model inputs where provided with the 'input-dataset' " |
224 | "parameter!" ); |
225 | std::vector<Placeholder *> inputPlaceholders; |
226 | for (const auto &name : inputNames) { |
227 | auto it = inputVarsMapping.find(name); |
228 | checkCond( |
229 | it != inputVarsMapping.end(), |
230 | strFormat("Name '%s' is not a model input placeholder!" , name.c_str())); |
231 | inputPlaceholders.push_back(it->second); |
232 | } |
233 | |
234 | // Allocate tensors for all placeholders. |
235 | PlaceholderBindings bindings; |
236 | bindings.allocate(loader.getModule()->getPlaceholders()); |
237 | |
238 | // Get compilation options for profiling. |
239 | CompilationContext cctx = |
240 | loader.getCompilationContext(QuantizationMode::Profile); |
241 | cctx.bindings = &bindings; |
242 | |
243 | // Compile the function. |
244 | loader.compile(cctx); |
245 | |
246 | // Run profiling for all the dataset entries. The profiling information is |
247 | // automatically aggregated for all the inference runs. |
248 | for (size_t entryIdx = 0; entryIdx < entryNum; entryIdx++) { |
249 | |
250 | // Load tensor data. |
251 | for (size_t inputIdx = 0; inputIdx < modelNumInputs; inputIdx++) { |
252 | Tensor *inputTensor = bindings.get(inputPlaceholders[inputIdx]); |
253 | std::string filePath = inputDatasets[inputIdx][entryIdx]; |
254 | std::string fileFormat = inputFormats[inputIdx]; |
255 | if (fileFormat == "rawbin" ) { |
256 | TensorSerializationOptions opts; |
257 | opts.withType = false; |
258 | glow::loadTensorFromBinaryFile(*inputTensor, filePath.c_str(), opts); |
259 | } else if (fileFormat == "rawtxt" ) { |
260 | TensorSerializationOptions opts; |
261 | opts.withType = false; |
262 | glow::loadTensorFromTextFile(*inputTensor, filePath.c_str(), opts); |
263 | } else { |
264 | exitWithErr(strFormat("Input dataset format '%s' invalid!" , |
265 | fileFormat.c_str())); |
266 | } |
267 | } |
268 | |
269 | // Run inference. |
270 | loader.runInference(bindings, 1 /*batchSize*/); |
271 | } |
272 | |
273 | // Dump the final profile. |
274 | loader.generateAndSerializeProfilingInfos(bindings); |
275 | |
276 | return 0; |
277 | } |
278 | |