/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Loader.h"
#include "LoaderUtils.h"
#include "glow/Base/TensorSerialization.h"
#include "llvm/Support/CommandLine.h"

using namespace glow;

llvm::cl::OptionCategory modelProfilerCat("Model Profiler Options");

namespace {
llvm::cl::list<std::string> inputDatasetOpts(
    "input-dataset", llvm::cl::ZeroOrMore,
    llvm::cl::desc(
        "Provide a dataset for a model input as a set of file paths by using\n"
        "this option with the following format:\n"
        "  -input-dataset=<name>,<format>,<source>,<opts>\n"
        "<name> the name of the model input placeholder (tensor) where the\n"
        "  dataset files will be loaded during run-time.\n"
        "<format> the format of all the files from the given dataset:\n"
        "  - 'rawbin': raw binary format. Each binary file corresponds to a\n"
        "    tensor and contains the data serialized as a binary blob without\n"
        "    extra meta information (tensor data type or shape) because the\n"
        "    tensor is statically configured before loading the data. The\n"
        "    data is expected to be serialized with the same size and layout\n"
        "    as the tensor into which it will be loaded. For example, for a\n"
        "    float32 tensor with shape [2,3], the binary file is expected to\n"
        "    have the size 2 x 3 x 4 (float32) = 24 bytes.\n"
        "  - 'rawtxt': raw text format. Each text file corresponds to a\n"
        "    tensor and contains the data serialized as a linear list of\n"
        "    comma separated values in text format, without extra meta\n"
        "    information (tensor data type or shape) because the tensor is\n"
        "    statically configured before loading the data. The data is\n"
        "    expected to be serialized with the same size and layout as the\n"
        "    tensor into which it will be loaded. For example, for a float32\n"
        "    tensor with shape [2,3], the text file is expected to contain a\n"
        "    list of 6 comma separated values (extra spaces and newlines are\n"
        "    allowed):\n"
        "      1.0, 2.0, 3.0, 4.0, 5.0, 6.0,\n"
        "<source> specifies the dataset source:\n"
        "  - 'file': the dataset is specified as a text file which contains\n"
        "    the relative or absolute paths of all the files in the dataset,\n"
        "    listed one per line, with or without a trailing comma. The path\n"
        "    of the dataset file is given as the first argument in the <opts>\n"
        "    list. If a second argument is given in the <opts> list\n"
        "    (optional), it will be prepended as a base directory to all the\n"
        "    paths from the file. The dataset file must contain only ONE PATH\n"
        "    PER LINE; after the first comma or space character, the rest of\n"
        "    the line is ignored. All the examples below are valid:\n"
        "      data0.bin\n"
        "      data1.bin,\n"
        "      data2.bin 'cat'\n"
        "      data3.bin,dog\n"
        "      data4.bin ,2\n"
        "      data5.bin,1\n"
        "    Do NOT use file paths which contain spaces.\n"
        "  - 'dir': the dataset is specified as all the files from a given\n"
        "    directory, listed alphabetically. The directory path is given as\n"
        "    the first argument in the <opts> list. Make sure the directory\n"
        "    does not contain items other than the dataset files (folders,\n"
        "    symlinks, etc.).\n"
        "<opts> extra options dependent on the <source> field.\n"
        "This option must be used once for each of the model inputs.\n"
        "\nExample 1:\n"
        "  -input-dataset=input1,rawbin,file,dataset.csv\n"
        "  The dataset paths for the 'input1' model input are read from the\n"
        "  'dataset.csv' file, which could have the following content:\n"
        "    /data_folder/data0.dat,\n"
        "    /data_folder/data1.dat,\n"
        "    .......................\n"
        "  All the files listed are assumed to be in raw binary format.\n"
        "\nExample 2:\n"
        "  -input-dataset=input2,rawbin,file,dataset.csv,/data_folder\n"
        "  The dataset files for the 'input2' model input are read from the\n"
        "  'dataset.csv' file, which could have the following content:\n"
        "    data0.dat,\n"
        "    data1.dat,\n"
        "    ..........\n"
        "  All the file paths listed will be prepended with the\n"
        "  '/data_folder' base directory path when loading. All the files\n"
        "  listed are assumed to be in raw binary format.\n"
        "\nExample 3:\n"
        "  -input-dataset=input3,rawtxt,dir,/data_folder\n"
        "  The dataset files for the 'input3' model input are all the files\n"
        "  from the '/data_folder' directory, listed alphabetically. The\n"
        "  files are assumed to be in raw text format.\n"),
    llvm::cl::value_desc("name,format,source,opts"),
    llvm::cl::cat(modelProfilerCat));
} // namespace

/// Parse the 'input-dataset' options and retrieve, for each model input, the
/// input name, the dataset format, the dataset source and the source options.
static void
getInputDatasets(std::vector<std::string> &inputNames,
                 std::vector<std::string> &inputFormats,
                 std::vector<std::string> &inputSources,
                 std::vector<std::vector<std::string>> &inputOptions) {
  for (const auto &str : inputDatasetOpts) {
    // Parse the input name.
    auto strPair = llvm::StringRef(str).split(',');
    llvm::StringRef name = strPair.first;
    checkCond(name.size(), "Model input name for dataset is empty!");
    inputNames.push_back(name.str());
    // Parse the dataset format. Note that the StringRef halves produced by
    // split() are not null terminated, hence the conversion to std::string
    // before formatting with '%s'.
    strPair = strPair.second.split(',');
    llvm::StringRef format = strPair.first;
    checkCond(format.size(),
              strFormat("Model input dataset format is empty for '%s'!",
                        name.str().c_str()));
    inputFormats.push_back(format.str());
    // Parse the dataset source.
    strPair = strPair.second.split(',');
    llvm::StringRef source = strPair.first;
    checkCond(source.size(),
              strFormat("Model input dataset source is empty for '%s'!",
                        name.str().c_str()));
    inputSources.push_back(source.str());
    // Parse the source options (optional).
    std::vector<std::string> options;
    while (strPair.second.size() != 0) {
      strPair = strPair.second.split(',');
      llvm::StringRef opt = strPair.first;
      checkCond(opt.size(),
                strFormat("Model input dataset option is empty for '%s'!",
                          name.str().c_str()));
      options.push_back(opt.str());
    }
    inputOptions.push_back(options);
  }
}

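// An illustrative invocation of this tool (the model and dataset paths below
// are hypothetical; '-model' and '-dump-profile' are common Loader options):
//
//   model-profiler -model=model.onnx -dump-profile=profile.yaml \
//       -input-dataset=input1,rawbin,file,dataset.csv,/data_folder
//
// For the invocation above, getInputDatasets() yields:
//   inputNames[0]   = "input1"
//   inputFormats[0] = "rawbin"
//   inputSources[0] = "file"
//   inputOptions[0] = {"dataset.csv", "/data_folder"}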
int main(int argc, char **argv) {

  // Parse the command line parameters. All the options are made available as
  // part of the loader object.
  parseCommandLine(argc, argv);

  // The 'dump-profile' option must be set.
  checkCond(profilingGraph(),
            "Use the 'dump-profile' option to specify the dump profile path!");

  // Get the input dataset options.
  std::vector<std::string> inputNames;
  std::vector<std::string> inputFormats;
  std::vector<std::string> inputSources;
  std::vector<std::vector<std::string>> inputOptions;
  getInputDatasets(inputNames, inputFormats, inputSources, inputOptions);
  auto numInputDatasets = inputNames.size();
  checkCond(numInputDatasets >= 1,
            "At least one input dataset must be specified using the "
            "'input-dataset' option!");

  // Load the profiling datasets (the lists of file paths) for all the inputs.
  std::vector<UnlabeledDataSet> inputDatasets(numInputDatasets);
  for (size_t idx = 0; idx < numInputDatasets; idx++) {
    auto inputName = inputNames[idx];
    auto inputSrc = inputSources[idx];
    auto inputOpts = inputOptions[idx];
    if (inputSrc == "file") {
      // Get the dataset paths from a dataset file.
      if (inputOpts.size() == 1) {
        inputDatasets[idx] = readUnlabeledDataSetFromFile(inputOpts[0], "");
      } else if (inputOpts.size() == 2) {
        inputDatasets[idx] =
            readUnlabeledDataSetFromFile(inputOpts[0], inputOpts[1]);
      } else {
        exitWithErr(strFormat("Invalid number of parameters provided for the "
                              "dataset 'file' of the '%s' input!",
                              inputName.c_str()));
      }
    } else if (inputSrc == "dir") {
      // Get the dataset paths from a directory.
      if (inputOpts.size() == 1) {
        inputDatasets[idx] = readUnlabeledDataSetFromDir(inputOpts[0]);
      } else {
        exitWithErr(strFormat("Invalid number of parameters provided for the "
                              "dataset 'dir' of the '%s' input!",
                              inputName.c_str()));
      }
    } else {
      exitWithErr(strFormat("Input dataset source '%s' is not supported!",
                            inputSrc.c_str()));
    }
  }

  // Verify we have the same number of entries for all the datasets.
  size_t entryNum = inputDatasets[0].size();
  for (size_t idx = 1; idx < numInputDatasets; idx++) {
    checkCond(inputDatasets[idx].size() == entryNum,
              strFormat("The profiling dataset for the input '%s' does not "
                        "have the same number of entries as the other inputs!",
                        inputNames[idx].c_str()));
  }
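
  // Each entry index now selects one file from every input dataset; together
  // these files form the complete set of model inputs for a single inference.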

  // Initialize the loader object.
  Loader loader;

  // Load the model.
  loader.loadModel();

  // Get the model input placeholders in the same order as the input dataset
  // options.
  auto inputVarsMapping = loader.getInputPlaceholderMap();
  auto modelNumInputs = inputVarsMapping.size();
  checkCond(modelNumInputs == numInputDatasets,
            "Not all the model inputs were provided with the 'input-dataset' "
            "parameter!");
  std::vector<Placeholder *> inputPlaceholders;
  for (const auto &name : inputNames) {
    auto it = inputVarsMapping.find(name);
    checkCond(
        it != inputVarsMapping.end(),
        strFormat("Name '%s' is not a model input placeholder!", name.c_str()));
    inputPlaceholders.push_back(it->second);
  }

  // Allocate tensors for all the placeholders.
  PlaceholderBindings bindings;
  bindings.allocate(loader.getModule()->getPlaceholders());

  // Get the compilation options for profiling.
  CompilationContext cctx =
      loader.getCompilationContext(QuantizationMode::Profile);
  cctx.bindings = &bindings;

  // Compile the function. In profiling mode the compiled graph is instrumented
  // with profiling nodes which record the dynamic ranges of the tensors during
  // inference.
  loader.compile(cctx);

  // Run the profiling for all the dataset entries. The profiling information
  // is automatically aggregated over all the inference runs.
  for (size_t entryIdx = 0; entryIdx < entryNum; entryIdx++) {

    // Load the tensor data for all the model inputs.
    for (size_t inputIdx = 0; inputIdx < modelNumInputs; inputIdx++) {
      Tensor *inputTensor = bindings.get(inputPlaceholders[inputIdx]);
      std::string filePath = inputDatasets[inputIdx][entryIdx];
      std::string fileFormat = inputFormats[inputIdx];
      if (fileFormat == "rawbin") {
        // The raw files carry no type information: the tensor type comes from
        // the statically configured placeholder.
        TensorSerializationOptions opts;
        opts.withType = false;
        glow::loadTensorFromBinaryFile(*inputTensor, filePath.c_str(), opts);
      } else if (fileFormat == "rawtxt") {
        TensorSerializationOptions opts;
        opts.withType = false;
        glow::loadTensorFromTextFile(*inputTensor, filePath.c_str(), opts);
      } else {
        exitWithErr(strFormat("Input dataset format '%s' is invalid!",
                              fileFormat.c_str()));
      }
    }

    // Run the inference.
    loader.runInference(bindings, 1 /*batchSize*/);
  }

  // Dump the final profile.
  loader.generateAndSerializeProfilingInfos(bindings);

  return 0;
}