1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "Loader.h" |
18 | #include "LoaderUtils.h" |
19 | |
20 | #include "glow/Base/Image.h" |
21 | #include "glow/Graph/Graph.h" |
22 | #include "glow/Graph/Nodes.h" |
23 | #include "glow/Graph/Utils.h" |
24 | #include "glow/Quantization/Serialization.h" |
25 | #include "glow/Support/Support.h" |
26 | |
27 | #include "llvm/Support/CommandLine.h" |
28 | #include "llvm/Support/FileSystem.h" |
29 | #include "llvm/Support/Path.h" |
30 | #include "llvm/Support/raw_ostream.h" |
31 | |
32 | #include <chrono> |
33 | #include <fstream> |
34 | #include <memory> |
35 | #include <sstream> |
36 | |
37 | using namespace glow; |
38 | |
39 | namespace { |
40 | |
41 | /// Model Tuner options |
42 | llvm::cl::OptionCategory modelTunerCat("Model Tuner Options" ); |
43 | |
44 | llvm::cl::opt<std::string> datasetFileOpt( |
45 | "dataset-file" , llvm::cl::Required, |
46 | llvm::cl::desc("Path to the dataset description file which contains on " |
47 | "each line a file path and an integer label separated by " |
48 | "space or comma. The integer labels start with 0 (0,1,..)." |
49 | "An example might look like this:\n" |
50 | " image0.png 0 \n" |
51 | " image1.png 13 \n" |
52 | " ............. \n" |
53 | "Another example might look like this:\n" |
54 | " image0.png,0, \n" |
55 | " image1.png,13, \n" |
56 | " ............. \n" ), |
57 | llvm::cl::value_desc("file.txt|file.csv" ), llvm::cl::cat(modelTunerCat)); |
58 | |
59 | llvm::cl::opt<std::string> datasetPathOpt( |
60 | "dataset-path" , llvm::cl::Required, |
61 | llvm::cl::desc("The path of the directory where the dataset entries are " |
62 | "located." ), |
63 | llvm::cl::value_desc("directory path" ), llvm::cl::cat(modelTunerCat)); |
64 | |
65 | llvm::cl::opt<std::string> dumpTunedProfileFileOpt( |
66 | "dump-tuned-profile" , |
67 | llvm::cl::desc("Output quantization profile obtained after tuning." ), |
68 | llvm::cl::value_desc("profile_output.yaml" ), llvm::cl::Required, |
69 | llvm::cl::cat(modelTunerCat)); |
70 | |
71 | llvm::cl::opt<float> targetAccuracyOpt( |
72 | "target-accuracy" , |
73 | llvm::cl::desc("Stop the quantization tuning/calibration procedure when \n" |
74 | "the accuracy has reached or surpassed the given value. \n" |
75 | "A float value between 0.0 and 1.0 is expected. If not \n" |
76 | "specified, the tuning will run until completion. " ), |
77 | llvm::cl::value_desc("float" ), llvm::cl::Optional, llvm::cl::init(1.0), |
78 | llvm::cl::cat(modelTunerCat)); |
79 | |
80 | llvm::cl::opt<unsigned> maxIterPerNodeOpt( |
81 | "max-iter-per-node" , |
82 | llvm::cl::desc("Maximum number of tuning iterations per node (default 3)." ), |
83 | llvm::cl::value_desc("int" ), llvm::cl::Optional, llvm::cl::init(3), |
84 | llvm::cl::cat(modelTunerCat)); |
85 | |
86 | llvm::cl::opt<float> accDropSkipOpt( |
87 | "acc-drop-skip" , |
88 | llvm::cl::desc("The accuracy drop for which the tuning of any node is \n" |
89 | "skipped. The default value is 0.05 (5%)." ), |
90 | llvm::cl::value_desc("float" ), llvm::cl::Optional, llvm::cl::init(0.05), |
91 | llvm::cl::cat(modelTunerCat)); |
92 | } // namespace |
93 | |
94 | /// Get maximum confidence class (index and value) for the model output. |
95 | static std::pair<unsigned, float> getOutputClass(Tensor *T) { |
96 | CHECK(T->getElementType() == ElemKind::FloatTy) |
97 | << "Model output is expected to be float!" ; |
98 | auto TH = T->getHandle<float>(); |
99 | float maxVal = TH.raw(0); |
100 | unsigned maxIdx = 0; |
101 | for (unsigned idx = 1; idx < TH.size(); ++idx) { |
102 | if (TH.raw(idx) > maxVal) { |
103 | maxVal = TH.raw(idx); |
104 | maxIdx = idx; |
105 | } |
106 | } |
107 | return std::make_pair(maxIdx, maxVal); |
108 | } |
109 | |
110 | /// Function to run the model using the given \p dataset and compute the |
111 | /// accuracy. If \p quantize flag is given then the model is additionally |
112 | /// quantized using the profiling information \p pInfos. |
/// Function to run the model using the given \p dataset and compute the
/// accuracy. If \p quantize flag is given then the model is additionally
/// quantized using the profiling information \p pInfos.
/// \returns the top-1 accuracy as a fraction in [0.0, 1.0].
/// NOTE(review): assumes \p dataset is non-empty — an empty dataset would
/// divide by zero at the end; confirm the dataset reader guarantees this.
float runModelAndGetAccuracy(LabeledDataSet &dataset, bool quantize,
                             std::vector<NodeProfilingInfo> &pInfos) {

  // Initialize the loader object. A fresh Loader is built on every call so
  // the model is re-loaded and re-compiled with the current profiling infos.
  Loader loader;

  // Load the model.
  loader.loadModel();

  // Allocate tensors for all placeholders.
  PlaceholderBindings bindings;
  bindings.allocate(loader.getModule()->getPlaceholders());

  // Get input/output placeholders. The tuner only supports single-input,
  // single-output models (enforced by the CHECKs below).
  auto inpPHMap = loader.getInputPlaceholderMap();
  auto outPHMap = loader.getOutputPlaceholderMap();
  CHECK(inpPHMap.size() == 1) << "Model is expected to have only 1 input!";
  CHECK(outPHMap.size() == 1) << "Model is expected to have only 1 output!";
  Placeholder *input = inpPHMap.begin()->second;
  Placeholder *output = outPHMap.begin()->second;

  // Get compilation options.
  CompilationContext cctx;
  if (quantize) {
    // Get compilation options for quantization.
    cctx = loader.getCompilationContext(QuantizationMode::Quantize);
    // Force the given profiling infos (overrides whatever profile the
    // loader picked up from the command line).
    cctx.precisionConfig.quantConfig.infos = pInfos;
  } else {
    // Get compilation options for running the model as-is (float).
    cctx = loader.getCompilationContext(QuantizationMode::None);
  }
  cctx.bindings = &bindings;

  // Compile the function.
  loader.compile(cctx);

  // Run the function for all the dataset and count top-1 hits.
  size_t correct = 0;
  for (const auto &data : dataset) {
    // Read the image and preprocess. The normalization/channel-order/layout
    // options come from the shared Loader command-line flags; only the first
    // entry of each list option is used (single-input model).
    Tensor inputImg = readPngPpmImageAndPreprocess(data.first, imageNormMode[0],
                                                   imageChannelOrderOpt[0],
                                                   imageLayoutOpt[0]);
    // Expand the 3D image to a batch-of-1 4D tensor without copying data.
    auto imgShape = inputImg.getType().dims();
    Tensor inputTensor =
        inputImg.getUnowned({1, imgShape[0], imgShape[1], imgShape[2]});
    updateInputPlaceholders(*cctx.bindings, {input}, {&inputTensor});
    // Run inference (batch size 1).
    loader.runInference(*cctx.bindings, 1);
    // Get output class and compare against the ground-truth label.
    auto cls = getOutputClass(cctx.bindings->get(output));
    if (cls.first == data.second) {
      ++correct;
    }
  }

  // Compute accuracy as the fraction of correctly classified samples.
  return ((float)correct) / dataset.size();
}
173 | |
174 | /// Function to tune a given tensor for the given function with the given |
175 | /// dataset. |
/// Function to tune a given tensor for the given function with the given
/// dataset. \p pInfos holds the profiling parameters for all tensors;
/// \p qIdx selects the one tensor tuned by this call; \p bestAcc is the
/// best accuracy achieved so far. Each iteration halves the tensor's
/// min/max profiling range, re-quantizes the model, and keeps the range
/// that produced the highest accuracy.
/// \returns the (possibly improved) best accuracy.
float tuneQuantizationForTensor(std::vector<NodeProfilingInfo> &pInfos,
                                LabeledDataSet &dataset, unsigned qIdx,
                                float bestAcc) {

  // Tuning parameters (from the command-line options).
  unsigned maxIterPerNode = maxIterPerNodeOpt;
  float accDropSkip = accDropSkipOpt;

  // Backup profiling parameters for this tensor; restored at the end if no
  // tested range beats the current best.
  auto bestTPP = pInfos[qIdx].tensorProfilingParams_;

  // Get tensor average value (center point for asymmetric range shrinking).
  float tensorAvgVal = quantization::getTensorAverageValue(bestTPP);

  // Get quantization configuration (schema decides how ranges shrink).
  auto quantConfig = Loader::getQuantizationConfiguration();

  // Run the tune iterations for this tensor.
  for (unsigned iterIdx = 0; iterIdx < maxIterPerNode; ++iterIdx) {

    // Get current min/max range. Note: each iteration reads the range set by
    // the previous one, so the shrink factor compounds across iterations.
    float rangeMin = pInfos[qIdx].tensorProfilingParams_.min;
    float rangeMax = pInfos[qIdx].tensorProfilingParams_.max;

    // Skip tuning for this tensor if range is empty (nothing to shrink).
    if (rangeMin == rangeMax) {
      llvm::outs() << " Tuning skipped for this tensor: not required\n";
      break;
    }

    // Get testing min/max range by repeatedly shrinking with a factor of 2.
    float testMin, testMax;
    if (quantConfig.schema == quantization::Asymmetric) {
      // Shrink tensor min/max range around average value.
      testMin = tensorAvgVal - (tensorAvgVal - rangeMin) / 2.0;
      testMax = tensorAvgVal + (rangeMax - tensorAvgVal) / 2.0;
    } else if (quantConfig.schema == quantization::Symmetric ||
               quantConfig.schema == quantization::SymmetricWithUnsigned ||
               quantConfig.schema == quantization::SymmetricWithPower2Scale) {
      // Shrink tensor min/max range around 0 (symmetric schemas require a
      // range centered on zero, so shrink the larger absolute bound).
      float rangeAbsMin = std::abs(rangeMin);
      float rangeAbsMax = std::abs(rangeMax);
      float rangeAbs = rangeAbsMax > rangeAbsMin ? rangeAbsMax : rangeAbsMin;
      testMin = -rangeAbs / 2.0f;
      testMax = +rangeAbs / 2.0f;
    } else {
      llvm_unreachable("Quantization schema not supported!");
    }

    // Set the testing range.
    pInfos[qIdx].tensorProfilingParams_.min = testMin;
    pInfos[qIdx].tensorProfilingParams_.max = testMax;
    llvm::outs() << strFormat("  [%d/%d] Testing range = [%.4f, %.4f]\n",
                              iterIdx + 1, maxIterPerNode, testMin, testMax);

    // Quantize model and compute accuracy for current params.
    float currAcc = runModelAndGetAccuracy(dataset, true, pInfos);
    llvm::outs() << strFormat("  Accuracy = %.4f %%\n", currAcc * 100);

    // If we obtain EXACTLY the same accuracy then the profiling parameters
    // of this tensor have no side effects (most probably are not used).
    // The exact float == compare is deliberate here.
    if (currAcc == bestAcc) {
      llvm::outs()
          << "  Tuning stopped for this tensor: accuracy not improved\n";
      break;
    }

    // If current accuracy is better then save the profiling parameters.
    if (currAcc > bestAcc) {
      bestAcc = currAcc;
      bestTPP = pInfos[qIdx].tensorProfilingParams_;
    }

    // If the current accuracy drops below the best accuracy with a given delta
    // then skip the tuning for the current tensor. (On the last iteration the
    // loop exits anyway, hence the !lastIter guard for the log message.)
    bool lastIter = (iterIdx == (maxIterPerNode - 1));
    if (!lastIter && (currAcc < (bestAcc - accDropSkip))) {
      llvm::outs() << "  Tuning stopped for this tensor: accuracy dropped more "
                   "than \"acc-drop-skip\"\n";
      break;
    }
  }

  // Save best profiling parameters for this tensor (restores the backup if
  // no tested range improved the accuracy).
  pInfos[qIdx].tensorProfilingParams_ = bestTPP;
  llvm::outs() << strFormat("Best accuracy : %.4f %%\n", bestAcc * 100);
  return bestAcc;
}
264 | |
265 | int main(int argc, char **argv) { |
266 | |
267 | // Parse command line parameters. All the options will be available as part of |
268 | // the loader object. |
269 | parseCommandLine(argc, argv); |
270 | |
271 | // Get the input profile used for tuning. |
272 | auto quantConfig = Loader::getQuantizationConfiguration(); |
273 | CHECK(quantConfig.infos.size()) |
274 | << "Input profile not found. Use the -load-profile option!" ; |
275 | auto pInfosTune = quantConfig.infos; |
276 | int tensorQNum = pInfosTune.size(); |
277 | |
278 | // Read tuning dataset. |
279 | LabeledDataSet datasetTune = |
280 | readLabeledDataSet(datasetFileOpt, datasetPathOpt); |
281 | |
282 | // Set output stream to unbuffered state to flush every time. |
283 | llvm::outs().SetUnbuffered(); |
284 | |
285 | // Compute initial accuracy. |
286 | llvm::outs() << strFormat("\nComputing initial accuracy ... \n" ); |
287 | float accValF = runModelAndGetAccuracy(datasetTune, false, pInfosTune); |
288 | float accValQ = runModelAndGetAccuracy(datasetTune, true, pInfosTune); |
289 | llvm::outs() << strFormat("Initial accuracy: %.4f %% (FLOAT)\n" , |
290 | accValF * 100); |
291 | llvm::outs() << strFormat("Initial accuracy: %.4f %% (QUANTIZED)\n" , |
292 | accValQ * 100); |
293 | llvm::outs() << strFormat("Target accuracy: %.4f %% (QUANTIZED)\n" , |
294 | targetAccuracyOpt * 100); |
295 | llvm::outs() << strFormat("Number of tensors: %d\n\n" , tensorQNum); |
296 | |
297 | // Perform tuning for all tunable tensors. |
298 | float accVal = accValQ; |
299 | auto startTime = getTimeStamp(); |
300 | for (int tensorQIdx = 0; tensorQIdx < tensorQNum; ++tensorQIdx) { |
301 | |
302 | // Stop tuning if target accuracy is achieved. |
303 | if (accVal > targetAccuracyOpt) { |
304 | llvm::outs() << "Target accuracy achieved! Tuning is stopped ...\n" ; |
305 | break; |
306 | } |
307 | |
308 | // Tune the quantization for this tensor. |
309 | auto tensorName = pInfosTune[tensorQIdx].nodeOutputName_.data(); |
310 | llvm::outs() << strFormat("[%d/%d] Tuning quantization for tensor \"%s\"\n" , |
311 | tensorQIdx + 1, tensorQNum, tensorName); |
312 | accVal = |
313 | tuneQuantizationForTensor(pInfosTune, datasetTune, tensorQIdx, accVal); |
314 | |
315 | // Display estimated remaining time and stats. |
316 | unsigned iterSec = getDurationSec(startTime) / (tensorQIdx + 1); |
317 | unsigned remSec = iterSec * (tensorQNum - tensorQIdx - 1); |
318 | unsigned remMin = (remSec / 60) % 60; |
319 | unsigned remHrs = (remSec / 60) / 60; |
320 | llvm::outs() << strFormat("Iteration time: %d seconds\n" , iterSec); |
321 | llvm::outs() << strFormat("Remaining time: %d hours %d minutes\n\n" , remHrs, |
322 | remMin); |
323 | } |
324 | |
325 | // Print final accuracy. |
326 | llvm::outs() << strFormat("\nFinal accuracy: %.4f %% (QUANTIZED)\n\n" , |
327 | accVal * 100); |
328 | |
329 | // Print total time. |
330 | unsigned totSec, totMin, totHrs; |
331 | getDuration(startTime, totSec, totMin, totHrs); |
332 | llvm::outs() << strFormat("Total time: %d hours %d minutes\n\n" , totHrs, |
333 | totMin); |
334 | |
335 | // Serialize the tuned output profile. |
336 | serializeProfilingInfosToYaml(dumpTunedProfileFileOpt, |
337 | quantConfig.graphPreLowerHash, pInfosTune); |
338 | |
339 | return 0; |
340 | } |
341 | |