1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "Loader.h"
18#include "LoaderUtils.h"
19
20#include "glow/Base/Image.h"
21#include "glow/Graph/Graph.h"
22#include "glow/Graph/Nodes.h"
23#include "glow/Graph/Utils.h"
24#include "glow/Quantization/Serialization.h"
25#include "glow/Support/Support.h"
26
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/FileSystem.h"
29#include "llvm/Support/Path.h"
30#include "llvm/Support/raw_ostream.h"
31
32#include <chrono>
33#include <fstream>
34#include <memory>
35#include <sstream>
36
37using namespace glow;
38
39namespace {
40
41/// Model Tuner options
42llvm::cl::OptionCategory modelTunerCat("Model Tuner Options");
43
44llvm::cl::opt<std::string> datasetFileOpt(
45 "dataset-file", llvm::cl::Required,
46 llvm::cl::desc("Path to the dataset description file which contains on "
47 "each line a file path and an integer label separated by "
48 "space or comma. The integer labels start with 0 (0,1,..)."
49 "An example might look like this:\n"
50 " image0.png 0 \n"
51 " image1.png 13 \n"
52 " ............. \n"
53 "Another example might look like this:\n"
54 " image0.png,0, \n"
55 " image1.png,13, \n"
56 " ............. \n"),
57 llvm::cl::value_desc("file.txt|file.csv"), llvm::cl::cat(modelTunerCat));
58
59llvm::cl::opt<std::string> datasetPathOpt(
60 "dataset-path", llvm::cl::Required,
61 llvm::cl::desc("The path of the directory where the dataset entries are "
62 "located."),
63 llvm::cl::value_desc("directory path"), llvm::cl::cat(modelTunerCat));
64
65llvm::cl::opt<std::string> dumpTunedProfileFileOpt(
66 "dump-tuned-profile",
67 llvm::cl::desc("Output quantization profile obtained after tuning."),
68 llvm::cl::value_desc("profile_output.yaml"), llvm::cl::Required,
69 llvm::cl::cat(modelTunerCat));
70
71llvm::cl::opt<float> targetAccuracyOpt(
72 "target-accuracy",
73 llvm::cl::desc("Stop the quantization tuning/calibration procedure when \n"
74 "the accuracy has reached or surpassed the given value. \n"
75 "A float value between 0.0 and 1.0 is expected. If not \n"
76 "specified, the tuning will run until completion. "),
77 llvm::cl::value_desc("float"), llvm::cl::Optional, llvm::cl::init(1.0),
78 llvm::cl::cat(modelTunerCat));
79
80llvm::cl::opt<unsigned> maxIterPerNodeOpt(
81 "max-iter-per-node",
82 llvm::cl::desc("Maximum number of tuning iterations per node (default 3)."),
83 llvm::cl::value_desc("int"), llvm::cl::Optional, llvm::cl::init(3),
84 llvm::cl::cat(modelTunerCat));
85
86llvm::cl::opt<float> accDropSkipOpt(
87 "acc-drop-skip",
88 llvm::cl::desc("The accuracy drop for which the tuning of any node is \n"
89 "skipped. The default value is 0.05 (5%)."),
90 llvm::cl::value_desc("float"), llvm::cl::Optional, llvm::cl::init(0.05),
91 llvm::cl::cat(modelTunerCat));
92} // namespace
93
94/// Get maximum confidence class (index and value) for the model output.
95static std::pair<unsigned, float> getOutputClass(Tensor *T) {
96 CHECK(T->getElementType() == ElemKind::FloatTy)
97 << "Model output is expected to be float!";
98 auto TH = T->getHandle<float>();
99 float maxVal = TH.raw(0);
100 unsigned maxIdx = 0;
101 for (unsigned idx = 1; idx < TH.size(); ++idx) {
102 if (TH.raw(idx) > maxVal) {
103 maxVal = TH.raw(idx);
104 maxIdx = idx;
105 }
106 }
107 return std::make_pair(maxIdx, maxVal);
108}
109
110/// Function to run the model using the given \p dataset and compute the
111/// accuracy. If \p quantize flag is given then the model is additionally
112/// quantized using the profiling information \p pInfos.
113float runModelAndGetAccuracy(LabeledDataSet &dataset, bool quantize,
114 std::vector<NodeProfilingInfo> &pInfos) {
115
116 // Initialize the loader object.
117 Loader loader;
118
119 // Load the model.
120 loader.loadModel();
121
122 // Allocate tensors for all placeholders.
123 PlaceholderBindings bindings;
124 bindings.allocate(loader.getModule()->getPlaceholders());
125
126 // Get input/output placeholders.
127 auto inpPHMap = loader.getInputPlaceholderMap();
128 auto outPHMap = loader.getOutputPlaceholderMap();
129 CHECK(inpPHMap.size() == 1) << "Model is expected to have only 1 input!";
130 CHECK(outPHMap.size() == 1) << "Model is expected to have only 1 output!";
131 Placeholder *input = inpPHMap.begin()->second;
132 Placeholder *output = outPHMap.begin()->second;
133
134 // Get compilation options.
135 CompilationContext cctx;
136 if (quantize) {
137 // Get compilation options for quantization.
138 cctx = loader.getCompilationContext(QuantizationMode::Quantize);
139 // Force the given profiling infos.
140 cctx.precisionConfig.quantConfig.infos = pInfos;
141 } else {
142 // Get compilation options for running the model as-is.
143 cctx = loader.getCompilationContext(QuantizationMode::None);
144 }
145 cctx.bindings = &bindings;
146
147 // Compile the function.
148 loader.compile(cctx);
149
150 // Run the function for all the dataset.
151 size_t correct = 0;
152 for (const auto &data : dataset) {
153 // Read the image and preprocess.
154 Tensor inputImg = readPngPpmImageAndPreprocess(data.first, imageNormMode[0],
155 imageChannelOrderOpt[0],
156 imageLayoutOpt[0]);
157 auto imgShape = inputImg.getType().dims();
158 Tensor inputTensor =
159 inputImg.getUnowned({1, imgShape[0], imgShape[1], imgShape[2]});
160 updateInputPlaceholders(*cctx.bindings, {input}, {&inputTensor});
161 // Run inference.
162 loader.runInference(*cctx.bindings, 1);
163 // Get output class.
164 auto cls = getOutputClass(cctx.bindings->get(output));
165 if (cls.first == data.second) {
166 ++correct;
167 }
168 }
169
170 // Compute accuracy.
171 return ((float)correct) / dataset.size();
172}
173
174/// Function to tune a given tensor for the given function with the given
175/// dataset.
176float tuneQuantizationForTensor(std::vector<NodeProfilingInfo> &pInfos,
177 LabeledDataSet &dataset, unsigned qIdx,
178 float bestAcc) {
179
180 // Tuning parameters.
181 unsigned maxIterPerNode = maxIterPerNodeOpt;
182 float accDropSkip = accDropSkipOpt;
183
184 // Backup profiling parameters for this tensor.
185 auto bestTPP = pInfos[qIdx].tensorProfilingParams_;
186
187 // Get tensor average value.
188 float tensorAvgVal = quantization::getTensorAverageValue(bestTPP);
189
190 // Get quantization configuration.
191 auto quantConfig = Loader::getQuantizationConfiguration();
192
193 // Run the tune iterations for this tensor.
194 for (unsigned iterIdx = 0; iterIdx < maxIterPerNode; ++iterIdx) {
195
196 // Get current min/max range.
197 float rangeMin = pInfos[qIdx].tensorProfilingParams_.min;
198 float rangeMax = pInfos[qIdx].tensorProfilingParams_.max;
199
200 // Skip tuning for this tensor if range is empty.
201 if (rangeMin == rangeMax) {
202 llvm::outs() << " Tuning skipped for this tensor: not required\n";
203 break;
204 }
205
206 // Get testing min/max range by repeatedly shrinking with a factor of 2.
207 float testMin, testMax;
208 if (quantConfig.schema == quantization::Asymmetric) {
209 // Shrink tensor min/max range around average value.
210 testMin = tensorAvgVal - (tensorAvgVal - rangeMin) / 2.0;
211 testMax = tensorAvgVal + (rangeMax - tensorAvgVal) / 2.0;
212 } else if (quantConfig.schema == quantization::Symmetric ||
213 quantConfig.schema == quantization::SymmetricWithUnsigned ||
214 quantConfig.schema == quantization::SymmetricWithPower2Scale) {
215 // Shrink tensor min/max range around 0.
216 float rangeAbsMin = std::abs(rangeMin);
217 float rangeAbsMax = std::abs(rangeMax);
218 float rangeAbs = rangeAbsMax > rangeAbsMin ? rangeAbsMax : rangeAbsMin;
219 testMin = -rangeAbs / 2.0f;
220 testMax = +rangeAbs / 2.0f;
221 } else {
222 llvm_unreachable("Quantization schema not supported!");
223 }
224
225 // Set the testing range.
226 pInfos[qIdx].tensorProfilingParams_.min = testMin;
227 pInfos[qIdx].tensorProfilingParams_.max = testMax;
228 llvm::outs() << strFormat(" [%d/%d] Testing range = [%.4f, %.4f]\n",
229 iterIdx + 1, maxIterPerNode, testMin, testMax);
230
231 // Quantize model and compute accuracy for current params.
232 float currAcc = runModelAndGetAccuracy(dataset, true, pInfos);
233 llvm::outs() << strFormat(" Accuracy = %.4f %%\n", currAcc * 100);
234
235 // If we obtain EXACTLY the same accuracy then the profiling parameters
236 // of this tensor have no side effects (most probably are not used).
237 if (currAcc == bestAcc) {
238 llvm::outs()
239 << " Tuning stopped for this tensor: accuracy not improved\n";
240 break;
241 }
242
243 // If current accuracy is better then save the profiling parameters.
244 if (currAcc > bestAcc) {
245 bestAcc = currAcc;
246 bestTPP = pInfos[qIdx].tensorProfilingParams_;
247 }
248
249 // If the current accuracy drops below the best accuracy with a given delta
250 // then skip the tuning for the current tensor.
251 bool lastIter = (iterIdx == (maxIterPerNode - 1));
252 if (!lastIter && (currAcc < (bestAcc - accDropSkip))) {
253 llvm::outs() << " Tuning stopped for this tensor: accuracy dropped more "
254 "than \"acc-drop-skip\"\n";
255 break;
256 }
257 }
258
259 // Save best profiling parameters for this tensor.
260 pInfos[qIdx].tensorProfilingParams_ = bestTPP;
261 llvm::outs() << strFormat("Best accuracy : %.4f %%\n", bestAcc * 100);
262 return bestAcc;
263}
264
265int main(int argc, char **argv) {
266
267 // Parse command line parameters. All the options will be available as part of
268 // the loader object.
269 parseCommandLine(argc, argv);
270
271 // Get the input profile used for tuning.
272 auto quantConfig = Loader::getQuantizationConfiguration();
273 CHECK(quantConfig.infos.size())
274 << "Input profile not found. Use the -load-profile option!";
275 auto pInfosTune = quantConfig.infos;
276 int tensorQNum = pInfosTune.size();
277
278 // Read tuning dataset.
279 LabeledDataSet datasetTune =
280 readLabeledDataSet(datasetFileOpt, datasetPathOpt);
281
282 // Set output stream to unbuffered state to flush every time.
283 llvm::outs().SetUnbuffered();
284
285 // Compute initial accuracy.
286 llvm::outs() << strFormat("\nComputing initial accuracy ... \n");
287 float accValF = runModelAndGetAccuracy(datasetTune, false, pInfosTune);
288 float accValQ = runModelAndGetAccuracy(datasetTune, true, pInfosTune);
289 llvm::outs() << strFormat("Initial accuracy: %.4f %% (FLOAT)\n",
290 accValF * 100);
291 llvm::outs() << strFormat("Initial accuracy: %.4f %% (QUANTIZED)\n",
292 accValQ * 100);
293 llvm::outs() << strFormat("Target accuracy: %.4f %% (QUANTIZED)\n",
294 targetAccuracyOpt * 100);
295 llvm::outs() << strFormat("Number of tensors: %d\n\n", tensorQNum);
296
297 // Perform tuning for all tunable tensors.
298 float accVal = accValQ;
299 auto startTime = getTimeStamp();
300 for (int tensorQIdx = 0; tensorQIdx < tensorQNum; ++tensorQIdx) {
301
302 // Stop tuning if target accuracy is achieved.
303 if (accVal > targetAccuracyOpt) {
304 llvm::outs() << "Target accuracy achieved! Tuning is stopped ...\n";
305 break;
306 }
307
308 // Tune the quantization for this tensor.
309 auto tensorName = pInfosTune[tensorQIdx].nodeOutputName_.data();
310 llvm::outs() << strFormat("[%d/%d] Tuning quantization for tensor \"%s\"\n",
311 tensorQIdx + 1, tensorQNum, tensorName);
312 accVal =
313 tuneQuantizationForTensor(pInfosTune, datasetTune, tensorQIdx, accVal);
314
315 // Display estimated remaining time and stats.
316 unsigned iterSec = getDurationSec(startTime) / (tensorQIdx + 1);
317 unsigned remSec = iterSec * (tensorQNum - tensorQIdx - 1);
318 unsigned remMin = (remSec / 60) % 60;
319 unsigned remHrs = (remSec / 60) / 60;
320 llvm::outs() << strFormat("Iteration time: %d seconds\n", iterSec);
321 llvm::outs() << strFormat("Remaining time: %d hours %d minutes\n\n", remHrs,
322 remMin);
323 }
324
325 // Print final accuracy.
326 llvm::outs() << strFormat("\nFinal accuracy: %.4f %% (QUANTIZED)\n\n",
327 accVal * 100);
328
329 // Print total time.
330 unsigned totSec, totMin, totHrs;
331 getDuration(startTime, totSec, totMin, totHrs);
332 llvm::outs() << strFormat("Total time: %d hours %d minutes\n\n", totHrs,
333 totMin);
334
335 // Serialize the tuned output profile.
336 serializeProfilingInfosToYaml(dumpTunedProfileFileOpt,
337 quantConfig.graphPreLowerHash, pInfosTune);
338
339 return 0;
340}
341