1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "Loader.h" |
18 | #include "LoaderUtils.h" |
19 | |
20 | #include "glow/Base/Image.h" |
21 | #include "glow/Graph/Graph.h" |
22 | #include "glow/Graph/Nodes.h" |
23 | #include "glow/Graph/Utils.h" |
24 | #include "glow/Quantization/Serialization.h" |
25 | #include "glow/Support/Support.h" |
26 | |
27 | #include "llvm/Support/CommandLine.h" |
28 | #include "llvm/Support/FileSystem.h" |
29 | #include "llvm/Support/Path.h" |
30 | #include "llvm/Support/raw_ostream.h" |
31 | |
32 | #include <chrono> |
33 | #include <fstream> |
34 | #include <memory> |
35 | #include <sstream> |
36 | |
37 | using namespace glow; |
38 | |
39 | namespace { |
40 | |
41 | /// Model Tuner options |
42 | llvm::cl::OptionCategory modelTunerCat("Model Tuner Options" ); |
43 | |
44 | llvm::cl::opt<std::string> datasetFileOpt( |
45 | "dataset-file" , llvm::cl::Required, |
46 | llvm::cl::desc("Path to the dataset description file which contains on " |
47 | "each line a file path and an integer label separated by " |
48 | "space or comma. The integer labels start with 0 (0,1,..)." |
49 | "An example might look like this:\n" |
50 | " image0.png 0 \n" |
51 | " image1.png 13 \n" |
52 | " ............. \n" |
53 | "Another example might look like this:\n" |
54 | " image0.png,0, \n" |
55 | " image1.png,13, \n" |
56 | " ............. \n" ), |
57 | llvm::cl::value_desc("file.txt|file.csv" ), llvm::cl::cat(modelTunerCat)); |
58 | |
59 | llvm::cl::opt<std::string> datasetPathOpt( |
60 | "dataset-path" , llvm::cl::Required, |
61 | llvm::cl::desc("The path of the directory where the dataset entries are " |
62 | "located." ), |
63 | llvm::cl::value_desc("directory path" ), llvm::cl::cat(modelTunerCat)); |
64 | |
65 | llvm::cl::opt<std::string> dumpTunedProfileFileOpt( |
66 | "dump-tuned-profile" , |
67 | llvm::cl::desc("Output quantization profile obtained after tuning." ), |
68 | llvm::cl::value_desc("profile_output.yaml" ), llvm::cl::Required, |
69 | llvm::cl::cat(modelTunerCat)); |
70 | |
71 | llvm::cl::opt<float> targetAccuracyOpt( |
72 | "target-accuracy" , |
73 | llvm::cl::desc("Stop the quantization tuning/calibration procedure when \n" |
74 | "the accuracy has reached or surpassed the given value. \n" |
75 | "A float value between 0.0 and 1.0 is expected. If not \n" |
76 | "specified, the tuning will run until completion. " ), |
77 | llvm::cl::value_desc("float" ), llvm::cl::Optional, llvm::cl::init(1.0), |
78 | llvm::cl::cat(modelTunerCat)); |
79 | |
80 | llvm::cl::opt<unsigned> maxIterPerNodeOpt( |
81 | "max-iter-per-node" , |
82 | llvm::cl::desc("Maximum number of tuning iterations per node (default 3)." ), |
83 | llvm::cl::value_desc("int" ), llvm::cl::Optional, llvm::cl::init(3), |
84 | llvm::cl::cat(modelTunerCat)); |
85 | |
86 | llvm::cl::opt<float> accDropSkipOpt( |
87 | "acc-drop-skip" , |
88 | llvm::cl::desc("The accuracy drop for which the tuning of any node is \n" |
89 | "skipped. The default value is 0.05 (5%)." ), |
90 | llvm::cl::value_desc("float" ), llvm::cl::Optional, llvm::cl::init(0.05), |
91 | llvm::cl::cat(modelTunerCat)); |
92 | } // namespace |
93 | |
94 | /// Get maximum confidence class (index and value) for the model output. |
95 | static std::pair<unsigned, float> getOutputClass(Tensor *T) { |
96 | CHECK(T->getElementType() == ElemKind::FloatTy) |
97 | << "Model output is expected to be float!" ; |
98 | auto TH = T->getHandle<float>(); |
99 | float maxVal = TH.raw(0); |
100 | unsigned maxIdx = 0; |
101 | for (unsigned idx = 1; idx < TH.size(); ++idx) { |
102 | if (TH.raw(idx) > maxVal) { |
103 | maxVal = TH.raw(idx); |
104 | maxIdx = idx; |
105 | } |
106 | } |
107 | return std::make_pair(maxIdx, maxVal); |
108 | } |
109 | |
110 | /// Function to run the model using the given \p dataset and compute the |
111 | /// accuracy. If \p quantize flag is given then the model is additionally |
112 | /// quantized using the profiling information \p pInfos. |
/// Function to run the model using the given \p dataset and compute the
/// accuracy. If \p quantize flag is given then the model is additionally
/// quantized using the profiling information \p pInfos.
/// \returns the top-1 accuracy as a fraction in [0.0, 1.0].
/// NOTE(review): assumes \p dataset is non-empty — an empty dataset would
/// divide by zero at the end; confirm the dataset reader guarantees this.
float runModelAndGetAccuracy(LabeledDataSet &dataset, bool quantize,
                             std::vector<NodeProfilingInfo> &pInfos) {

  // Initialize the loader object. A fresh Loader is built on every call so
  // the model is re-loaded and re-compiled with the current profiling infos.
  Loader loader;

  // Load the model.
  loader.loadModel();

  // Allocate tensors for all placeholders.
  PlaceholderBindings bindings;
  bindings.allocate(loader.getModule()->getPlaceholders());

  // Get input/output placeholders. The tuner only supports single-input,
  // single-output models (enforced by the CHECKs below).
  auto inpPHMap = loader.getInputPlaceholderMap();
  auto outPHMap = loader.getOutputPlaceholderMap();
  CHECK(inpPHMap.size() == 1) << "Model is expected to have only 1 input!";
  CHECK(outPHMap.size() == 1) << "Model is expected to have only 1 output!";
  Placeholder *input = inpPHMap.begin()->second;
  Placeholder *output = outPHMap.begin()->second;

  // Get compilation options.
  CompilationContext cctx;
  if (quantize) {
    // Get compilation options for quantization.
    cctx = loader.getCompilationContext(QuantizationMode::Quantize);
    // Force the given profiling infos (overrides whatever profile the
    // loader picked up from the command line).
    cctx.precisionConfig.quantConfig.infos = pInfos;
  } else {
    // Get compilation options for running the model as-is (float).
    cctx = loader.getCompilationContext(QuantizationMode::None);
  }
  cctx.bindings = &bindings;

  // Compile the function.
  loader.compile(cctx);

  // Run the function for all the dataset and count top-1 hits.
  size_t correct = 0;
  for (const auto &data : dataset) {
    // Read the image and preprocess. The normalization/channel-order/layout
    // options come from the shared Loader command-line flags; only the first
    // entry of each list option is used (single-input model).
    Tensor inputImg = readPngPpmImageAndPreprocess(data.first, imageNormMode[0],
                                                   imageChannelOrderOpt[0],
                                                   imageLayoutOpt[0]);
    // Expand the 3D image to a batch-of-1 4D tensor without copying data.
    auto imgShape = inputImg.getType().dims();
    Tensor inputTensor =
        inputImg.getUnowned({1, imgShape[0], imgShape[1], imgShape[2]});
    updateInputPlaceholders(*cctx.bindings, {input}, {&inputTensor});
    // Run inference (batch size 1).
    loader.runInference(*cctx.bindings, 1);
    // Get output class and compare against the ground-truth label.
    auto cls = getOutputClass(cctx.bindings->get(output));
    if (cls.first == data.second) {
      ++correct;
    }
  }

  // Compute accuracy as the fraction of correctly classified samples.
  return ((float)correct) / dataset.size();
}
173 | |
174 | /// Function to tune a given tensor for the given function with the given |
175 | /// dataset. |
/// Function to tune a given tensor for the given function with the given
/// dataset. \p pInfos holds the profiling parameters for all tensors;
/// \p qIdx selects the one tensor tuned by this call; \p bestAcc is the
/// best accuracy achieved so far. Each iteration halves the tensor's
/// min/max profiling range, re-quantizes the model, and keeps the range
/// that produced the highest accuracy.
/// \returns the (possibly improved) best accuracy.
float tuneQuantizationForTensor(std::vector<NodeProfilingInfo> &pInfos,
                                LabeledDataSet &dataset, unsigned qIdx,
                                float bestAcc) {

  // Tuning parameters (from the command-line options).
  unsigned maxIterPerNode = maxIterPerNodeOpt;
  float accDropSkip = accDropSkipOpt;

  // Backup profiling parameters for this tensor; restored at the end if no
  // tested range beats the current best.
  auto bestTPP = pInfos[qIdx].tensorProfilingParams_;

  // Get tensor average value (center point for asymmetric range shrinking).
  float tensorAvgVal = quantization::getTensorAverageValue(bestTPP);

  // Get quantization configuration (schema decides how ranges shrink).
  auto quantConfig = Loader::getQuantizationConfiguration();

  // Run the tune iterations for this tensor.
  for (unsigned iterIdx = 0; iterIdx < maxIterPerNode; ++iterIdx) {

    // Get current min/max range. Note: each iteration reads the range set by
    // the previous one, so the shrink factor compounds across iterations.
    float rangeMin = pInfos[qIdx].tensorProfilingParams_.min;
    float rangeMax = pInfos[qIdx].tensorProfilingParams_.max;

    // Skip tuning for this tensor if range is empty (nothing to shrink).
    if (rangeMin == rangeMax) {
      llvm::outs() << " Tuning skipped for this tensor: not required\n";
      break;
    }

    // Get testing min/max range by repeatedly shrinking with a factor of 2.
    float testMin, testMax;
    if (quantConfig.schema == quantization::Asymmetric) {
      // Shrink tensor min/max range around average value.
      testMin = tensorAvgVal - (tensorAvgVal - rangeMin) / 2.0;
      testMax = tensorAvgVal + (rangeMax - tensorAvgVal) / 2.0;
    } else if (quantConfig.schema == quantization::Symmetric ||
               quantConfig.schema == quantization::SymmetricWithUnsigned ||
               quantConfig.schema == quantization::SymmetricWithPower2Scale) {
      // Shrink tensor min/max range around 0 (symmetric schemas require a
      // range centered on zero, so shrink the larger absolute bound).
      float rangeAbsMin = std::abs(rangeMin);
      float rangeAbsMax = std::abs(rangeMax);
      float rangeAbs = rangeAbsMax > rangeAbsMin ? rangeAbsMax : rangeAbsMin;
      testMin = -rangeAbs / 2.0f;
      testMax = +rangeAbs / 2.0f;
    } else {
      llvm_unreachable("Quantization schema not supported!");
    }

    // Set the testing range.
    pInfos[qIdx].tensorProfilingParams_.min = testMin;
    pInfos[qIdx].tensorProfilingParams_.max = testMax;
    llvm::outs() << strFormat("  [%d/%d] Testing range = [%.4f, %.4f]\n",
                              iterIdx + 1, maxIterPerNode, testMin, testMax);

    // Quantize model and compute accuracy for current params.
    float currAcc = runModelAndGetAccuracy(dataset, true, pInfos);
    llvm::outs() << strFormat("  Accuracy = %.4f %%\n", currAcc * 100);

    // If we obtain EXACTLY the same accuracy then the profiling parameters
    // of this tensor have no side effects (most probably are not used).
    // The exact float == compare is deliberate here.
    if (currAcc == bestAcc) {
      llvm::outs()
          << "  Tuning stopped for this tensor: accuracy not improved\n";
      break;
    }

    // If current accuracy is better then save the profiling parameters.
    if (currAcc > bestAcc) {
      bestAcc = currAcc;
      bestTPP = pInfos[qIdx].tensorProfilingParams_;
    }

    // If the current accuracy drops below the best accuracy with a given delta
    // then skip the tuning for the current tensor. (On the last iteration the
    // loop exits anyway, hence the !lastIter guard for the log message.)
    bool lastIter = (iterIdx == (maxIterPerNode - 1));
    if (!lastIter && (currAcc < (bestAcc - accDropSkip))) {
      llvm::outs() << "  Tuning stopped for this tensor: accuracy dropped more "
                   "than \"acc-drop-skip\"\n";
      break;
    }
  }

  // Save best profiling parameters for this tensor (restores the backup if
  // no tested range improved the accuracy).
  pInfos[qIdx].tensorProfilingParams_ = bestTPP;
  llvm::outs() << strFormat("Best accuracy : %.4f %%\n", bestAcc * 100);
  return bestAcc;
}
264 | |
265 | int main(int argc, char **argv) { |
266 | |
267 | // Parse command line parameters. All the options will be available as part of |
268 | // the loader object. |
269 | parseCommandLine(argc, argv); |
270 | |
271 | // Get the input profile used for tuning. |
272 | auto quantConfig = Loader::getQuantizationConfiguration(); |
273 | CHECK(quantConfig.infos.size()) |
274 | << "Input profile not found. Use the -load-profile option!" ; |
275 | auto pInfosTune = quantConfig.infos; |
276 | int tensorQNum = pInfosTune.size(); |
277 | |
278 | // Read tuning dataset. |
279 | LabeledDataSet datasetTune = |
280 | readLabeledDataSet(datasetFileOpt, datasetPathOpt); |
281 | |
282 | // Set output stream to unbuffered state to flush every time. |
283 | llvm::outs().SetUnbuffered(); |
284 | |
285 | // Compute initial accuracy. |
286 | llvm::outs() << strFormat("\nComputing initial accuracy ... \n" ); |
287 | float accValF = runModelAndGetAccuracy(datasetTune, false, pInfosTune); |
288 | float accValQ = runModelAndGetAccuracy(datasetTune, true, pInfosTune); |
289 | llvm::outs() << strFormat("Initial accuracy: %.4f %% (FLOAT)\n" , |
290 | accValF * 100); |
291 | llvm::outs() << strFormat("Initial accuracy: %.4f %% (QUANTIZED)\n" , |
292 | accValQ * 100); |
293 | llvm::outs() << strFormat("Target accuracy: %.4f %% (QUANTIZED)\n" , |
294 | targetAccuracyOpt * 100); |
295 | llvm::outs() << strFormat("Number of tensors: %d\n\n" , tensorQNum); |
296 | |
297 | // Perform tuning for all tunable tensors. |
298 | float accVal = accValQ; |
299 | auto startTime = getTimeStamp(); |
300 | for (int tensorQIdx = 0; tensorQIdx < tensorQNum; ++tensorQIdx) { |
301 | |
302 | // Stop tuning if target accuracy is achieved. |
303 | if (accVal > targetAccuracyOpt) { |
304 | llvm::outs() << "Target accuracy achieved! Tuning is stopped ...\n" ; |
305 | break; |
306 | } |
307 | |
308 | // Tune the quantization for this tensor. |
309 | auto tensorName = pInfosTune[tensorQIdx].nodeOutputName_.data(); |
310 | llvm::outs() << strFormat("[%d/%d] Tuning quantization for tensor \"%s\"\n" , |
311 | tensorQIdx + 1, tensorQNum, tensorName); |
312 | accVal = |
313 | tuneQuantizationForTensor(pInfosTune, datasetTune, tensorQIdx, accVal); |
314 | |
315 | // Display estimated remaining time and stats. |
316 | unsigned iterSec = getDurationSec(startTime) / (tensorQIdx + 1); |
317 | unsigned remSec = iterSec * (tensorQNum - tensorQIdx - 1); |
318 | unsigned remMin = (remSec / 60) % 60; |
319 | unsigned remHrs = (remSec / 60) / 60; |
320 | llvm::outs() << strFormat("Iteration time: %d seconds\n" , iterSec); |
321 | llvm::outs() << strFormat("Remaining time: %d hours %d minutes\n\n" , remHrs, |
322 | remMin); |
323 | } |
324 | |
325 | // Print final accuracy. |
326 | llvm::outs() << strFormat("\nFinal accuracy: %.4f %% (QUANTIZED)\n\n" , |
327 | accVal * 100); |
328 | |
329 | // Print total time. |
330 | unsigned totSec, totMin, totHrs; |
331 | getDuration(startTime, totSec, totMin, totHrs); |
332 | llvm::outs() << strFormat("Total time: %d hours %d minutes\n\n" , totHrs, |
333 | totMin); |
334 | |
335 | // Serialize the tuned output profile. |
336 | serializeProfilingInfosToYaml(dumpTunedProfileFileOpt, |
337 | quantConfig.graphPreLowerHash, pInfosTune); |
338 | |
339 | return 0; |
340 | } |
341 | |