1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "BackendTestUtils.h" |
18 | |
19 | #include "glow/ExecutionEngine/ExecutionEngine.h" |
20 | #include "glow/Graph/Graph.h" |
21 | #include "glow/IR/IR.h" |
22 | #include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h" |
23 | #include "glow/Quantization/Base/Base.h" |
24 | #include "glow/Quantization/Base/Calibration.h" |
25 | #include "glow/Quantization/Base/Profile.h" |
26 | #include "glow/Quantization/Quantization.h" |
27 | #include "glow/Quantization/Serialization.h" |
28 | |
29 | #include "gtest/gtest.h" |
30 | |
31 | #include "llvm/ADT/SmallVector.h" |
32 | #include "llvm/Support/FileSystem.h" |
33 | |
34 | namespace glow { |
35 | |
36 | using llvm::cast; |
37 | |
/// Empty fixture used only to group the quantization unit tests.
class Quantization : public ::testing::Test {};
39 | |
40 | class Operator |
41 | : public ::testing::TestWithParam<::std::tuple<std::string, std::string>> { |
42 | protected: |
43 | ExecutionEngine profileEE{}; |
44 | ExecutionEngine backendSpecificEE{}; |
45 | |
46 | void SetUp() override { |
47 | std::string backend1; |
48 | std::string backend2; |
49 | std::tie(backend1, backend2) = GetParam(); |
50 | profileEE.setBackendName(backend1); |
51 | backendSpecificEE.setBackendName(backend2); |
52 | } |
53 | }; |
54 | |
/// Element-wise comparison of two float vectors.
/// \returns true iff both vectors have the same length and identical contents.
/// The length check is required: std::equal alone would read past the end of
/// a shorter \p rhs, and would wrongly report equality when \p lhs is a
/// prefix of \p rhs.
bool operator==(const std::vector<float> &lhs, const std::vector<float> &rhs) {
  return lhs.size() == rhs.size() &&
         std::equal(lhs.begin(), lhs.end(), rhs.begin());
}
58 | |
59 | bool operator==(const NodeProfilingInfo &lhs, const NodeProfilingInfo &rhs) { |
60 | return lhs.min() == rhs.min() && lhs.max() == rhs.max() && |
61 | lhs.nodeOutputName_ == rhs.nodeOutputName_ && |
62 | lhs.histogram() == rhs.histogram(); |
63 | } |
64 | |
65 | bool operator==(const NodeQuantizationInfo &lhs, |
66 | const NodeQuantizationInfo &rhs) { |
67 | return lhs.scale() == rhs.scale() && lhs.offset() == rhs.offset() && |
68 | lhs.nodeOutputName_ == rhs.nodeOutputName_; |
69 | } |
70 | |
71 | /// This is a mock backend which extended support of quantized operators. |
72 | class MockQuantBackend : public Backend { |
73 | // The actual backend being wrapped. |
74 | std::unique_ptr<Backend> backend_; |
75 | |
76 | public: |
77 | MockQuantBackend() { backend_.reset(createBackend("Interpreter" )); } |
78 | |
79 | std::string getBackendName() const override { return "Interpreter" ; } |
80 | |
81 | Expected<std::unique_ptr<CompiledFunction>> |
82 | compile(Function *F, const BackendOptions &opts) const override { |
83 | return backend_->compile(F, opts); |
84 | } |
85 | |
86 | runtime::DeviceManager * |
87 | createDeviceManager(const runtime::DeviceConfig &deviceConfig) override { |
88 | return nullptr; |
89 | } |
90 | |
91 | bool isOpSupported(const NodeInfo &NI) const override { |
92 | if (NI.getKind() == Kinded::Kind::SoftMaxNodeKind || |
93 | NI.getKind() == Kinded::Kind::LocalResponseNormalizationNodeKind || |
94 | NI.getKind() == Kinded::Kind::SaveNodeKind || |
95 | NI.getKind() == Kinded::Kind::ReluNodeKind || |
96 | NI.getKind() == Kinded::Kind::SelectNodeKind || |
97 | NI.getKind() == Kinded::Kind::LogNodeKind || |
98 | NI.getKind() == Kinded::Kind::SigmoidNodeKind || |
99 | NI.getKind() == Kinded::Kind::TanhNodeKind) { |
100 | return true; |
101 | } |
102 | return backend_->isOpSupported(NI); |
103 | } |
104 | }; |
105 | |
106 | /// Simple tests to verify the histogram rescale. |
/// Simple tests to verify the histogram rescale.
TEST(Quantization, rescaleHistogramTest) {
  // An empty histogram stays empty regardless of the range change.
  EXPECT_EQ(quantization::rescaleHistogram({}, 0.0f, 1.0f, 0.0f, 2.0).size(),
            0);
  // Widening the range to the left: the original counts end up merged into
  // the upper bins of the rescaled histogram (1+2 -> 3, 3+4 -> 7).
  EXPECT_EQ(
      quantization::rescaleHistogram({1, 2, 3, 4}, 0.0f, 1.0f, -1.0f, 1.0),
      std::vector<float>({0, 0, 3, 7}));
  // Narrowing from [-1, 1] to [0, 1]: only the counts from the upper half
  // remain, redistributed across all four bins.
  EXPECT_EQ(
      quantization::rescaleHistogram({2, 4, 6, 8}, -1.0f, 1.0f, 0.0f, 1.0),
      std::vector<float>({3, 3, 4, 4}));
}
117 | |
118 | /// Simple tests to verify the KL optimization. |
119 | TEST(Quantization, optimizeKLTest) { |
120 | // Test that an all-zero histogram does not raise exceptions. |
121 | std::vector<float> histAllZero(1000, 0); |
122 | quantization::FloatRange rangeAllZero = |
123 | quantization::optimizeKL(histAllZero, 0.f, 1.0f, 255); |
124 | EXPECT_EQ(rangeAllZero.first, 0.f); |
125 | EXPECT_EQ(rangeAllZero.second, 1.0f); |
126 | |
127 | // Test that an empty histogram does not raise exceptions. |
128 | std::vector<float> histEmpty; |
129 | quantization::FloatRange rangeEmpty = |
130 | quantization::optimizeKL(histEmpty, 0.f, 1.0f, 255); |
131 | EXPECT_EQ(rangeEmpty.first, 0.f); |
132 | EXPECT_EQ(rangeEmpty.second, 1.0f); |
133 | } |
134 | |
135 | void testProfilingInfosSerialization(std::vector<NodeProfilingInfo> &expected) { |
136 | llvm::SmallVector<char, 10> resultPath; |
137 | llvm::sys::fs::createTemporaryFile("prefix" , "suffix" , resultPath); |
138 | std::string filePath(resultPath.begin(), resultPath.end()); |
139 | llvm::hash_code hash = 13; |
140 | serializeProfilingInfosToYaml(filePath, hash, expected); |
141 | std::vector<NodeProfilingInfo> deserialized; |
142 | llvm::hash_code hashDeserialized; |
143 | auto fileExists = deserializeProfilingInfosFromYaml( |
144 | filePath, hashDeserialized, deserialized); |
145 | llvm::sys::fs::remove(filePath); |
146 | EXPECT_TRUE(fileExists); |
147 | EXPECT_EQ(static_cast<size_t>(hash), static_cast<size_t>(hashDeserialized)); |
148 | EXPECT_EQ(expected, deserialized); |
149 | } |
150 | |
/// Deserializing from a path which does not exist must report failure
/// instead of crashing.
TEST(Quantization, DeserializeNonExistingFile) {
  std::string fakeFilePath = "/fake";
  std::vector<NodeProfilingInfo> deserialized;
  llvm::hash_code hashDeserialized;
  auto fileExists = deserializeProfilingInfosFromYaml(
      fakeFilePath, hashDeserialized, deserialized);
  EXPECT_FALSE(fileExists);
}
159 | |
/// Round-trip a mix of profiling infos: an empty histogram, a non-trivial
/// histogram, and ranges with positive, negative and fractional bounds.
TEST(Quantization, ProfilingSerialize) {
  std::vector<float> histEmpty;
  std::vector<float> hist = {0, 1, 2, 3, 4};
  std::vector<NodeProfilingInfo> expected{{"first", {1.0, 10.0, histEmpty}},
                                          {"second", {-1.0, 3.0, hist}},
                                          {"third", {-10.0, 30.0, hist}},
                                          {"fourth", {0.1, 10.0, hist}},
                                          {"fifth", {0.123, 30.0, hist}}};
  testProfilingInfosSerialization(expected);
}
170 | |
/// Round-trip profiling infos whose range bounds are exact powers of two
/// (2^0 down to 2^-9 for min, 2^0 up to 2^9 for max); these values must
/// survive YAML serialization without precision loss.
TEST(Quantization, ProfilingSerializePower2Range) {
  std::vector<NodeProfilingInfo> expected{
      {"pwr_0", {1.0000000000f, 1.0f}},   {"pwr_1", {0.5000000000f, 2.0f}},
      {"pwr_2", {0.2500000000f, 4.0f}},   {"pwr_3", {0.1250000000f, 8.0f}},
      {"pwr_4", {0.0625000000f, 16.0f}},  {"pwr_5", {0.0312500000f, 32.0f}},
      {"pwr_6", {0.0156250000f, 64.0f}},  {"pwr_7", {0.0078125000f, 128.0f}},
      {"pwr_8", {0.0039062500f, 256.0f}}, {"pwr_9", {0.0019531250f, 512.0f}}};
  testProfilingInfosSerialization(expected);
}
180 | |
181 | #if LLVM_VERSION_MAJOR < 8 |
TEST(Quantization, ProfilingSerializeEmpty) {
  // An empty list of profiling infos must round-trip through YAML as well.
  std::vector<NodeProfilingInfo> expected;
  testProfilingInfosSerialization(expected);
}
186 | #endif |
187 | |
188 | TEST(Quantization, tensorAverageValue) { |
189 | { |
190 | float min = -10.0; |
191 | float max = 10.0; |
192 | std::vector<float> hist = {64, 64}; |
193 | TensorProfilingParams profParams(min, max, hist); |
194 | float avgVal = quantization::getTensorAverageValue(profParams); |
195 | EXPECT_FLOAT_EQ(avgVal, 0.0); |
196 | } |
197 | { |
198 | float min = -10.0; |
199 | float max = 10.0; |
200 | std::vector<float> hist = {0, 64}; |
201 | TensorProfilingParams profParams(min, max, hist); |
202 | float avgVal = quantization::getTensorAverageValue(profParams); |
203 | EXPECT_FLOAT_EQ(avgVal, 5.0); |
204 | } |
205 | { |
206 | float min = 0.0; |
207 | float max = 10.0; |
208 | std::vector<float> hist = {64, 0}; |
209 | TensorProfilingParams profParams(min, max, hist); |
210 | float avgVal = quantization::getTensorAverageValue(profParams); |
211 | EXPECT_FLOAT_EQ(avgVal, 2.5); |
212 | } |
213 | } |
214 | |
/// Saturate \p in to the representable range of the narrower type \p To,
/// returning the clipped value as a \p To.
template <typename From, typename To> static To clip(From in) {
  static_assert(sizeof(From) >= sizeof(To),
                "Clip should reduce the variable size");
  const From lo = std::numeric_limits<To>::min();
  const From hi = std::numeric_limits<To>::max();
  return std::min<From>(hi, std::max<From>(lo, in));
}
222 | |
/// Check that quantizeScaleOffset32To8 approximates an arbitrary float scale
/// with an integer multiply/shift transform such that requantizing a 32-bit
/// accumulator back to int8 is accurate to within one quantization step.
TEST(Quantization, quantScaleOffset) {
  // Test scale values spanning many orders of magnitude (roughly 2^-23 up
  // to 2).
  float scales[] = {
      0.0000001596f, 0.00000025f, 0.000000995f, 0.0000035f, 0.00000952f,
      0.00000113f,   0.000721f,   0.0000721f,   0.0000172f, 0.0000951f,
      0.0000721f,    0.0000341f,  0.0000222f,   0.0000172f, 0.000752f,
      0.000371f,     0.000321f,   0.000223f,    0.000112f,  0.00852f,
      0.00671f,      0.00592f,    0.00200f,     0.00107f,   0.0931f,
      0.0721f,       0.031f,      0.014f,       0.0132f,    0.712f,
      0.613f,        0.412f,      0.223f,       0.134f,     1.0f,
      1.13f,         1.612f,      1.523f,       2.0f};

  // Try all scale factors:
  for (float scale : scales) {
    // Try all legal integers within the range:
    for (int8_t input = -128; input < 127; input++) {
      // Simulate a 32-bit accumulator holding input/scale.
      int32_t sum32num = round(input / scale);

      auto TR = quantization::quantizeScaleOffset32To8(scale, 0);
      int32_t computed = TR.transform(sum32num);

      // Requantization must reproduce the input within one step.
      EXPECT_NEAR(input, computed, 1);
    }
  }
}
248 | |
/// For power-of-two scales the integer transform must be exact: no
/// pre-shift, and either a pure integer multiplier (scale >= 1) or a pure
/// post-shift (scale < 1).
TEST(Quantization, quantScaleOffsetPower2Scale) {
  // Test different power of 2 scale values (from 2^-10 to 2^1).
  float scales[] = {0.0009765625f, 0.0019531250f, 0.0039062500f, 0.0078125000f,
                    0.0156250000f, 0.0312500000f, 0.0625000000f, 0.1250000000f,
                    0.2500000000f, 0.5000000000f, 1.0000000000f, 2.0000000000f};

  // Try all scale factors:
  for (float scale : scales) {
    // Try all legal integers within the range:
    for (int8_t input = -128; input < 127; input++) {
      int32_t sum32num = round(input / scale);
      auto TR = quantization::quantizeScaleOffset32To8(scale, 0);
      EXPECT_EQ(quantization::isFloatPowerOf2(scale), true);
      // Power-of-two scales never need a pre-shift.
      EXPECT_EQ(TR.pre, 0);
      int exp = quantization::getFloat2Exp(scale);
      if (exp > 0) {
        // Scale >= 2: represented as an integer multiplier, no post-shift.
        EXPECT_EQ(TR.scale, (int)scale);
        EXPECT_EQ(TR.post, 0);
      } else {
        // Scale <= 1: unit multiplier with a right post-shift of -exp bits.
        EXPECT_EQ(TR.scale, 1);
        EXPECT_EQ(TR.post, -exp);
      }
      int32_t computed = TR.transform(sum32num);
      EXPECT_NEAR(input, computed, 1);
    }
  }
}
276 | |
/// Generic quantize/dequantize round-trip check: quantize a ramp tensor to
/// \p qTy using \p schema (optionally calibrated with \p calibration) and
/// verify that dequantizing recovers the original values within 0.05.
/// \p qtype is the C++ storage type matching \p qTy. Both an FP32 and an
/// FP16 source tensor are exercised.
template <class qtype>
void quantizeTensorTest(
    ElemKind qTy, quantization::Schema schema,
    quantization::Calibration calibration = quantization::Calibration::None) {
  // optimizeKL requires the histogram to have at least 255 bins, so N is set
  // to 256. For Int8 a small range is used and calibration is disabled.
  dim_t N = 256;
  float maxValue = 255.0;
  if (qTy == ElemKind::Int8QTy) {
    N = 6;
    maxValue = 5.0;
    calibration = quantization::Calibration::None;
  }
  // Map float [0.0; maxValue] to a quantized type using its entire value range.
  std::vector<float> hist(N, 1);
  TensorQuantizationParams quantParams =
      chooseQuantizationParams({0.0, maxValue, hist}, schema, qTy, calibration);

  // Create an FP32 tensor with N elements and initialize it with numbers from 0
  // to maxValue.
  Tensor inputFP32(ElemKind::FloatTy, {N});
  Handle<float> THFP32 = inputFP32.getHandle<float>();
  for (unsigned i = 0; i < N; ++i) {
    THFP32.at({i}) = i * 1.0f;
  }

  // Quantize the tensor.
  auto quantizedFP32 =
      quantization::quantizeTensor(inputFP32, quantParams, qTy);
  // Check that the dequantized result is close to the original values before
  // the quantization.
  Handle<qtype> THquantizedFP32 = quantizedFP32.getHandle<qtype>();
  for (unsigned i = 0; i < N; ++i) {
    EXPECT_NEAR(THFP32.at({i}),
                quantization::dequantize(THquantizedFP32.at({i}), quantParams),
                0.05f);
  }

  // Create an FP16 tensor with N elements and initialize it with numbers from 0
  // to maxValue.
  Tensor inputFP16(ElemKind::Float16Ty, {N});
  Handle<float16> THFP16 = inputFP16.getHandle<float16>();
  for (unsigned i = 0; i < N; ++i) {
    THFP16.at({i}) = i * 1.0f;
  }

  // Quantize the tensor.
  auto quantizedFP16 =
      quantization::quantizeTensor(inputFP16, quantParams, qTy);
  // Check that the dequantized result is close to the original values before
  // the quantization.
  Handle<qtype> THquantizedFP16 = quantizedFP16.getHandle<qtype>();
  for (unsigned i = 0; i < N; ++i) {
    EXPECT_NEAR(THFP16.at({i}),
                quantization::dequantize(THquantizedFP16.at({i}), quantParams),
                0.05f);
  }
}
335 | |
/// Asymmetric schema: round-trip checks for every precision, with and
/// without KL-minimization calibration.
TEST(Quantization, quantizeTensorAsymmetricInt8) {
  quantizeTensorTest<int8_t>(ElemKind::Int8QTy,
                             quantization::Schema::Asymmetric);
}
TEST(Quantization, quantizeTensorAsymmetricInt8KLMinimization) {
  quantizeTensorTest<int8_t>(ElemKind::Int8QTy,
                             quantization::Schema::Asymmetric,
                             quantization::Calibration::KLMinimization);
}
TEST(Quantization, quantizeTensorAsymmetricInt16) {
  quantizeTensorTest<int16_t>(ElemKind::Int16QTy,
                              quantization::Schema::Asymmetric);
}
TEST(Quantization, quantizeTensorAsymmetricInt16KLMinimization) {
  quantizeTensorTest<int16_t>(ElemKind::Int16QTy,
                              quantization::Schema::Asymmetric,
                              quantization::Calibration::KLMinimization);
}
TEST(Quantization, quantizeTensorAsymmetricInt32) {
  quantizeTensorTest<int32_t>(ElemKind::Int32QTy,
                              quantization::Schema::Asymmetric);
}
TEST(Quantization, quantizeTensorAsymmetricInt32KLMinimization) {
  quantizeTensorTest<int32_t>(ElemKind::Int32QTy,
                              quantization::Schema::Asymmetric,
                              quantization::Calibration::KLMinimization);
}
/// Symmetric schema: round-trip checks for every precision, with and without
/// KL-minimization calibration.
TEST(Quantization, quantizeTensorSymmetricInt8) {
  quantizeTensorTest<int8_t>(ElemKind::Int8QTy,
                             quantization::Schema::Symmetric);
}
TEST(Quantization, quantizeTensorSymmetricInt8KLMinimization) {
  quantizeTensorTest<int8_t>(ElemKind::Int8QTy, quantization::Schema::Symmetric,
                             quantization::Calibration::KLMinimization);
}
TEST(Quantization, quantizeTensorSymmetricInt16) {
  quantizeTensorTest<int16_t>(ElemKind::Int16QTy,
                              quantization::Schema::Symmetric);
}
TEST(Quantization, quantizeTensorSymmetricInt16KLMinimization) {
  quantizeTensorTest<int16_t>(ElemKind::Int16QTy,
                              quantization::Schema::Symmetric,
                              quantization::Calibration::KLMinimization);
}
TEST(Quantization, quantizeTensorSymmetricInt32) {
  quantizeTensorTest<int32_t>(ElemKind::Int32QTy,
                              quantization::Schema::Symmetric);
}
TEST(Quantization, quantizeTensorSymmetricInt32KLMinimization) {
  quantizeTensorTest<int32_t>(ElemKind::Int32QTy,
                              quantization::Schema::Symmetric,
                              quantization::Calibration::KLMinimization);
}
/// Symmetric-with-unsigned schema: round-trip checks for every precision,
/// with and without KL-minimization calibration.
TEST(Quantization, quantizeTensorSymmetricUInt8) {
  quantizeTensorTest<int8_t>(ElemKind::Int8QTy,
                             quantization::Schema::SymmetricWithUnsigned);
}
TEST(Quantization, quantizeTensorSymmetricUInt8KLMinimization) {
  quantizeTensorTest<int8_t>(ElemKind::Int8QTy,
                             quantization::Schema::SymmetricWithUnsigned,
                             quantization::Calibration::KLMinimization);
}
TEST(Quantization, quantizeTensorSymmetricUInt16) {
  quantizeTensorTest<int16_t>(ElemKind::Int16QTy,
                              quantization::Schema::SymmetricWithUnsigned);
}
TEST(Quantization, quantizeTensorSymmetricUInt16KLMinimization) {
  quantizeTensorTest<int16_t>(ElemKind::Int16QTy,
                              quantization::Schema::SymmetricWithUnsigned,
                              quantization::Calibration::KLMinimization);
}
TEST(Quantization, quantizeTensorSymmetricUInt32) {
  quantizeTensorTest<int32_t>(ElemKind::Int32QTy,
                              quantization::Schema::SymmetricWithUnsigned);
}
TEST(Quantization, quantizeTensorSymmetricUInt32KLMinimization) {
  quantizeTensorTest<int32_t>(ElemKind::Int32QTy,
                              quantization::Schema::SymmetricWithUnsigned,
                              quantization::Calibration::KLMinimization);
}
/// Symmetric-with-power-of-2-scale schema: round-trip checks for every
/// precision, with and without KL-minimization calibration.
TEST(Quantization, quantizeTensorSymmetricPwr2Int8) {
  quantizeTensorTest<int8_t>(ElemKind::Int8QTy,
                             quantization::Schema::SymmetricWithPower2Scale);
}
TEST(Quantization, quantizeTensorSymmetricPwr2Int8KLMinimization) {
  quantizeTensorTest<int8_t>(ElemKind::Int8QTy,
                             quantization::Schema::SymmetricWithPower2Scale,
                             quantization::Calibration::KLMinimization);
}
TEST(Quantization, quantizeTensorSymmetricPwr2Int16) {
  quantizeTensorTest<int16_t>(ElemKind::Int16QTy,
                              quantization::Schema::SymmetricWithPower2Scale);
}
TEST(Quantization, quantizeTensorSymmetricPwr2Int16KLMinimization) {
  quantizeTensorTest<int16_t>(ElemKind::Int16QTy,
                              quantization::Schema::SymmetricWithPower2Scale,
                              quantization::Calibration::KLMinimization);
}
TEST(Quantization, quantizeTensorSymmetricPwr2Int32) {
  quantizeTensorTest<int32_t>(ElemKind::Int32QTy,
                              quantization::Schema::SymmetricWithPower2Scale);
}
TEST(Quantization, quantizeTensorSymmetricPwr2Int32KLMinimization) {
  quantizeTensorTest<int32_t>(ElemKind::Int32QTy,
                              quantization::Schema::SymmetricWithPower2Scale,
                              quantization::Calibration::KLMinimization);
}
443 | |
/// Test 4-bit fused rowwise quantization.
/// \p T is the scale/offset storage type (float or float16) matching the
/// fused element kind \p qTy.
template <typename T> void fused4BitRowwiseQuantizationTest(ElemKind qTy) {
  // Create an FP32 tensor with 12 elements and initialize it
  // with numbers from the following test inputs here.
  // 1. Input that contains at least one +ve, one -ve and zero.
  // 2. Input that contains at least one +ve and zero.
  // 3. Input that contains at least one -ve and zero.
  // 4. Input that contains at least only (+ve) numbers.
  // 5. Input that contains at least only (-ve) numbers.
  // 'deltas' is used to create the above 5 test cases hermetically.
  auto deltas = {-3, 0, 3, -7, 7};
  for (const auto &delta : deltas) {
    Tensor inputFP32(ElemKind::FloatTy, {2, 6});
    Tensor dequantized(ElemKind::FloatTy, {2, 6});
    // Each fused row packs two 4-bit values per byte (dims[1] / 2) followed
    // by a per-row scale and offset of type T (2 * sizeof(T)).
    dim_t col = inputFP32.dims()[1] / 2 + 2 * sizeof(T);
    Tensor quantized(qTy, {2, col}, /* dummy scale */ 1.0,
                     /* dummy offset */ 0);
    Handle<float> inputH = inputFP32.getHandle<float>();
    for (dim_t i = 0; i < 2; i++) {
      for (dim_t j = 0; j < 6; j++) {
        inputH.at({i, j}) = (i + j) * 1.0f + delta;
      }
    }

    quantization::tensorFusedRowwiseQuantization<T>(inputFP32, quantized);
    dequantized =
        quantization::tensor4BitsFusedRowwiseDequantization(quantized);

    // The round trip must recover the inputs within a small tolerance.
    Handle<float> dequantizedH = dequantized.getHandle<float>();
    for (dim_t i = 0; i < 2; i++) {
      for (dim_t j = 0; j < 6; j++) {
        EXPECT_NEAR(inputH.at({i, j}), dequantizedH.at({i, j}), 0.02f);
      }
    }
  }
}
480 | |
/// Test 4-bit fused rowwise quantization with fp32 scale/offset per row.
TEST(Quantization, fused4BitsFP32RowwiseQuantizeTensor) {
  fused4BitRowwiseQuantizationTest<float>(ElemKind::UInt4FusedQTy);
}

/// Test 4-bit fused rowwise quantization with fp16 scale/offset per row.
TEST(Quantization, fused4BitsFP16RowwiseQuantizeTensor) {
  fused4BitRowwiseQuantizationTest<float16_t>(ElemKind::UInt4FusedFP16QTy);
}
490 | |
/// When quantizing a scalar the quantization should not lose precision: the
/// quantize->dequantize pair applied to a float scalar should preserve the
/// value (up to the precision lost by dividing/multiplying with the scale).
/// \p val is the scalar to round-trip, \p qTy the quantized element kind and
/// \p schema the quantization schema used to pick scale/offset.
void quantizeScalarTest(float val, ElemKind qTy, quantization::Schema schema) {
  ExecutionEngine EE{};
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");
  PlaceholderBindings bindings;

  // Choose quantization parameters for the degenerate range [val, val].
  auto TQP = quantization::chooseQuantizationParams({val, val}, schema, qTy);

  // Create quantize/dequantize network for a single float value.
  auto *input = mod.createPlaceholder(ElemKind::FloatTy, {1}, "val", false);
  auto inputQTy = mod.uniqueType(qTy, {1}, TQP.scale, TQP.offset);
  QuantizeNode *quant = F->createQuantize("quant", input, inputQTy);
  DequantizeNode *dequant =
      F->createDequantize("dequant", quant, ElemKind::FloatTy);
  SaveNode *save = F->createSave("save", dequant);

  // Allocate placeholders, set input, run, get output.
  auto inpH = bindings.allocate(input)->getHandle();
  auto outH = bindings.allocate(save->getPlaceholder())->getHandle();
  inpH.at({0}) = val;
  EE.compile(CompilationMode::Infer);
  EE.run(bindings);
  float outVal = outH.raw(0);
  EXPECT_NEAR(val, outVal, 0.0000000001);
}
520 | |
/// Scalar round-trip for zero, a positive and a negative value under every
/// schema, per precision.
TEST(Quantization, quantizeScalarTestInt8) {
  quantizeScalarTest(0.0, ElemKind::Int8QTy, quantization::Schema::Asymmetric);
  quantizeScalarTest(0.0, ElemKind::Int8QTy, quantization::Schema::Symmetric);
  quantizeScalarTest(0.0, ElemKind::Int8QTy,
                     quantization::Schema::SymmetricWithUnsigned);
  quantizeScalarTest(1.3, ElemKind::Int8QTy, quantization::Schema::Asymmetric);
  quantizeScalarTest(1.3, ElemKind::Int8QTy, quantization::Schema::Symmetric);
  quantizeScalarTest(1.3, ElemKind::Int8QTy,
                     quantization::Schema::SymmetricWithUnsigned);
  quantizeScalarTest(-1.3, ElemKind::Int8QTy, quantization::Schema::Asymmetric);
  quantizeScalarTest(-1.3, ElemKind::Int8QTy, quantization::Schema::Symmetric);
  quantizeScalarTest(-1.3, ElemKind::Int8QTy,
                     quantization::Schema::SymmetricWithUnsigned);
}

TEST(Quantization, quantizeScalarTestInt16) {
  quantizeScalarTest(0.0, ElemKind::Int16QTy, quantization::Schema::Asymmetric);
  quantizeScalarTest(0.0, ElemKind::Int16QTy, quantization::Schema::Symmetric);
  quantizeScalarTest(0.0, ElemKind::Int16QTy,
                     quantization::Schema::SymmetricWithUnsigned);
  quantizeScalarTest(1.3, ElemKind::Int16QTy, quantization::Schema::Asymmetric);
  quantizeScalarTest(1.3, ElemKind::Int16QTy, quantization::Schema::Symmetric);
  quantizeScalarTest(1.3, ElemKind::Int16QTy,
                     quantization::Schema::SymmetricWithUnsigned);
  quantizeScalarTest(-1.3, ElemKind::Int16QTy,
                     quantization::Schema::Asymmetric);
  quantizeScalarTest(-1.3, ElemKind::Int16QTy, quantization::Schema::Symmetric);
  quantizeScalarTest(-1.3, ElemKind::Int16QTy,
                     quantization::Schema::SymmetricWithUnsigned);
}

TEST(Quantization, quantizeScalarTestInt32) {
  quantizeScalarTest(0.0, ElemKind::Int32QTy, quantization::Schema::Asymmetric);
  quantizeScalarTest(0.0, ElemKind::Int32QTy, quantization::Schema::Symmetric);
  quantizeScalarTest(0.0, ElemKind::Int32QTy,
                     quantization::Schema::SymmetricWithUnsigned);
  quantizeScalarTest(1.3, ElemKind::Int32QTy, quantization::Schema::Asymmetric);
  quantizeScalarTest(1.3, ElemKind::Int32QTy, quantization::Schema::Symmetric);
  quantizeScalarTest(1.3, ElemKind::Int32QTy,
                     quantization::Schema::SymmetricWithUnsigned);
  quantizeScalarTest(-1.3, ElemKind::Int32QTy,
                     quantization::Schema::Asymmetric);
  quantizeScalarTest(-1.3, ElemKind::Int32QTy, quantization::Schema::Symmetric);
  quantizeScalarTest(-1.3, ElemKind::Int32QTy,
                     quantization::Schema::SymmetricWithUnsigned);
}
567 | |
568 | /// Check corner case when bias is quantized as int32 with unconstrained |
569 | /// scale and offset parameters and used within a subtraction bias - biasOffset |
570 | /// which is expected to be within int32 limits. |
/// Check corner case when bias is quantized as int32 with unconstrained
/// scale and offset parameters and used within a subtraction bias - biasOffset
/// which is expected to be within int32 limits.
/// \p val is the float bias value to quantize.
static void quantizeBiasInt32CornerCaseTest(float val) {
  // Choose bias quantization parameters for the degenerate range [val, val].
  float biasF = val;
  auto biasTQP = quantization::chooseQuantizationParams(
      {biasF, biasF}, quantization::Schema::Asymmetric, ElemKind::Int32QTy);

  // Quantize the tensor.
  Tensor biasTF(ElemKind::FloatTy, {1});
  biasTF.getHandle<float>().at({0}) = biasF;
  auto biasTQ =
      quantization::quantizeTensor(biasTF, biasTQP, ElemKind::Int32QTy);
  int32_t biasQ = biasTQ.getHandle<int32_t>().at({0});
  int32_t biasOffset = biasTQP.offset;

  // Compute difference in 64 bits and check it against int32 limits.
  int64_t diff = ((int64_t)biasQ) - ((int64_t)biasOffset);
  EXPECT_TRUE(std::numeric_limits<int32_t>::min() <= diff);
  EXPECT_TRUE(diff <= std::numeric_limits<int32_t>::max());
}
590 | |
/// Exercise the bias corner case for zero, small, tiny and very large
/// magnitudes of both signs.
TEST(Quantization, quantizeBiasInt32CornerCaseTests) {
  quantizeBiasInt32CornerCaseTest(0.0);
  quantizeBiasInt32CornerCaseTest(0.3);
  quantizeBiasInt32CornerCaseTest(-0.3);
  quantizeBiasInt32CornerCaseTest(0.0000003);
  quantizeBiasInt32CornerCaseTest(-0.0000003);
  quantizeBiasInt32CornerCaseTest(30000000.0);
  quantizeBiasInt32CornerCaseTest(-30000000.0);
}
600 | |
601 | /// Verify the quantization utility function which performs finer grained |
602 | /// quantization along a given dimension for given \p qSchema and \p qTy. |
603 | template <class eTy> |
604 | static void quantizeTensorRowwise(quantization::Schema qSchema, ElemKind qTy) { |
605 | dim_t numCols = 20; |
606 | dim_t qDim = 0; |
607 | dim_t qStep = 1; |
608 | |
609 | // Initialize tensors. |
610 | Tensor tensor(ElemKind::FloatTy, {2, numCols}); |
611 | Tensor row1(ElemKind::FloatTy, {numCols}); |
612 | Tensor row2(ElemKind::FloatTy, {numCols}); |
613 | auto tensorH = tensor.getHandle<float>(); |
614 | auto row1H = row1.getHandle<float>(); |
615 | auto row2H = row2.getHandle<float>(); |
616 | for (dim_t idx = 0; idx < numCols; idx++) { |
617 | tensorH.at({0, idx}) = float(idx); |
618 | tensorH.at({1, idx}) = float(idx) - 128.0; |
619 | row1H.raw(idx) = float(idx); |
620 | row2H.raw(idx) = float(idx) - 128.0; |
621 | } |
622 | |
623 | // Quantize rowwise using specialized function. |
624 | Tensor scales(ElemKind::FloatTy, {2}); |
625 | Tensor offsets(ElemKind::Int32ITy, {2}); |
626 | getTensorQuantizationParams(tensor, scales, offsets, qSchema, qTy, qDim, |
627 | qStep); |
628 | Tensor tensorQ = |
629 | quantization::quantizeTensor(tensor, scales, offsets, qTy, qDim, qStep); |
630 | auto tensorQH = tensorQ.getHandle<eTy>(); |
631 | auto scalesH = scales.getHandle<float>(); |
632 | auto offsetsH = offsets.getHandle<int32_t>(); |
633 | |
634 | // Quantize rowwise using per-tensor functions. |
635 | float row1Min = tensorH.at({0, 0}); |
636 | float row1Max = tensorH.at({0, numCols - 1}); |
637 | float row2Min = tensorH.at({1, 0}); |
638 | float row2Max = tensorH.at({1, numCols - 1}); |
639 | auto TQP1 = |
640 | quantization::chooseQuantizationParams({row1Min, row1Max}, qSchema, qTy); |
641 | auto TQP2 = |
642 | quantization::chooseQuantizationParams({row2Min, row2Max}, qSchema, qTy); |
643 | Tensor row1Q = quantization::quantizeTensor(row1, TQP1, qTy); |
644 | Tensor row2Q = quantization::quantizeTensor(row2, TQP2, qTy); |
645 | auto row1QH = row1Q.getHandle<eTy>(); |
646 | auto row2QH = row2Q.getHandle<eTy>(); |
647 | |
648 | // Check. |
649 | EXPECT_EQ(TQP1.scale, scalesH.raw(0)); |
650 | EXPECT_EQ(TQP2.scale, scalesH.raw(1)); |
651 | EXPECT_EQ(TQP1.offset, offsetsH.raw(0)); |
652 | EXPECT_EQ(TQP2.offset, offsetsH.raw(1)); |
653 | for (dim_t idx = 0; idx < 3; idx++) { |
654 | EXPECT_EQ(tensorQH.at({0, idx}), row1QH.raw(idx)); |
655 | EXPECT_EQ(tensorQH.at({1, idx}), row2QH.raw(idx)); |
656 | } |
657 | } |
658 | |
/// Run the rowwise quantization check for every schema/precision pair.
TEST(Quantization, QuantizeTensorRowwiseTest) {
  quantizeTensorRowwise<int8_t>(quantization::Schema::Asymmetric,
                                ElemKind::Int8QTy);
  quantizeTensorRowwise<int16_t>(quantization::Schema::Asymmetric,
                                 ElemKind::Int16QTy);
  quantizeTensorRowwise<int32_t>(quantization::Schema::Asymmetric,
                                 ElemKind::Int32QTy);
  quantizeTensorRowwise<int8_t>(quantization::Schema::Symmetric,
                                ElemKind::Int8QTy);
  quantizeTensorRowwise<int16_t>(quantization::Schema::Symmetric,
                                 ElemKind::Int16QTy);
  quantizeTensorRowwise<int32_t>(quantization::Schema::Symmetric,
                                 ElemKind::Int32QTy);
  quantizeTensorRowwise<int8_t>(quantization::Schema::SymmetricWithUnsigned,
                                ElemKind::Int8QTy);
  quantizeTensorRowwise<int16_t>(quantization::Schema::SymmetricWithUnsigned,
                                 ElemKind::Int16QTy);
  quantizeTensorRowwise<int32_t>(quantization::Schema::SymmetricWithUnsigned,
                                 ElemKind::Int32QTy);
  quantizeTensorRowwise<int8_t>(quantization::Schema::SymmetricWithPower2Scale,
                                ElemKind::Int8QTy);
  quantizeTensorRowwise<int16_t>(quantization::Schema::SymmetricWithPower2Scale,
                                 ElemKind::Int16QTy);
  quantizeTensorRowwise<int32_t>(quantization::Schema::SymmetricWithPower2Scale,
                                 ElemKind::Int32QTy);
}
685 | |
/// Helper for quantizing a simple Conv with precision \p quantizationPrecision
/// while the bias is quantized using \p quantizationPrecisionBias.
/// Builds the graph, supplies hand-written profiling ranges, quantizes the
/// function and checks that the result still compiles and runs.
static void quantizeSimpleConvGraph(ElemKind quantizationPrecision,
                                    ElemKind quantizationPrecisionBias) {
  ExecutionEngine EE{};
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, {1, 4, 4, 1}, "input", false);
  auto *filter = mod.createConstant(ElemKind::FloatTy, {2, 2, 2, 1}, "filter");
  auto *bias = mod.createConstant(ElemKind::FloatTy, {2}, "bias");
  auto outTy = mod.uniqueType(ElemKind::FloatTy, {1, 4, 8, 2});
  PlaceholderBindings bindings;
  bindings.allocate(input)->getHandle().randomize(0.f, 2.f, mod.getPRNG());
  filter->getHandle().randomize(-1.0, 1.0, mod.getPRNG());
  bias->getHandle().randomize(-1.0, 1.0, mod.getPRNG());

  auto *CN = F->createConv("Conv", input, filter, bias, outTy, {2, 2}, {1, 1},
                           {0, 2, 1, 3}, 1);
  auto *S = F->createSave("ret", CN);
  bindings.allocate(S->getPlaceholder());

  // Hand-written profiling ranges for every node output, as if a profiling
  // run had been performed.
  quantization::QuantizationConfiguration quantConfig{{
      {input->getOutput().generateNodeOutputName(), {0.0f, 2.0f}},
      {filter->getOutput().generateNodeOutputName(), {0.0f, 3.0f}},
      {bias->getOutput().generateNodeOutputName(), {0.0f, 4.0f}},
      {CN->getResult().generateNodeOutputName(), {0.0f, 6.0f}},
  }};

  quantConfig.precision = quantizationPrecision;
  quantConfig.precisionBias = quantizationPrecisionBias;
  quantConfig.assertAllNodesQuantized = true;
  std::unique_ptr<Backend> backend(createBackend(EE.getBackendName()));
  quantization::quantizeFunction(F, quantConfig, *backend);

  // Make sure that graph can be compiled and run.
  EE.compile(CompilationMode::Infer);
  EE.run(bindings);
}
726 | |
/// Test that a simple Conv graph can be quantized with Int8QTy precision and
/// the bias quantized directly in Int8QTy.
TEST(Quantization, QuantizeGraph_Int8_BiasInt8) {
  quantizeSimpleConvGraph(ElemKind::Int8QTy, ElemKind::Int8QTy);
}
731 | |
/// Test that a simple Conv graph can be quantized with Int8QTy precision while
/// the bias uses the wider Int32QTy.
TEST(Quantization, QuantizeGraph_Int8_BiasInt32) {
  quantizeSimpleConvGraph(ElemKind::Int8QTy, ElemKind::Int32QTy);
}
736 | |
/// Test that a simple Conv graph can be quantized with Int16QTy precision and
/// the bias quantized in Int16QTy.
TEST(Quantization, QuantizeGraph_Int16_BiasInt16) {
  quantizeSimpleConvGraph(ElemKind::Int16QTy, ElemKind::Int16QTy);
}
742 | |
/// Test that a simple Conv graph can be quantized with Int16QTy precision
/// while the bias uses the wider Int32QTy.
TEST(Quantization, QuantizeGraph_Int16_BiasInt32) {
  quantizeSimpleConvGraph(ElemKind::Int16QTy, ElemKind::Int32QTy);
}
748 | |
/// Test that when a node is quantized before its users are quantized then the
/// users correctly find the quantization parameters. This tests that updating
/// the nodeToTQP_ map in FunctionQuantizer::postProcessing() works correctly.
TEST(Quantization, TestQuantizedInputBeforeQuantizedNode) {
  ExecutionEngine EE{};
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main" );

  auto *input = mod.createPlaceholder(ElemKind::FloatTy, {3}, "input" , true);
  PlaceholderBindings bindings;
  bindings.allocate(input)->getHandle().randomize(-1.0, 1.0, mod.getPRNG());

  // Note: Intentionally add successive reshapes so the GraphOptimizer merges
  // them and creates a new one. This way the newly created Reshape will be
  // placed at the end of the list of nodes in F, and then it will be quantized
  // before SN. I think this is the most straightforward way to cover the logic
  // path inside FunctionQuantizer::postProcessing() that updates nodeToTQP_.
  auto *reshape1 = F->createReshape("reshape1" , input, {3, 1});
  auto *reshape2 = F->createReshape("reshape2" , reshape1, {1, 3});
  auto *SN = F->createSlice("slice" , reshape2, {0, 1}, {1, 2});
  auto *S = F->createSave("ret" , SN);
  bindings.allocate(S->getPlaceholder());

  // We need to optimize here first so that the two reshapes are merged.
  optimize(F, CompilationMode::Infer);

  // The merged Reshape has replaced reshape1/reshape2 as the Slice's input.
  ReshapeNode *newReshape = llvm::dyn_cast<ReshapeNode>(SN->getInput());
  ASSERT_TRUE(newReshape);

  // Profiling ranges for every node output the quantizer will visit.
  quantization::QuantizationConfiguration quantConfig{{
      {input->getOutput().generateNodeOutputName(), {-1.0, 1.0}},
      {newReshape->getResult().generateNodeOutputName(), {-1.0, 1.0}},
      {NodeValue::generateNodeOutputName(SN->getName().str()), {-1.0, 1.0}},
  }};

  quantConfig.assertAllNodesQuantized = true;
  std::unique_ptr<Backend> backend(createBackend(EE.getBackendName()));
  quantization::quantizeFunction(F, quantConfig, *backend);

  // Remove unnecessary conversions.
  optimize(F, CompilationMode::Infer);

  // Now we verify that the SliceNode was in fact quantized.
  {
    auto *saveNode = llvm::dyn_cast<SaveNode>(F->getNodeByName(S->getName()));
    ASSERT_TRUE(saveNode);
    auto *deqNode =
        llvm::dyn_cast<DequantizeNode>(saveNode->getInput().getNode());
    ASSERT_TRUE(deqNode);
    auto *sliceNode = llvm::dyn_cast<SliceNode>(deqNode->getInput().getNode());
    ASSERT_TRUE(sliceNode);
    EXPECT_TRUE(sliceNode->getResult().getType()->isQuantizedType());
  }
}
803 | |
804 | /// Test enabling RowwiseQuantizedFullyConnected in Glow quantization |
805 | /// procedure. A FC can be quantized and converted to a |
806 | /// RowwiseQuantizedFullyConnected if: |
807 | /// 1. The weights of FC is constant; |
808 | /// 2. Use -enable-rowwise option or set enableRowwise param in |
809 | /// quantization::quantizeFunction to true. In unittest, the later one is used. |
810 | static void |
811 | enableRowwiseQuantizedFullyConnected(ElemKind quantizationPrecision, |
812 | ElemKind quantizationPrecisionBias) { |
813 | ExecutionEngine EE{}; |
814 | auto &mod = EE.getModule(); |
815 | Function *F = mod.createFunction("main" ); |
816 | |
817 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {1, 3}, "input" , true); |
818 | auto *W = mod.createPlaceholder(ElemKind::FloatTy, {3, 2}, "weights" , true); |
819 | auto *B = mod.createPlaceholder(ElemKind::FloatTy, {2}, "bias" , true); |
820 | PlaceholderBindings bindings; |
821 | bindings.allocate(input)->getHandle().randomize(0.2f, 2.f, mod.getPRNG()); |
822 | bindings.allocate(W)->init(Tensor::InitKind::Xavier, 3, mod.getPRNG()); |
823 | bindings.allocate(B)->init(Tensor::InitKind::Broadcast, 0.1, mod.getPRNG()); |
824 | |
825 | auto *WC = mod.createConstant(ElemKind::FloatTy, W->dims(), "wc" ); |
826 | auto *FC = F->createFullyConnected("FC" , input, WC, B); |
827 | auto *S = F->createSave("ret" , FC); |
828 | bindings.allocate(S->getPlaceholder()); |
829 | |
830 | LoweredInfoMap loweredMapForQuant; |
831 | CompilationContext cctx(/* bindings */ nullptr, &loweredMapForQuant); |
832 | ::glow::lower(F, cctx); |
833 | |
834 | // Get the MatMul node and the Batched_Add node. |
835 | MatMulNode *matMul; |
836 | BatchedAddNode *batchedAdd; |
837 | for (Node &N : F->getNodes()) { |
838 | if (N.getKind() == Kinded::Kind::MatMulNodeKind) { |
839 | matMul = llvm::cast<MatMulNode>(&N); |
840 | } |
841 | if (N.getKind() == Kinded::Kind::BatchedAddNodeKind) { |
842 | batchedAdd = llvm::cast<BatchedAddNode>(&N); |
843 | } |
844 | } |
845 | ASSERT_TRUE(matMul); |
846 | ASSERT_TRUE(batchedAdd); |
847 | |
848 | quantization::QuantizationConfiguration quantConfig{{ |
849 | {input->getOutput().generateNodeOutputName(), {0.2f, 2.0f}}, |
850 | {WC->getOutput().generateNodeOutputName(), {0.3f, 3.0f}}, |
851 | {B->getOutput().generateNodeOutputName(), {0.4f, 4.0f}}, |
852 | {matMul->getResult().generateNodeOutputName(), {0.6f, 6.0f}}, |
853 | {batchedAdd->getResult().generateNodeOutputName(), {0.6f, 6.0f}}, |
854 | }}; |
855 | |
856 | quantConfig.precision = quantizationPrecision; |
857 | quantConfig.precisionBias = quantizationPrecisionBias; |
858 | quantConfig.enableRowwise = true; |
859 | quantConfig.assertAllNodesQuantized = true; |
860 | std::unique_ptr<Backend> backend(createBackend(EE.getBackendName())); |
861 | quantization::quantizeFunction(F, quantConfig, *backend, loweredMapForQuant); |
862 | |
863 | // Check the graph structure after quantization. |
864 | auto *saveNode = llvm::dyn_cast<SaveNode>(F->getNodeByName(S->getName())); |
865 | ASSERT_TRUE(saveNode); |
866 | auto *deqNode = |
867 | llvm::dyn_cast<DequantizeNode>(saveNode->getInput().getNode()); |
868 | ASSERT_TRUE(deqNode); |
869 | auto *rwNode = llvm::dyn_cast<RowwiseQuantizedFullyConnectedNode>( |
870 | deqNode->getInput().getNode()); |
871 | ASSERT_TRUE(rwNode); |
872 | auto *inNode = llvm::dyn_cast<QuantizeNode>(rwNode->getInput().getNode()); |
873 | ASSERT_TRUE(inNode); |
874 | auto *biasNode = llvm::dyn_cast<QuantizeNode>(rwNode->getBias().getNode()); |
875 | ASSERT_TRUE(biasNode); |
876 | auto *weightsNode = llvm::dyn_cast<Constant>(rwNode->getWeights().getNode()); |
877 | ASSERT_TRUE(weightsNode); |
878 | auto *scalesNode = llvm::dyn_cast<Constant>(rwNode->getScales().getNode()); |
879 | ASSERT_TRUE(scalesNode); |
880 | auto *offsetsNode = llvm::dyn_cast<Constant>(rwNode->getOffsets().getNode()); |
881 | ASSERT_TRUE(offsetsNode); |
882 | |
883 | // Make sure that graph can be compiled and run. We check the correctness of |
884 | // RowwiseQuantizedFullyConnected in operatorTests.cpp. |
885 | EE.compile(CompilationMode::Infer); |
886 | |
887 | EE.run(bindings); |
888 | } |
889 | |
/// Test rowwise-quantized FullyConnected with Int8QTy precision and the bias
/// quantized in Int8QTy.
TEST(Quantization, enableRowwiseQuantizedFullyConnected_Int8_BiasInt8) {
  enableRowwiseQuantizedFullyConnected(ElemKind::Int8QTy, ElemKind::Int8QTy);
}
893 | |
/// Test rowwise-quantized FullyConnected with Int8QTy precision and the bias
/// quantized in the wider Int32QTy.
TEST(Quantization, enableRowwiseQuantizedFullyConnected_Int8_BiasInt32) {
  enableRowwiseQuantizedFullyConnected(ElemKind::Int8QTy, ElemKind::Int32QTy);
}
897 | |
898 | /// Test enabling RowwiseQuantizedFullyConnected with Symmetric quantization. |
899 | TEST(Quantization, enableRowwiseQuantizedFullyConnectedSymmetric) { |
900 | ExecutionEngine EE{}; |
901 | auto &mod = EE.getModule(); |
902 | PlaceholderBindings bindings; |
903 | Function *F = mod.createFunction("main" ); |
904 | |
905 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {10, 80}, "in" , false); |
906 | auto *FC = F->createFullyConnected(bindings, "FC" , input, 100); |
907 | auto *res = F->createSave("save" , FC); |
908 | bindings.allocate(res->getPlaceholder()); |
909 | bindings.allocate(input)->getHandle().randomize(-1.f, 6.f, mod.getPRNG()); |
910 | |
911 | ::glow::convertPlaceholdersToConstants(F, bindings, |
912 | {input, res->getPlaceholder()}); |
913 | |
914 | // Note that we generate values for the Weights because they will be used |
915 | // during rowwise-quantization to select each row's scale/offset. |
916 | auto *WC = llvm::cast<Constant>(FC->getWeights()); |
917 | WC->getPayloadMutable().getHandle().randomize(-0.7, 1.1, mod.getPRNG()); |
918 | auto *BC = llvm::cast<Constant>(FC->getBias()); |
919 | |
920 | TensorProfilingParams inputTPP = {-1.0, 6.0}; |
921 | TensorProfilingParams matmulTPP = {0.0, 10.0}; |
922 | TensorProfilingParams batchedaddTPP = {0.0, 10.0}; |
923 | TensorProfilingParams biasTPP = {0, 20}; |
924 | |
925 | TensorQuantizationParams inputTQP = chooseQuantizationParams( |
926 | inputTPP, quantization::Schema::Symmetric, ElemKind::Int8QTy); |
927 | TensorQuantizationParams matmulTQP = chooseQuantizationParams( |
928 | matmulTPP, quantization::Schema::Symmetric, ElemKind::Int8QTy); |
929 | TensorQuantizationParams batchedaddTQP = chooseQuantizationParams( |
930 | batchedaddTPP, quantization::Schema::Symmetric, ElemKind::Int8QTy); |
931 | TensorQuantizationParams biasTQP = chooseQuantizationParams( |
932 | biasTPP, quantization::Schema::Symmetric, ElemKind::Int8QTy); |
933 | |
934 | EXPECT_EQ(inputTQP.offset, 0); |
935 | EXPECT_EQ(matmulTQP.offset, 0); |
936 | EXPECT_EQ(batchedaddTQP.offset, 0); |
937 | EXPECT_EQ(biasTQP.offset, 0); |
938 | |
939 | LoweredInfoMap loweredMapForQuant; |
940 | CompilationContext cctx(/* bindings */ nullptr, &loweredMapForQuant); |
941 | ::glow::lower(F, cctx); |
942 | |
943 | // Get the MatMul node and the Batched_Add node. |
944 | MatMulNode *matMul; |
945 | BatchedAddNode *batchedAdd; |
946 | for (Node &N : F->getNodes()) { |
947 | if (N.getKind() == Kinded::Kind::MatMulNodeKind) { |
948 | matMul = llvm::cast<MatMulNode>(&N); |
949 | } |
950 | if (N.getKind() == Kinded::Kind::BatchedAddNodeKind) { |
951 | batchedAdd = llvm::cast<BatchedAddNode>(&N); |
952 | } |
953 | } |
954 | ASSERT_TRUE(matMul); |
955 | ASSERT_TRUE(batchedAdd); |
956 | |
957 | // Note: Using dummy offset for the weights, as it should be |
958 | // rowwise-quantized. |
959 | quantization::QuantizationConfiguration quantConfig{{ |
960 | {input->getOutput().generateNodeOutputName(), inputTPP}, |
961 | {WC->getOutput().generateNodeOutputName(), {-0.7, 1.1}}, |
962 | {BC->getOutput().generateNodeOutputName(), biasTPP}, |
963 | {matMul->getResult().generateNodeOutputName(), matmulTPP}, |
964 | {batchedAdd->getResult().generateNodeOutputName(), batchedaddTPP}, |
965 | }}; |
966 | |
967 | quantConfig.schema = quantization::Schema::Symmetric; |
968 | quantConfig.enableRowwise = true; |
969 | quantConfig.assertAllNodesQuantized = true; |
970 | std::unique_ptr<Backend> backend(createBackend(EE.getBackendName())); |
971 | quantization::quantizeFunction(F, quantConfig, *backend, loweredMapForQuant); |
972 | |
973 | // Check the graph structure after quantization. |
974 | auto *saveNode = llvm::dyn_cast<SaveNode>(F->getNodeByName(res->getName())); |
975 | ASSERT_TRUE(saveNode); |
976 | auto *deqNode = |
977 | llvm::dyn_cast<DequantizeNode>(saveNode->getInput().getNode()); |
978 | ASSERT_TRUE(deqNode); |
979 | auto *rwNode = llvm::dyn_cast<RowwiseQuantizedFullyConnectedNode>( |
980 | deqNode->getInput().getNode()); |
981 | ASSERT_TRUE(rwNode); |
982 | auto *inNode = llvm::dyn_cast<QuantizeNode>(rwNode->getInput().getNode()); |
983 | ASSERT_TRUE(inNode); |
984 | auto *biasNode = llvm::dyn_cast<QuantizeNode>(rwNode->getBias().getNode()); |
985 | ASSERT_TRUE(biasNode); |
986 | auto *weightsNode = llvm::dyn_cast<Constant>(rwNode->getWeights().getNode()); |
987 | ASSERT_TRUE(weightsNode); |
988 | auto *scalesNode = llvm::dyn_cast<Constant>(rwNode->getScales().getNode()); |
989 | ASSERT_TRUE(scalesNode); |
990 | auto *offsetsNode = llvm::dyn_cast<Constant>(rwNode->getOffsets().getNode()); |
991 | ASSERT_TRUE(offsetsNode); |
992 | |
993 | // Because we're using symmetric quantization, the offsets should all be zero. |
994 | const auto offsetsH = offsetsNode->getPayload().getHandle<int32_t>(); |
995 | EXPECT_TRUE(offsetsH.isZero()); |
996 | |
997 | // Make sure that graph can be compiled and run. We check the correctness of |
998 | // RowwiseQuantizedFullyConnected in operatorTests.cpp. |
999 | EE.compile(CompilationMode::Infer); |
1000 | |
1001 | EE.run(bindings); |
1002 | } |
1003 | |
/// Test enabling ChannelwiseQuantizedConv2D in the quantization procedure.
/// A standard Convolution node can be quantized and converted to a
/// ChannelwiseQuantizedConvolution if:
/// 1. The filter and bias are constants.
/// 2. Use -enable-channelwise option or set enableChannelwise param in
/// quantization::quantizeFunction to true.
/// \p qPrec is the quantization precision, \p qPrecBias the bias precision
/// and \p schema the quantization schema used for all tensors.
static void enableChannelwiseQuantizedConv2D(ElemKind qPrec, ElemKind qPrecBias,
                                             quantization::Schema schema) {
  ExecutionEngine EE{};
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main" );
  PlaceholderBindings bindings;

  // Convolution parameters.
  std::vector<dim_t> inputDims = {5, 3, 3, 2};
  std::vector<dim_t> filterDims = {4, 2, 2, 1};
  std::vector<dim_t> biasDims = {4};
  std::vector<dim_t> outputDims = {5, 2, 2, 4};
  std::vector<unsigned_t> kernels = {2, 2};
  std::vector<unsigned_t> strides = {1, 1};
  std::vector<unsigned_t> pads = {0, 0, 0, 0};
  dim_t group = 2;
  std::vector<unsigned_t> dilation = {1, 1};

  // Create input placeholder.
  auto *input =
      mod.createPlaceholder(ElemKind::FloatTy, inputDims, "input" , false);
  bindings.allocate(input)->getHandle<float>().randomize(-1.0, 1.0,
                                                         mod.getPRNG());

  // Create filter constant (condition 1 for the channelwise transform).
  auto *filterC = mod.createConstant(ElemKind::FloatTy, filterDims, "filterC" );
  filterC->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0,
                                                            mod.getPRNG());

  // Create bias constant.
  auto *biasC = mod.createConstant(ElemKind::FloatTy, biasDims, "biasC" );
  biasC->getPayloadMutable().getHandle<float>().randomize(-1.0, 1.0,
                                                          mod.getPRNG());

  // Create Convolution.
  auto *outTy = mod.uniqueType(ElemKind::FloatTy, outputDims);
  ConvolutionNode *conv =
      F->createConv("Conv" , input, filterC, biasC, outTy, kernels, strides,
                    pads, group, dilation);
  SaveNode *save = F->createSave("save" , conv);
  bindings.allocate(save->getPlaceholder());

  // Quantize function. Choose asymmetric ranges to test quantization params.
  quantization::QuantizationConfiguration quantConfig{{
      {input->getOutput().generateNodeOutputName(), {-2.0, 1.0}},
      {filterC->getOutput().generateNodeOutputName(), {-1.0, 2.0}},
      {biasC->getOutput().generateNodeOutputName(), {0.0, 3.0}},
      {conv->getResult().generateNodeOutputName(), {-3.0, 0.0}},
  }};
  quantConfig.schema = schema;
  quantConfig.precision = qPrec;
  quantConfig.precisionBias = qPrecBias;
  quantConfig.enableChannelwise = true;
  quantConfig.assertAllNodesQuantized = true;
  std::unique_ptr<Backend> backend(createBackend(EE.getBackendName()));
  quantization::quantizeFunction(F, quantConfig, *backend);

  // Check the graph structure after quantization: Save <- Dequantize <-
  // ChannelwiseQuantizedConvolution with quantized input and constant
  // filter/bias plus per-channel scales/offsets constants.
  auto *saveNode = llvm::dyn_cast<SaveNode>(F->getNodeByName(save->getName()));
  ASSERT_TRUE(saveNode);
  auto *deqNode =
      llvm::dyn_cast<DequantizeNode>(saveNode->getInput().getNode());
  ASSERT_TRUE(deqNode);
  auto *cwqConvNode = llvm::dyn_cast<ChannelwiseQuantizedConvolutionNode>(
      deqNode->getInput().getNode());
  ASSERT_TRUE(cwqConvNode);
  auto *inputNode =
      llvm::dyn_cast<QuantizeNode>(cwqConvNode->getInput().getNode());
  ASSERT_TRUE(inputNode);
  auto *filterNode =
      llvm::dyn_cast<Constant>(cwqConvNode->getFilter().getNode());
  ASSERT_TRUE(filterNode);
  auto *biasNode = llvm::dyn_cast<Constant>(cwqConvNode->getBias().getNode());
  ASSERT_TRUE(biasNode);
  auto *filterScalesNode =
      llvm::dyn_cast<Constant>(cwqConvNode->getFilterScales().getNode());
  ASSERT_TRUE(filterScalesNode);
  auto *filterOffsetsNode =
      llvm::dyn_cast<Constant>(cwqConvNode->getFilterOffsets().getNode());
  ASSERT_TRUE(filterOffsetsNode);
  auto *biasScalesNode =
      llvm::dyn_cast<Constant>(cwqConvNode->getBiasScales().getNode());
  ASSERT_TRUE(biasScalesNode);
  auto *biasOffsetsNode =
      llvm::dyn_cast<Constant>(cwqConvNode->getBiasOffsets().getNode());
  ASSERT_TRUE(biasOffsetsNode);

  // Check precisions.
  ASSERT_EQ(inputNode->getResult().getElementType(), qPrec);
  ASSERT_EQ(filterNode->getOutput().getElementType(), qPrec);
  ASSERT_EQ(biasNode->getOutput().getElementType(), qPrecBias);
  ASSERT_EQ(filterScalesNode->getOutput().getElementType(), ElemKind::FloatTy);
  ASSERT_EQ(filterOffsetsNode->getOutput().getElementType(),
            ElemKind::Int32ITy);
  ASSERT_EQ(biasScalesNode->getOutput().getElementType(), ElemKind::FloatTy);
  ASSERT_EQ(biasOffsetsNode->getOutput().getElementType(), ElemKind::Int32ITy);
  ASSERT_EQ(cwqConvNode->getResult().getElementType(), qPrec);

  // Check quantization parameters: both the whole-tensor params and the
  // per-output-channel filter/bias params must be valid for \p schema.
  validateQuantizationParams({inputNode->getResult().getType()->getScale(),
                              inputNode->getResult().getType()->getOffset()},
                             schema, qPrec);
  validateQuantizationParams({cwqConvNode->getResult().getType()->getScale(),
                              cwqConvNode->getResult().getType()->getOffset()},
                             schema, qPrec);
  for (dim_t idx = 0; idx < outputDims[3]; idx++) {
    auto filterScalesH = filterScalesNode->getPayload().getHandle<float>();
    auto filterOffsetsH = filterOffsetsNode->getPayload().getHandle<int32_t>();
    auto biasScalesH = biasScalesNode->getPayload().getHandle<float>();
    auto biasOffsetsH = biasOffsetsNode->getPayload().getHandle<int32_t>();
    validateQuantizationParams(
        {filterScalesH.raw(idx), filterOffsetsH.raw(idx)}, schema, qPrec);
    validateQuantizationParams({biasScalesH.raw(idx), biasOffsetsH.raw(idx)},
                               schema, qPrecBias);
  }

  // Make sure that graph can be compiled and run. We check the correctness of
  // ChannelwiseQuantizedConvolution in OperatorTest.cpp.
  EE.compile(CompilationMode::Infer);
  EE.run(bindings);
}
1131 | |
/// Test channelwise-quantized Conv2D with Int8QTy precision and Int8QTy bias
/// for all quantization schemas.
TEST(Quantization, enableChannelwiseQuantizedConv2D_Int8_BiasInt8) {
  enableChannelwiseQuantizedConv2D(ElemKind::Int8QTy, ElemKind::Int8QTy,
                                   quantization::Schema::Asymmetric);
  enableChannelwiseQuantizedConv2D(ElemKind::Int8QTy, ElemKind::Int8QTy,
                                   quantization::Schema::Symmetric);
  enableChannelwiseQuantizedConv2D(ElemKind::Int8QTy, ElemKind::Int8QTy,
                                   quantization::Schema::SymmetricWithUnsigned);
  enableChannelwiseQuantizedConv2D(
      ElemKind::Int8QTy, ElemKind::Int8QTy,
      quantization::Schema::SymmetricWithPower2Scale);
}
1143 | |
/// Test channelwise-quantized Conv2D with Int8QTy precision and the wider
/// Int32QTy bias for all quantization schemas.
TEST(Quantization, enableChannelwiseQuantizedConv2D_Int8_BiasInt32) {
  enableChannelwiseQuantizedConv2D(ElemKind::Int8QTy, ElemKind::Int32QTy,
                                   quantization::Schema::Asymmetric);
  enableChannelwiseQuantizedConv2D(ElemKind::Int8QTy, ElemKind::Int32QTy,
                                   quantization::Schema::Symmetric);
  enableChannelwiseQuantizedConv2D(ElemKind::Int8QTy, ElemKind::Int32QTy,
                                   quantization::Schema::SymmetricWithUnsigned);
  enableChannelwiseQuantizedConv2D(
      ElemKind::Int8QTy, ElemKind::Int32QTy,
      quantization::Schema::SymmetricWithPower2Scale);
}
1155 | |
/// Check that SLWS is correctly fused rowwise-quantized by the quantizer.
TEST(Quantization, enableRowwiseQuantizedSLWS) {
  ExecutionEngine EE{};
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main" );
  PlaceholderBindings bindings;

  auto *data = mod.createPlaceholder(ElemKind::FloatTy, {3, 1}, "data" , false);
  auto *weights =
      mod.createPlaceholder(ElemKind::FloatTy, {8}, "weights" , false);
  auto *indices =
      mod.createPlaceholder(ElemKind::Int64ITy, {8}, "indices" , false);
  auto *lengths =
      mod.createPlaceholder(ElemKind::Int32ITy, {4}, "lengths" , false);

  // Don't worry about allocating them as we are not going to run anyway.
  bindings.allocate(data);
  bindings.allocate(weights);
  bindings.allocate(indices);
  bindings.allocate(lengths);

  auto *SLWS = F->createSparseLengthsWeightedSum("SLWS" , data, weights, indices,
                                                 lengths);
  auto *res = F->createSave("save" , SLWS);
  // Turn data/weights into Constants (everything except indices, lengths and
  // the save output), which the fused rowwise transform operates on.
  ::glow::convertPlaceholdersToConstants(
      F, bindings, {indices, lengths, res->getPlaceholder()});
  bindings.allocate(res->getPlaceholder());

  // Profiling ranges for the SLWS inputs/output.
  quantization::QuantizationConfiguration quantConfig{{
      {SLWS->getData().generateNodeOutputName(), {0.2f, 2.0f}},
      {SLWS->getWeights().generateNodeOutputName(), {0.3f, 3.0f}},
      {SLWS->getResult().generateNodeOutputName(), {0.4f, 4.0f}},
  }};

  quantConfig.enableRowwise = true;
  quantConfig.assertAllNodesQuantized = true;
  std::unique_ptr<Backend> backend(createBackend(EE.getBackendName()));
  quantization::quantizeFunction(F, quantConfig, *backend);
  // Remember the save node's name before compiling; compilation may transform
  // the function, so the node is looked up again afterwards.
  std::string saveName = std::string(res->getName());
  EE.compile(CompilationMode::Infer);

  // Check the graph structure after quantization: the SLWS must have been
  // replaced by its fused rowwise-quantized counterpart.
  F = EE.getModule().getFunctions().front();
  auto *saveNode = llvm::dyn_cast<SaveNode>(F->getNodeByName(saveName));
  ASSERT_TRUE(saveNode);
  auto *FRWQSLWS =
      llvm::dyn_cast<FusedRowwiseQuantizedSparseLengthsWeightedSumNode>(
          saveNode->getInput().getNode());
  ASSERT_TRUE(FRWQSLWS);
}
1206 | |
/// Quantize ReLU node and make sure that quantized version
/// has quantization parameters mapping to non-negative floating
/// point range.
TEST(Quantization, quantizeReLU) {
  ExecutionEngine EE{};
  // Use a mock backend for the quantizer (presumably it claims quantized
  // support for more ops than the Interpreter -- TODO confirm), while the
  // Interpreter still executes the function.
  std::unique_ptr<Backend> backend(new MockQuantBackend);
  EE.setBackendName("Interpreter" );
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main" );
  auto *input = mod.createPlaceholder(ElemKind::FloatTy, {1, 3}, "input" , true);
  auto *relu = F->createRELU("ReLU" , input);
  PlaceholderBindings bindings;
  auto *ret = F->createSave("ret" , relu);
  // Keep the save node's name; the function is looked up again after compile.
  std::string retName = std::string(ret->getName());
  // Make sure that offset quantization parameter of ReLU is set
  // such that it produces non-negative floating point range.
  quantization::QuantizationConfiguration quantConfig{
      {{input->getOutput().generateNodeOutputName(), {0.2f, 2.0f}},
       {relu->getResult().generateNodeOutputName(), {0.0f, 3.0f}}}};
  quantConfig.assertAllNodesQuantized = true;
  quantization::quantizeFunction(F, quantConfig, *backend);
  EE.compile(CompilationMode::Infer);

  // Compute tensor quantization parameters for verification.
  auto reluTQP = chooseQuantizationParams({0.0f, 3.0f}, quantConfig.schema,
                                          quantConfig.precision);

  // After quantization/compilation the ReLU appears as a quantized Max node;
  // verify its quantization parameters match the profiled [0, 3] range.
  F = EE.getModule().getFunctions().front();
  auto *save = llvm::cast<SaveNode>(F->getNodeByName(retName));
  ASSERT_TRUE(llvm::isa<DequantizeNode>(save->getInput().getNode()));
  auto *dequantize = llvm::cast<DequantizeNode>(save->getInput().getNode());
  ASSERT_TRUE(llvm::isa<MaxNode>(dequantize->getInput().getNode()));

  MaxNode *max = llvm::cast<MaxNode>(dequantize->getInput().getNode());
  ASSERT_TRUE(max->getResult().getType()->isQuantizedType());
  EXPECT_EQ(max->getResult().getType()->getOffset(), reluTQP.offset);
  EXPECT_EQ(max->getResult().getType()->getScale(), reluTQP.scale);
}
1245 | |
/// Quantize Log, Sigmoid, and Tanh nodes and make sure that quantized versions
/// are implemented as IntLookupTables, because the Interpreter only supports
/// them as such.
TEST(Quantization, quantizeLookupTables) {
  ExecutionEngine EE{};
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main" );
  auto *input = mod.createPlaceholder(ElemKind::FloatTy, {1, 3}, "input" , true);
  auto *LN = F->createLog("log" , input);
  auto *SN = F->createSigmoid("sigmoid" , LN);
  auto *TN = F->createTanh("tanh" , SN);
  auto *ret = F->createSave("ret" , TN);

  // Profiling ranges for every node output the quantizer will visit.
  quantization::QuantizationConfiguration quantConfig{
      {{input->getOutput().generateNodeOutputName(), {0.2f, 2.0f}},
       {LN->getResult().generateNodeOutputName(LN->getName().str()),
        {0.3f, 3.0f}},
       {SN->getResult().generateNodeOutputName(), {0.4f, 4.0f}},
       {TN->getResult().generateNodeOutputName(), {0.5f, 5.0f}}}};
  quantConfig.assertAllNodesQuantized = true;
  std::unique_ptr<Backend> backend(createBackend(EE.getBackendName()));
  quantization::quantizeFunction(F, quantConfig, *backend);
  optimize(F, CompilationMode::Infer);

  // Compute the quantization parameters based on the requirements of the
  // Sigmoid/Tanh or on the input/output values for Log. Sigmoid/Tanh lookup
  // tables use fixed input/output ranges ([-6, 6] -> [0, 1] and
  // [-3, 3] -> [-1, 1] respectively) rather than the profiled ranges.
  auto logInpTQP = chooseQuantizationParams({0.2, 2.0}, quantConfig.schema,
                                            quantConfig.precision);
  auto logOutTQP = chooseQuantizationParams({0.3, 3.0}, quantConfig.schema,
                                            quantConfig.precision);
  auto sigmoidInpTQP = chooseQuantizationParams({-6.0, 6.0}, quantConfig.schema,
                                                quantConfig.precision);
  auto sigmoidOutTQP = chooseQuantizationParams({0.0, 1.0}, quantConfig.schema,
                                                quantConfig.precision);
  auto tanhInpTQP = chooseQuantizationParams({-3.0, 3.0}, quantConfig.schema,
                                             quantConfig.precision);
  auto tanhOutTQP = chooseQuantizationParams({-1.0, 1.0}, quantConfig.schema,
                                             quantConfig.precision);

  // Walk the graph back from the save: each of Tanh/Sigmoid/Log must have
  // become an IntLookupTable, with Rescales adapting between the fixed
  // lookup-table ranges.
  auto *save = llvm::cast<SaveNode>(F->getNodeByName(ret->getName()));
  auto *dequantizeTanh =
      llvm::dyn_cast<DequantizeNode>(save->getInput().getNode());
  ASSERT_TRUE(dequantizeTanh);
  auto *tanhILT =
      llvm::dyn_cast<IntLookupTableNode>(dequantizeTanh->getInput().getNode());
  ASSERT_TRUE(tanhILT);
  EXPECT_FLOAT_EQ(tanhILT->getResult().getType()->getScale(), tanhOutTQP.scale);
  EXPECT_EQ(tanhILT->getResult().getType()->getOffset(), tanhOutTQP.offset);
  EXPECT_FLOAT_EQ(tanhILT->getInput().getType()->getScale(), tanhInpTQP.scale);
  EXPECT_EQ(tanhILT->getInput().getType()->getOffset(), tanhInpTQP.offset);

  auto *rescaleSigmoid =
      llvm::dyn_cast<RescaleQuantizedNode>(tanhILT->getInput().getNode());
  ASSERT_TRUE(rescaleSigmoid);
  auto *sigmoidILT =
      llvm::dyn_cast<IntLookupTableNode>(rescaleSigmoid->getInput().getNode());
  ASSERT_TRUE(sigmoidILT);
  EXPECT_FLOAT_EQ(sigmoidILT->getResult().getType()->getScale(),
                  sigmoidOutTQP.scale);
  EXPECT_EQ(sigmoidILT->getResult().getType()->getOffset(),
            sigmoidOutTQP.offset);
  EXPECT_FLOAT_EQ(sigmoidILT->getInput().getType()->getScale(),
                  sigmoidInpTQP.scale);
  EXPECT_EQ(sigmoidILT->getInput().getType()->getOffset(),
            sigmoidInpTQP.offset);

  auto *rescaleLog =
      llvm::dyn_cast<RescaleQuantizedNode>(sigmoidILT->getInput().getNode());
  ASSERT_TRUE(rescaleLog);
  auto *logILT =
      llvm::dyn_cast<IntLookupTableNode>(rescaleLog->getInput().getNode());
  ASSERT_TRUE(logILT);
  EXPECT_FLOAT_EQ(logILT->getResult().getType()->getScale(), logOutTQP.scale);
  EXPECT_EQ(logILT->getResult().getType()->getOffset(), logOutTQP.offset);
  EXPECT_FLOAT_EQ(logILT->getInput().getType()->getScale(), logInpTQP.scale);
  EXPECT_EQ(logILT->getInput().getType()->getOffset(), logInpTQP.offset);
}
1323 | |
/// Quantize Log, Sigmoid, and Tanh nodes and make sure that they are not
/// replaced by LookupTables because the backend supports them directly.
TEST(Quantization, quantizeWithoutLookupTables) {
  ExecutionEngine EE{};
  // MockQuantBackend stands in for a backend that supports these ops in
  // quantized form, so no IntLookupTable conversion should happen.
  std::unique_ptr<Backend> backend(new MockQuantBackend);
  EE.setBackendName("Interpreter" );
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main" );
  auto *input = mod.createPlaceholder(ElemKind::FloatTy, {1, 3}, "input" , true);
  auto *LN = F->createLog("log" , input);
  auto *SN = F->createSigmoid("sigmoid" , LN);
  auto *TN = F->createTanh("tanh" , SN);
  auto *ret = F->createSave("ret" , TN);

  // Profiling ranges for every node output the quantizer will visit.
  quantization::QuantizationConfiguration quantConfig{
      {{input->getOutput().generateNodeOutputName(), {0.2f, 2.0f}},
       {LN->getResult().generateNodeOutputName(), {0.3f, 3.0f}},
       {SN->getResult().generateNodeOutputName(), {0.4f, 4.0f}},
       {TN->getResult().generateNodeOutputName(), {0.5f, 5.0f}}}};
  quantConfig.assertAllNodesQuantized = true;
  quantization::quantizeFunction(F, quantConfig, *backend);
  optimize(F, CompilationMode::Infer);

  // Compute the quantization parameters for validation. Without lookup
  // tables, each node's input/output keeps its profiled range (each input
  // range is the producer's output range).
  auto logInpTQP = chooseQuantizationParams({0.2, 2.0}, quantConfig.schema,
                                            quantConfig.precision);
  auto logOutTQP = chooseQuantizationParams({0.3, 3.0}, quantConfig.schema,
                                            quantConfig.precision);
  auto sigmoidInpTQP = chooseQuantizationParams({0.3, 3.0}, quantConfig.schema,
                                                quantConfig.precision);
  auto sigmoidOutTQP = chooseQuantizationParams(
      {0.4f, 4.0f}, quantConfig.schema, quantConfig.precision);
  auto tanhInpTQP = chooseQuantizationParams({0.4f, 4.0f}, quantConfig.schema,
                                             quantConfig.precision);
  auto tanhOutTQP = chooseQuantizationParams({0.5f, 5.0f}, quantConfig.schema,
                                             quantConfig.precision);

  // Walk the graph back from the save: Tanh/Sigmoid/Log remain as their own
  // (quantized) node kinds rather than IntLookupTables.
  auto *save = llvm::cast<SaveNode>(F->getNodeByName(ret->getName()));
  auto *dequantize = llvm::dyn_cast<DequantizeNode>(save->getInput().getNode());
  ASSERT_TRUE(dequantize);
  auto *tanh = llvm::dyn_cast<TanhNode>(dequantize->getInput());
  ASSERT_TRUE(tanh);
  EXPECT_FLOAT_EQ(tanh->getResult().getType()->getScale(), tanhOutTQP.scale);
  EXPECT_EQ(tanh->getResult().getType()->getOffset(), tanhOutTQP.offset);
  EXPECT_FLOAT_EQ(tanh->getInput().getType()->getScale(), tanhInpTQP.scale);
  EXPECT_EQ(tanh->getInput().getType()->getOffset(), tanhInpTQP.offset);

  auto *sigmoid = llvm::dyn_cast<SigmoidNode>(tanh->getInput());
  ASSERT_TRUE(sigmoid);
  EXPECT_FLOAT_EQ(sigmoid->getResult().getType()->getScale(),
                  sigmoidOutTQP.scale);
  EXPECT_EQ(sigmoid->getResult().getType()->getOffset(), sigmoidOutTQP.offset);
  EXPECT_FLOAT_EQ(sigmoid->getInput().getType()->getScale(),
                  sigmoidInpTQP.scale);
  EXPECT_EQ(sigmoid->getInput().getType()->getOffset(), sigmoidInpTQP.offset);

  auto *log = llvm::dyn_cast<LogNode>(sigmoid->getInput());
  ASSERT_TRUE(log);
  EXPECT_FLOAT_EQ(log->getResult().getType()->getScale(), logOutTQP.scale);
  EXPECT_EQ(log->getResult().getType()->getOffset(), logOutTQP.offset);
  EXPECT_FLOAT_EQ(log->getInput().getType()->getScale(), logInpTQP.scale);
  EXPECT_EQ(log->getInput().getType()->getOffset(), logInpTQP.offset);
}
1387 | |
1388 | /// Fills the tensor \p H with some stable random data with the seed \p seed |
1389 | /// and the range [-scale .. scale]. |
1390 | static void fillStableRandomData(Handle<float> H, size_t seed, |
1391 | float scale = 1) { |
1392 | for (size_t i = 0, e = H.size(); i < e; i++) { |
1393 | H.raw(i) = scale * (float((int(i * 1921 + seed) % 100) - 50) / 50); |
1394 | } |
1395 | } |
1396 | |
/// Builds a simple graph, returns back input var and save node through refs.
/// The graph intentionally chains many node kinds (conv, pooling, shape ops,
/// FC, concat, reductions, compare/select) so that end-to-end quantization is
/// exercised on a wide variety of operators. All weights and inputs are
/// filled with deterministic pseudo-random data so results are reproducible.
static Function *createSimpleGraphForQuantization(Module *M,
                                                  PlaceholderBindings &bindings,
                                                  Placeholder *A,
                                                  Placeholder *B,
                                                  llvm::StringRef funcName) {
  Function *F = M->createFunction(funcName);

  fillStableRandomData(bindings.allocate(A)->getHandle(), 1100, 1);

  fillStableRandomData(bindings.allocate(B)->getHandle(), 2001, 1);

  // Convolution path with deterministically initialized filter and bias.
  ConvolutionNode *CV = F->createConv(bindings, "conv" , A, 16, 5, 1, 2, 2);
  auto *bias = cast<Placeholder>(CV->getBias());
  auto *filter = cast<Placeholder>(CV->getFilter());
  fillStableRandomData(bindings.get(bias)->getHandle(), 2001, 1);
  fillStableRandomData(bindings.get(filter)->getHandle(), 1000, 1);

  auto *RL = F->createRELU("relu" , CV);
  auto *MP = F->createMaxPool("maxPool" , RL, 2, 2, 1);
  // Just add noop transpose.
  auto *T = F->createTranspose("transpose" , MP->getResult(), {0, 1, 2, 3});
  // Noop reshape, make sure conversion quantization procedure works well.
  auto *R = F->createReshape("reshape" , T, T->getResult().dims());
  auto *AP = F->createAvgPool("avgPool" , R, 2, 2, 1);

  FullyConnectedNode *FC = F->createFullyConnected(bindings, "fc" , AP, 10);

  // Noop slice, make sure conversion quantization procedure works well.
  auto *S =
      F->createSlice("slice" , FC, {0, 1},
                     {FC->getResult().dims()[0], FC->getResult().dims()[1]});
  auto *bias2 = cast<Placeholder>(FC->getBias());
  auto *filter2 = cast<Placeholder>(FC->getWeights());

  fillStableRandomData(bindings.get(bias2)->getHandle(), 3001, 1);
  fillStableRandomData(bindings.get(filter2)->getHandle(), 4000, 1);

  // Tail of the graph: concats, reductions, tile and a compare/select chain,
  // ending in a single save node named "save" that the caller reads back.
  auto *CN = F->createConcat("concat" , {S, B}, 0);
  auto *SP = F->createSplat("splat" , B->getType(), 10.0);
  auto *O = F->createConcat("concat" , {CN, SP}, 0);
  auto *TN = F->createTranspose("transpose" , O, {1, 0});
  auto *BRAN = F->createBatchedReduceAdd("batchedreduceadd" , TN, 0);
  auto *TLN = F->createTile("tile" , BRAN, 2, 0);
  auto *SN = F->createSplat("splat" , TLN->getResult().getType(), 100.0);
  auto *MN = F->createMax("max" , SN, TLN);
  auto *CLTE = F->createCmpLTE("cmplte" , MN, SN);
  auto *SLN = F->createSelect("select" , CLTE, SN, MN);
  auto *save = F->createSave("save" , SLN);
  bindings.allocate(save->getPlaceholder());
  return F;
}
1449 | |
/// Helper for an end to end test profiling a model on \p profileEE, then
/// quantizing and running it on \p backendSpecificEE, quantizing with precision
/// \p quantizationPrecision and disabling quantization for all Kinds in
/// \p keepOriginalPrecisionForNodes. Results are compared from the profiling
/// run and quantization run.
static void
testQuantizationEnd2End(ExecutionEngine &profileEE,
                        ExecutionEngine &backendSpecificEE,
                        ElemKind quantizationPrecision,
                        const KindSet &keepOriginalPrecisionForNodes = {}) {
  auto *mod = &profileEE.getModule();
  auto *modBackend = &backendSpecificEE.getModule();
  PlaceholderBindings bindings;
  PlaceholderBindings bindingsBackend;

  // Create identically-shaped placeholders in both modules so the two graphs
  // operate on the same inputs.
  auto *A =
      mod->createPlaceholder(ElemKind::FloatTy, {1, 32, 32, 2}, "A" , false);
  auto *B = mod->createPlaceholder(ElemKind::FloatTy, {10, 9}, "B" , false);
  auto *AB = modBackend->createPlaceholder(ElemKind::FloatTy, {1, 32, 32, 2},
                                           "A" , false);
  auto *BB =
      modBackend->createPlaceholder(ElemKind::FloatTy, {10, 9}, "B" , false);

  // STEP1 - Generate the first network to record the quantization parameters.
  createSimpleGraphForQuantization(mod, bindings, A, B, "main" );
  createSimpleGraphForQuantization(modBackend, bindingsBackend, AB, BB, "main" );

  LoweredInfoMap loweredMapForProf;
  CompilationContext cctxProf{&bindings, &loweredMapForProf};
  cctxProf.precisionConfig.quantMode = QuantizationMode::Profile;
  profileEE.compile(cctxProf);
  bindings.allocate(mod->getPlaceholders());

  // Run graph to capture profile.
  profileEE.run(bindings, "main" );

  // STEP2 - Use the profile to quantize a network.
  LoweredInfoMap loweredMapForQuant;
  CompilationContext cctxQuant{&bindings, &loweredMapForQuant};

  // Get quantization infos and build new quantized graph.
  PrecisionConfiguration &precConfig = cctxQuant.precisionConfig;
  precConfig.quantMode = QuantizationMode::Quantize;
  precConfig.quantConfig.infos = quantization::generateNodeProfilingInfos(
      bindings, mod->getFunctions().front(), loweredMapForProf);
  precConfig.quantConfig.precision = quantizationPrecision;
  precConfig.quantConfig.assertAllNodesQuantized = true;
  precConfig.precisionModeKindSet = keepOriginalPrecisionForNodes;

  backendSpecificEE.compile(cctxQuant);
  bindingsBackend.allocate(modBackend->getPlaceholders());
  backendSpecificEE.run(bindingsBackend);

  // STEP3 - Compare the results of the original and quantized functions.
  auto result1Handle =
      bindings.get(bindings.getPlaceholderByNameSlow("save" ))->getHandle();
  auto result2Handle =
      bindingsBackend.get(bindingsBackend.getPlaceholderByNameSlow("save" ))
          ->getHandle();

  EXPECT_EQ(result1Handle.size(), result2Handle.size());

  for (int i = 0, e = result1Handle.size(); i < e; ++i) {
    // Compare element-wise relative error, normalized by the maximum of the
    // quantized result so the 3% tolerance is scale-independent.
    float mx = result2Handle.raw(result2Handle.minMaxArg().second);
    double diff = std::fabs(result2Handle.raw(i) - result1Handle.raw(i)) / mx;

    // Allow 3% difference.
    EXPECT_NEAR(diff, 0, 0.03);
  }
}
1520 | |
1521 | /// End to end quantization test for Int8 quantization. |
1522 | TEST_P(Operator, end2endInt8) { |
1523 | // The OpenCL backend does not support some of the nodes in the test; |
1524 | // explicitly whitelist them here as staying in float, so that the quantizer |
1525 | // does not complain. |
1526 | KindSet keepOriginalPrecisionForNodes; |
1527 | if (backendSpecificEE.getBackendName() == "OpenCL" ) { |
1528 | keepOriginalPrecisionForNodes.insert(Kinded::Kind::SelectNodeKind); |
1529 | keepOriginalPrecisionForNodes.insert(Kinded::Kind::CmpLTENodeKind); |
1530 | keepOriginalPrecisionForNodes.insert( |
1531 | Kinded::Kind::BatchedReduceAddNodeKind); |
1532 | } |
1533 | |
1534 | testQuantizationEnd2End(profileEE, backendSpecificEE, ElemKind::Int8QTy, |
1535 | keepOriginalPrecisionForNodes); |
1536 | } |
1537 | |
1538 | /// Fills the tensor \p H with some stable random integers with the seed \p seed |
1539 | /// and the range [0, scale). |
1540 | static void fillStableRandomIndex(Handle<int64_t> H, size_t seed, |
1541 | size_t scale = 10) { |
1542 | for (size_t i = 0, e = H.size(); i < e; i++) { |
1543 | H.raw(i) = int(i * 1921 + seed) % scale; |
1544 | } |
1545 | } |
1546 | |
/// Builds a graph with two GRUs and saves output from last hidden node.
/// The GRU cells are hand-rolled from FullyConnected/Slice/Sigmoid/Tanh nodes
/// (no dedicated GRU node) so that quantization is exercised on the individual
/// primitives. All weights and inputs use deterministic pseudo-random data.
static Function *createGRUForQuantization(Module *M,
                                          PlaceholderBindings &bindings,
                                          llvm::StringRef funcName) {
  Function *F = M->createFunction(funcName);

  constexpr unsigned sequenceSize = 2;
  constexpr unsigned embeddingSize = 10;
  constexpr unsigned languageSize = 10;
  constexpr unsigned batchSize = 5;
  // Each FC produces the three gate pre-activations (r, i, n) concatenated.
  constexpr unsigned hiddenSize = 3 * embeddingSize;

  // STEP1 - Initialize inputs into GRU
  auto *emb = F->getParent()->createPlaceholder(
      ElemKind::FloatTy, {languageSize, embeddingSize}, "embedding" , false);
  fillStableRandomData(bindings.allocate(emb)->getHandle(), 4565, 1);

  auto *input = F->getParent()->createPlaceholder(
      ElemKind::Int64ITy, {batchSize, sequenceSize}, "input" , false);
  fillStableRandomIndex(bindings.allocate(input)->getHandle<int64_t>(), 7227,
                        10);

  auto *hiddenInit = F->getParent()->createPlaceholder(
      ElemKind::FloatTy, {batchSize, embeddingSize}, "hiddenInit" , false);
  bindings.allocate(hiddenInit)->zero();
  Node *hidden = hiddenInit;

  // Unroll the recurrence: one GRU cell per time step, feeding the previous
  // step's hidden state forward.
  for (unsigned step = 0; step < sequenceSize; step++) {
    // STEP2 - Gather a single set of embeddings for the GRU
    Node *inputEmbedded = F->createGather("gru.embedding" , emb, input);
    Node *inputSlice =
        F->createSlice("gru.inputSlice" , inputEmbedded, {0, step, 0},
                       {batchSize, step + 1, embeddingSize});
    Node *reshape =
        F->createReshape("gru.reshape" , inputSlice, {batchSize, embeddingSize});

    // STEP3 - Generate a GRU
    // reference implementation:
    // https://github.com/pytorch/pytorch/blob/dd5c195646b941d3e20a72847ac48c41e272b8b2/torch/nn/_functions/rnn.py#L46
    // similar to /examples/fr2en.cpp

    auto *FCi =
        F->createFullyConnected(bindings, "gru.fci" , reshape, hiddenSize);
    auto *biasI = cast<Placeholder>(FCi->getBias());
    auto *filterI = cast<Placeholder>(FCi->getWeights());
    fillStableRandomData(bindings.get(biasI)->getHandle(), 8877, 1);
    fillStableRandomData(bindings.get(filterI)->getHandle(), 1441, 1);

    auto *FCh =
        F->createFullyConnected(bindings, "gru.fch" , hidden, hiddenSize);
    auto *biasH = cast<Placeholder>(FCh->getBias());
    auto *filterH = cast<Placeholder>(FCh->getWeights());
    fillStableRandomData(bindings.get(biasH)->getHandle(), 9009, 1);
    fillStableRandomData(bindings.get(filterH)->getHandle(), 1001, 1);

    // Split the fused FC outputs into the per-gate pre-activations.
    Node *i_r =
        F->createSlice("gru.i_r" , FCi, {0, 0}, {batchSize, embeddingSize});
    Node *i_i = F->createSlice("gru.i_i" , FCi, {0, embeddingSize},
                               {batchSize, 2 * embeddingSize});
    Node *i_n = F->createSlice("gru.i_n" , FCi, {0, 2 * embeddingSize},
                               {batchSize, 3 * embeddingSize});

    Node *h_r =
        F->createSlice("gru.h_r" , FCh, {0, 0}, {batchSize, embeddingSize});
    Node *h_i = F->createSlice("gru.h_i" , FCh, {0, embeddingSize},
                               {batchSize, 2 * embeddingSize});
    Node *h_n = F->createSlice("gru.h_n" , FCh, {0, 2 * embeddingSize},
                               {batchSize, 3 * embeddingSize});

    // Gate math, matching the PyTorch reference linked above.
    Node *resetgate = F->createSigmoid("gru.resetgate" ,
                                       F->createAdd("i_r_plus_h_r" , i_r, h_r));
    Node *inputgate = F->createSigmoid("gru.inputgate" ,
                                       F->createAdd("i_i_plus_h_i" , i_i, h_i));
    Node *newgate = F->createTanh(
        "gru.newgate" ,
        F->createAdd("i_n_plus_rg_mult_h_n" , i_n,
                     F->createMul("rg_mult_h_n" , resetgate, h_n)));
    hidden = F->createAdd(
        "gru.newhidden" , newgate,
        F->createMul("ig_mult_hmng" , inputgate,
                     F->createSub("hidden_minus_newgate" , hidden, newgate)));
  }
  // No-op TopK selection to test quantization
  Node *downsample = F->createTopK("gru.downsample" , hidden, embeddingSize / 2);

  // Save the TopK values output (output index 0) under "save" for the caller.
  auto *save = F->createSave("save" , {downsample, 0});
  bindings.allocate(save->getPlaceholder());
  return F;
}
1636 | |
1637 | TEST_P(Operator, end2endGRU) { |
1638 | // STEP1 - Generate the first network to record the quantization parameters. |
1639 | auto *mod = &profileEE.getModule(); |
1640 | auto *modBackend = &backendSpecificEE.getModule(); |
1641 | PlaceholderBindings bindings; |
1642 | PlaceholderBindings bindingsBackend; |
1643 | createGRUForQuantization(mod, bindings, "main" ); |
1644 | createGRUForQuantization(modBackend, bindingsBackend, "main" ); |
1645 | |
1646 | LoweredInfoMap loweredMapForProf; |
1647 | CompilationContext cctxProf{&bindings, &loweredMapForProf}; |
1648 | cctxProf.precisionConfig.quantMode = QuantizationMode::Profile; |
1649 | profileEE.compile(cctxProf); |
1650 | |
1651 | // Run graph to capture profile. |
1652 | profileEE.run(bindings); |
1653 | |
1654 | LoweredInfoMap loweredMapForQuant; |
1655 | CompilationContext cctxQuant{&bindings, &loweredMapForQuant}; |
1656 | cctxQuant.precisionConfig.quantMode = QuantizationMode::Quantize; |
1657 | PrecisionConfiguration &precConfig = cctxQuant.precisionConfig; |
1658 | precConfig.quantConfig.infos = quantization::generateNodeProfilingInfos( |
1659 | bindings, mod->getFunctions().front(), loweredMapForProf); |
1660 | |
1661 | // The OpenCL backend does not support some of the nodes in the test; |
1662 | // explicitly whitelist them here as staying in float, so that the quantizer |
1663 | // does not complain. |
1664 | KindSet doNotQuantizeKinds; |
1665 | if (backendSpecificEE.getBackendName() == "OpenCL" ) { |
1666 | precConfig.precisionModeKindSet.insert(Kinded::Kind::TanhNodeKind); |
1667 | precConfig.precisionModeKindSet.insert(Kinded::Kind::SigmoidNodeKind); |
1668 | precConfig.precisionModeKindSet.insert(Kinded::Kind::GatherNodeKind); |
1669 | } |
1670 | |
1671 | // STEP2 - Use the profile to quantize a network. |
1672 | |
1673 | backendSpecificEE.compile(cctxQuant); |
1674 | backendSpecificEE.run(bindingsBackend); |
1675 | |
1676 | // STEP3 - Compare the results of the original and quantized functions. |
1677 | auto result1Handle = |
1678 | bindings.get(bindings.getPlaceholderByNameSlow("save" ))->getHandle(); |
1679 | auto result2Handle = |
1680 | bindingsBackend.get(bindingsBackend.getPlaceholderByNameSlow("save" )) |
1681 | ->getHandle(); |
1682 | |
1683 | EXPECT_EQ(result1Handle.size(), result2Handle.size()); |
1684 | |
1685 | for (int i = 0, e = result1Handle.size(); i < e; ++i) { |
1686 | float mx = result2Handle.raw(result2Handle.minMaxArg().second); |
1687 | double diff = std::fabs(result2Handle.raw(i) - result1Handle.raw(i)) / mx; |
1688 | |
1689 | // Allow 3% difference. |
1690 | EXPECT_NEAR(diff, 0, 0.03); |
1691 | } |
1692 | } |
1693 | |
1694 | TEST(Quantization, rescaleSameType) { |
1695 | ExecutionEngine EE{}; |
1696 | PlaceholderBindings bindings; |
1697 | auto &mod = EE.getModule(); |
1698 | auto *F = mod.createFunction("foo" ); |
1699 | auto *input = |
1700 | mod.createPlaceholder(ElemKind::Int8QTy, {1, 1}, 0.5, 11, "input" , true); |
1701 | bindings.allocate(input)->init(Tensor::InitKind::Broadcast, 21, |
1702 | mod.getPRNG()); |
1703 | |
1704 | auto *Q = F->createRescaleQuantized( |
1705 | "rescale" , input, mod.uniqueType(ElemKind::Int8QTy, {1, 1}, 0.5, 11)); |
1706 | auto *D = F->createDequantize("dequantize" , Q, ElemKind::FloatTy); |
1707 | auto *save = F->createSave("ret" , D); |
1708 | auto *result = bindings.allocate(save->getPlaceholder()); |
1709 | |
1710 | EXPECT_EQ(F->getNodes().size(), 3); |
1711 | EE.compile(CompilationMode::Infer); |
1712 | |
1713 | EE.run(bindings); |
1714 | F = EE.getModule().getFunctions().front(); |
1715 | EXPECT_EQ(F->getNodes().size(), 2); |
1716 | |
1717 | auto RH = result->getHandle(); |
1718 | EXPECT_NEAR(RH.at({0, 0}), 5.0, 0.001); |
1719 | } |
1720 | |
/// Check that a Quantize -> Rescale -> Dequantize chain on a float input is
/// optimized away entirely, leaving only the save node and returning the
/// original float value.
TEST(Quantization, optimizeRescaleQuantize) {
  ExecutionEngine EE{};
  PlaceholderBindings bindings;
  auto &mod = EE.getModule();
  auto *F = mod.createFunction("foo" );
  // Float input broadcast-filled with 21.
  auto *input = mod.createPlaceholder(ElemKind::FloatTy, {1, 1}, "input" , true);
  bindings.allocate(input)->init(Tensor::InitKind::Broadcast, 21,
                                 mod.getPRNG());

  // Quantize with one set of parameters, rescale into another, then
  // dequantize back to float; the whole chain is an identity on the input.
  auto *Q = F->createQuantize(
      "quant" , input, mod.uniqueType(ElemKind::Int8QTy, {1, 1}, 0.25, 4));
  auto *RS = F->createRescaleQuantized(
      "rescale" , Q, mod.uniqueType(ElemKind::Int8QTy, {1, 1}, 0.5, 11));
  auto *D = F->createDequantize("dequantize" , RS, ElemKind::FloatTy);
  auto *save = F->createSave("ret" , D);
  auto *result = bindings.allocate(save->getPlaceholder());

  // Before optimization: quantize + rescale + dequantize + save.
  EXPECT_EQ(F->getNodes().size(), 4);
  EE.compile(CompilationMode::Infer);

  EE.run(bindings);

  // After optimization only the save node remains.
  EXPECT_EQ(EE.getModule().getFunctions().front()->getNodes().size(), 1);

  // The value is passed through untouched.
  auto RH = result->getHandle();
  EXPECT_NEAR(RH.at({0, 0}), 21.0, 0.001);
}
1748 | |
1749 | /// Check that our asymmetric quantization schema produces |
1750 | /// the expected scales and offsets for various ranges for Int8. |
1751 | TEST(Quantization, chooseQuantizationAsymmetricInt8) { |
1752 | // Map float [0.0; 6.0] to int [-128; 127]. |
1753 | TensorQuantizationParams asymmetricParams = chooseQuantizationParams( |
1754 | {0.0, 6.0}, quantization::Schema::Asymmetric, ElemKind::Int8QTy); |
1755 | // Dequantization formula is scale(X - offset). |
1756 | // So |
1757 | // 1. scale(-128 - offset) == 0.0 |
1758 | // 2. scale(127 - offset) == 6.0 |
1759 | // Given scale != 0, #1 gives -128 == offset |
1760 | // Then #2, gives scale == 6.0 / (127 - (-128)). |
1761 | EXPECT_EQ(asymmetricParams.offset, -128); |
1762 | EXPECT_NEAR(asymmetricParams.scale, 6.0 / 255, 0.001); |
1763 | |
1764 | // Map float [-3.0; 3.0] to int [-128; 127]. |
1765 | asymmetricParams = chooseQuantizationParams( |
1766 | {-3.0, 3.0}, quantization::Schema::Asymmetric, ElemKind::Int8QTy); |
1767 | // Dequantization formula is scale(X - offset). |
1768 | // So in theory, we should get |
1769 | // 1. scale(-128 - offset) == -3.0 |
1770 | // 2. scale(127 - offset) == 3.0 |
1771 | // Given scale != 0, #1 + #2 gives scale(-128 + 127 - 2*offset) == 0.0 |
1772 | // offset == -1 / -2 == 0.5 |
1773 | // Then #2 or #1, gives scale == 3.0 / 127.5. |
1774 | // However, when we get symmetric ranges (i.e., [-X; X]), |
1775 | // we actually force the zero point to map to 0. |
1776 | // In other words, scale(0 - offset) == 0.0, so our offset is 0. |
1777 | // Then our scale is simply: (inputMax - inputMin) / (outputMax - outputMin). |
1778 | // (3.0 - (-3.0)) / (127 - (-128)) == 6.0 / 255. |
1779 | EXPECT_EQ(asymmetricParams.offset, 0); |
1780 | EXPECT_NEAR(asymmetricParams.scale, 6.0 / 255, 0.001); |
1781 | |
1782 | // Map float [-2.0; 5.0] to int [-128; 127]. |
1783 | asymmetricParams = chooseQuantizationParams( |
1784 | {-2.0, 5.0}, quantization::Schema::Asymmetric, ElemKind::Int8QTy); |
1785 | // Scale: (5.0 - (-2.0)) / (127 - (-128)) == 7.0 / 255.0 |
1786 | // Offset from min: scale(-128 - offset) == -2.0 |
1787 | // 7.0 / 255.0 * (-128 - offset) == -2.0 |
1788 | // -128 - offset == -2.0 * 255.0 / 7.0 |
1789 | // offset == 2.0 * 255.0 / 7.0 - 128 |
1790 | // offset == ~-55 |
1791 | EXPECT_EQ(asymmetricParams.offset, (int32_t)(2.0 * 255 / 7.0 - 128)); |
1792 | EXPECT_NEAR(asymmetricParams.scale, 7.0 / 255, 0.001); |
1793 | |
1794 | // Map float [2.0; 5.0] to int [-128; 127]. |
1795 | // Make sure we extend the range to include 0.0, i.e., |
1796 | // we really map [0.0; 5.0] to int [-128; 127]. |
1797 | asymmetricParams = chooseQuantizationParams( |
1798 | {2.0, 5.0}, quantization::Schema::Asymmetric, ElemKind::Int8QTy); |
1799 | // Scale: (5.0 - (0.0)) / (127 - (-128)) == 5.0 / 255.0 |
1800 | // Offset from min: scale(-128 - offset) == 0.0 |
1801 | EXPECT_EQ(asymmetricParams.offset, -128); |
1802 | EXPECT_NEAR(asymmetricParams.scale, 5.0 / 255, 0.001); |
1803 | |
1804 | // Map float [-8.0; -2.0] to int [-128; 127]. |
1805 | // Make sure we extend the range to include 0.0, i.e., |
1806 | // we really map [-8.0; 0.0] to int [-128; 127]. |
1807 | asymmetricParams = chooseQuantizationParams( |
1808 | {-8.0, -2.0}, quantization::Schema::Asymmetric, ElemKind::Int8QTy); |
1809 | // Scale: (0.0 - (-8.0)) / (127 - (-128)) == 8.0 / 255.0 |
1810 | // Offset from min: scale(127 - offset) == 0.0 |
1811 | EXPECT_EQ(asymmetricParams.offset, 127); |
1812 | EXPECT_NEAR(asymmetricParams.scale, 8.0 / 255, 0.001); |
1813 | } |
1814 | |
/// Check that our symmetric quantization schema produces
/// the expected scales and offsets for various ranges for Int8.
TEST(Quantization, chooseQuantizationSymmetricInt8) {
  // Map float [0.0; 6.0] to int [-128; 127].
  // With symmetric mapping, we basically map [-6.0; 6.0]
  TensorQuantizationParams symmetricParams = chooseQuantizationParams(
      {0.0, 6.0}, quantization::Schema::Symmetric, ElemKind::Int8QTy);
  // With symmetric mapping offset should always be zero.
  EXPECT_EQ(symmetricParams.offset, 0);
  EXPECT_NEAR(symmetricParams.scale, 12.0 / 255, 0.001);

  // Map float [-3.0; 3.0] to int [-128; 127].
  symmetricParams = chooseQuantizationParams(
      {-3.0, 3.0}, quantization::Schema::Symmetric, ElemKind::Int8QTy);
  EXPECT_EQ(symmetricParams.offset, 0);
  EXPECT_NEAR(symmetricParams.scale, 6.0 / 255, 0.001);

  // Map float [-2.0; 5.0] to int [-128; 127].
  // => [-5.0; 5.0] range for symmetric mode.
  symmetricParams = chooseQuantizationParams(
      {-2.0, 5.0}, quantization::Schema::Symmetric, ElemKind::Int8QTy);
  EXPECT_EQ(symmetricParams.offset, 0);
  EXPECT_NEAR(symmetricParams.scale, 10.0 / 255, 0.001);

  // Map float [2.0; 5.0] to int [-128; 127].
  // Ranges are extended to include 0.
  // => [0.0; 5.0] range for symmetric mode.
  symmetricParams = chooseQuantizationParams(
      {2.0, 5.0}, quantization::Schema::Symmetric, ElemKind::Int8QTy);
  // Symmetric mode then mirrors the range around 0 => [-5.0; 5.0].
  // Scale: (5.0 - (-5.0)) / (127 - (-128)) == 10.0 / 255.0
  EXPECT_EQ(symmetricParams.offset, 0);
  EXPECT_NEAR(symmetricParams.scale, 10.0 / 255, 0.001);

  // Map float [-8.0; -2.0] to int [-128; 127].
  // => [-8.0; 8.0] range for symmetric mode.
  symmetricParams = chooseQuantizationParams(
      {-8.0, -2.0}, quantization::Schema::Symmetric, ElemKind::Int8QTy);
  EXPECT_EQ(symmetricParams.offset, 0);
  EXPECT_NEAR(symmetricParams.scale, 16.0 / 255, 0.001);
}
1856 | |
/// Check that our asymmetric quantization schema produces
/// the expected scales and offsets for various ranges for Int16.
TEST(Quantization, chooseQuantizationAsymmetricInt16) {
  // Map float [0.0; 6.0] to int [-32768; 32767].
  TensorQuantizationParams asymmetricParams = chooseQuantizationParams(
      {0.0, 6.0}, quantization::Schema::Asymmetric, ElemKind::Int16QTy);
  // Dequantization formula is scale(X - offset).
  // So
  // 1. scale(-32768 - offset) == 0.0
  // 2. scale(32767 - offset) == 6.0
  // Given scale != 0, #1 gives -32768 == offset
  // Then #2, gives scale == 6.0 / (32767 - (-32768)).
  EXPECT_EQ(asymmetricParams.offset, -32768);
  EXPECT_NEAR(asymmetricParams.scale, 6.0 / 65535, 0.00009);

  // Map float [-3.0; 3.0] to int [-32768; 32767].
  asymmetricParams = chooseQuantizationParams(
      {-3.0, 3.0}, quantization::Schema::Asymmetric, ElemKind::Int16QTy);
  // Dequantization formula is scale(X - offset).
  // So in theory, we should get
  // 1. scale(-32768 - offset) == -3.0
  // 2. scale(32767 - offset) == 3.0
  // Given scale != 0, #1 + #2 gives scale(-32768 + 32767 - 2*offset) == 0.0
  // offset == -1 / -2 == 0.5
  // Then #2 or #1, gives scale == 3.0 / 32767.5.
  // However, when we get symmetric ranges (i.e., [-X; X]),
  // we actually force the zero point to map to 0.
  // In other words, scale(0 - offset) == 0.0, so our offset is 0.
  // Then our scale is simply: (inputMax - inputMin) / (outputMax - outputMin).
  // (3.0 - (-3.0)) / (32767 - (-32768)) == 6.0 / 65535.
  EXPECT_EQ(asymmetricParams.offset, 0);
  EXPECT_NEAR(asymmetricParams.scale, 6.0 / 65535, 0.00009);

  // Map float [-2.0; 5.0] to int [-32768; 32767].
  asymmetricParams = chooseQuantizationParams(
      {-2.0, 5.0}, quantization::Schema::Asymmetric, ElemKind::Int16QTy);
  // Scale: (5.0 - (-2.0)) / (32767 - (-32768)) == 7.0 / 65535.0
  // Offset from min: scale(-32768 - offset) == -2.0
  //                  7.0 / 65535.0 * (-32768 - offset) == -2.0
  //                  -32768 - offset == -2.0 * 65535.0 / 7.0
  //                  offset == 2.0 * 65535.0 / 7.0 - 32768
  //                  offset == ~-14044
  EXPECT_EQ(asymmetricParams.offset, std::round(2.0 * 65535 / 7.0 - 32768));
  EXPECT_NEAR(asymmetricParams.scale, 7.0 / 65535, 0.00009);

  // Map float [2.0; 5.0] to int [-32768; 32767].
  // Make sure we extend the range to include 0.0, i.e.,
  // we really map [0.0; 5.0] to int [-32768; 32767].
  asymmetricParams = chooseQuantizationParams(
      {2.0, 5.0}, quantization::Schema::Asymmetric, ElemKind::Int16QTy);
  // Scale: (5.0 - (0.0)) / (32767 - (-32768)) == 5.0 / 65535.0
  // Offset from min: scale(-32768 - offset) == 0.0
  EXPECT_EQ(asymmetricParams.offset, -32768);
  EXPECT_NEAR(asymmetricParams.scale, 5.0 / 65535, 0.00009);

  // Map float [-8.0; -2.0] to int [-32768; 32767].
  // Make sure we extend the range to include 0.0, i.e.,
  // we really map [-8.0; 0.0] to int [-32768; 32767].
  asymmetricParams = chooseQuantizationParams(
      {-8.0, -2.0}, quantization::Schema::Asymmetric, ElemKind::Int16QTy);
  // Scale: (0.0 - (-8.0)) / (32767 - (-32768)) == 8.0 / 65535.0
  // Offset from min: scale(32767 - offset) == 0.0
  EXPECT_EQ(asymmetricParams.offset, 32767);
  EXPECT_NEAR(asymmetricParams.scale, 8.0 / 65535, 0.00009);
}
1922 | |
/// Check that our symmetric quantization schema produces
/// the expected scales and offsets for various ranges for Int16.
TEST(Quantization, chooseQuantizationSymmetricInt16) {
  // Map float [0.0; 6.0] to int [-32768; 32767].
  // With symmetric mapping, we basically map [-6.0; 6.0]
  TensorQuantizationParams symmetricParams = chooseQuantizationParams(
      {0.0, 6.0}, quantization::Schema::Symmetric, ElemKind::Int16QTy);
  // With symmetric mapping offset should always be zero.
  EXPECT_EQ(symmetricParams.offset, 0);
  EXPECT_NEAR(symmetricParams.scale, 12.0 / 65535, 0.00009);

  // Map float [-3.0; 3.0] to int [-32768; 32767].
  symmetricParams = chooseQuantizationParams(
      {-3.0, 3.0}, quantization::Schema::Symmetric, ElemKind::Int16QTy);
  EXPECT_EQ(symmetricParams.offset, 0);
  EXPECT_NEAR(symmetricParams.scale, 6.0 / 65535, 0.00009);

  // Map float [-2.0; 5.0] to int [-32768; 32767].
  // => [-5.0; 5.0] range for symmetric mode.
  symmetricParams = chooseQuantizationParams(
      {-2.0, 5.0}, quantization::Schema::Symmetric, ElemKind::Int16QTy);
  EXPECT_EQ(symmetricParams.offset, 0);
  EXPECT_NEAR(symmetricParams.scale, 10.0 / 65535, 0.00009);

  // Map float [2.0; 5.0] to int [-32768; 32767].
  // Ranges are extended to include 0.
  // => [0.0; 5.0] range for symmetric mode.
  symmetricParams = chooseQuantizationParams(
      {2.0, 5.0}, quantization::Schema::Symmetric, ElemKind::Int16QTy);
  // Symmetric mode then mirrors the range around 0 => [-5.0; 5.0].
  // Scale: (5.0 - (-5.0)) / (32767 - (-32768)) == 10.0 / 65535.0
  EXPECT_EQ(symmetricParams.offset, 0);
  EXPECT_NEAR(symmetricParams.scale, 10.0 / 65535, 0.00009);

  // Map float [-8.0; -2.0] to int [-32768; 32767].
  // => [-8.0; 8.0] range for symmetric mode.
  symmetricParams = chooseQuantizationParams(
      {-8.0, -2.0}, quantization::Schema::Symmetric, ElemKind::Int16QTy);
  EXPECT_EQ(symmetricParams.offset, 0);
  EXPECT_NEAR(symmetricParams.scale, 16.0 / 65535, 0.00009);
}
1964 | |
1965 | /// Check quantization symmetry in presence of infinities. |
1966 | TEST(Quantization, chooseQuantizationSymmetricInf) { |
1967 | auto sym = quantization::Schema::Symmetric; |
1968 | // Check for Int8 precision. |
1969 | EXPECT_EQ( |
1970 | chooseQuantizationParams({-INFINITY, INFINITY}, sym, ElemKind::Int8QTy) |
1971 | .offset, |
1972 | 0); |
1973 | EXPECT_EQ( |
1974 | chooseQuantizationParams({INFINITY, INFINITY}, sym, ElemKind::Int8QTy) |
1975 | .offset, |
1976 | 0); |
1977 | EXPECT_EQ( |
1978 | chooseQuantizationParams({-INFINITY, -INFINITY}, sym, ElemKind::Int8QTy) |
1979 | .offset, |
1980 | 0); |
1981 | EXPECT_EQ(chooseQuantizationParams({-INFINITY, 1.0f}, sym, ElemKind::Int8QTy) |
1982 | .offset, |
1983 | 0); |
1984 | EXPECT_EQ(chooseQuantizationParams({-INFINITY, -1.0f}, sym, ElemKind::Int8QTy) |
1985 | .offset, |
1986 | 0); |
1987 | EXPECT_EQ(chooseQuantizationParams({-1.0f, INFINITY}, sym, ElemKind::Int8QTy) |
1988 | .offset, |
1989 | 0); |
1990 | EXPECT_EQ( |
1991 | chooseQuantizationParams({1.0f, INFINITY}, sym, ElemKind::Int8QTy).offset, |
1992 | 0); |
1993 | // Check for Int16 precision. |
1994 | EXPECT_EQ( |
1995 | chooseQuantizationParams({-INFINITY, INFINITY}, sym, ElemKind::Int16QTy) |
1996 | .offset, |
1997 | 0); |
1998 | EXPECT_EQ( |
1999 | chooseQuantizationParams({INFINITY, INFINITY}, sym, ElemKind::Int16QTy) |
2000 | .offset, |
2001 | 0); |
2002 | EXPECT_EQ( |
2003 | chooseQuantizationParams({-INFINITY, -INFINITY}, sym, ElemKind::Int16QTy) |
2004 | .offset, |
2005 | 0); |
2006 | EXPECT_EQ(chooseQuantizationParams({-INFINITY, 1.0f}, sym, ElemKind::Int16QTy) |
2007 | .offset, |
2008 | 0); |
2009 | EXPECT_EQ( |
2010 | chooseQuantizationParams({-INFINITY, -1.0f}, sym, ElemKind::Int16QTy) |
2011 | .offset, |
2012 | 0); |
2013 | EXPECT_EQ(chooseQuantizationParams({-1.0f, INFINITY}, sym, ElemKind::Int16QTy) |
2014 | .offset, |
2015 | 0); |
2016 | EXPECT_EQ(chooseQuantizationParams({1.0f, INFINITY}, sym, ElemKind::Int16QTy) |
2017 | .offset, |
2018 | 0); |
2019 | } |
2020 | |
2021 | /// Check that Relu can use our symmetric quantization schema. |
2022 | TEST(Quantization, reluCanUseSymmetricSchema) { |
2023 | PlaceholderBindings bindings; |
2024 | ExecutionEngine EE{}; |
2025 | auto &mod = EE.getModule(); |
2026 | Function *F = mod.createFunction("main" ); |
2027 | |
2028 | Placeholder *input = |
2029 | mod.createPlaceholder(ElemKind::FloatTy, {10}, "input" , false); |
2030 | auto *inputTensor = bindings.allocate(input); |
2031 | auto IH = inputTensor->getHandle<float>(); |
2032 | for (dim_t i = 0; i < 10; i++) { |
2033 | IH.at({i}) = (i % 2 == 0) ? 5 : -5; |
2034 | } |
2035 | |
2036 | // Create symmetric params that will be used for Relu. |
2037 | TensorQuantizationParams reluParams = |
2038 | chooseQuantizationParams({0.0, 10.0}, quantization::Schema::Symmetric); |
2039 | TypeRef reluTy = mod.uniqueType(ElemKind::Int8QTy, {10}, reluParams.scale, |
2040 | reluParams.offset); |
2041 | TensorQuantizationParams inputParams = |
2042 | chooseQuantizationParams({-10.0, 10.0}, quantization::Schema::Symmetric); |
2043 | |
2044 | QuantizeNode *QN = |
2045 | F->createQuantize("quant" , input, |
2046 | mod.uniqueType(ElemKind::Int8QTy, {10}, |
2047 | inputParams.scale, inputParams.offset)); |
2048 | ReluNode *RN = F->createRELU("relu" , QN, reluTy); |
2049 | DequantizeNode *DN = F->createDequantize("dequantize" , RN, ElemKind::FloatTy); |
2050 | SaveNode *SN = F->createSave("save" , DN); |
2051 | auto *res = bindings.allocate(SN->getPlaceholder()); |
2052 | |
2053 | EE.compile(CompilationMode::Infer); |
2054 | EE.run(bindings); |
2055 | |
2056 | // Verify all negative values were correctly set to zero. |
2057 | auto RH = res->getHandle(); |
2058 | for (dim_t i = 0; i < 10; i++) { |
2059 | if (i % 2 == 0) { |
2060 | EXPECT_NEAR(RH.at({i}), 5, 0.05); |
2061 | } else { |
2062 | EXPECT_EQ(RH.at({i}), 0); |
2063 | } |
2064 | } |
2065 | } |
2066 | |
2067 | /// Check that our symmetric with uint8 quantization schema produces |
2068 | /// the expected scales and offsets for various ranges. |
2069 | TEST(Quantization, chooseQuantizationSymmetricWithUInt8) { |
2070 | // Map float [0.0; 6.0] to int [-128; 127]. |
2071 | // With symmetric with uint8 mapping, we basically map [0.0; 6.0] |
2072 | TensorQuantizationParams symmetricParams = chooseQuantizationParams( |
2073 | {0.0, 6.0}, quantization::Schema::SymmetricWithUnsigned); |
2074 | // Given this is a purely positive range, we should use uint8, |
2075 | // thus int8 - (-128). |
2076 | EXPECT_EQ(symmetricParams.offset, -128); |
2077 | EXPECT_NEAR(symmetricParams.scale, 6.0 / 255, 0.001); |
2078 | |
2079 | // Map float [-3.0; 3.0] to int [-128; 127]. |
2080 | symmetricParams = chooseQuantizationParams( |
2081 | {-3.0, 3.0}, quantization::Schema::SymmetricWithUnsigned); |
2082 | EXPECT_EQ(symmetricParams.offset, 0); |
2083 | EXPECT_NEAR(symmetricParams.scale, 6.0 / 255, 0.001); |
2084 | |
2085 | // Map float [-2.0; 5.0] to int [-128; 127]. |
2086 | // This has negative value, thus we fall back to purely symmetric. |
2087 | // => [-5.0; 5.0] range for symmetric mode. |
2088 | symmetricParams = chooseQuantizationParams( |
2089 | {-2.0, 5.0}, quantization::Schema::SymmetricWithUnsigned); |
2090 | EXPECT_EQ(symmetricParams.offset, 0); |
2091 | EXPECT_NEAR(symmetricParams.scale, 10.0 / 255, 0.001); |
2092 | |
2093 | // Map float [0; 0] to int [-128; 127]. |
2094 | symmetricParams = chooseQuantizationParams( |
2095 | {0.0, 0.0}, quantization::Schema::SymmetricWithUnsigned); |
2096 | EXPECT_EQ(symmetricParams.offset, 0); |
2097 | EXPECT_NEAR(symmetricParams.scale, 0.1, 0.001); |
2098 | |
2099 | // Map float [2.0; 5.0] to int [-128; 127]. |
2100 | // All positive, using uint8. |
2101 | // However, our quantization schemas always include zero. |
2102 | // => [0.0; 5.0] range for uint8 mode. |
2103 | symmetricParams = chooseQuantizationParams( |
2104 | {2.0, 5.0}, quantization::Schema::SymmetricWithUnsigned); |
2105 | // Scale: (5.0 - (0.0)) / (127 - (-128)) == 5.0 / 255.0 |
2106 | // Offset from min: scale(-128 - offset) == 0.0 |
2107 | EXPECT_EQ(symmetricParams.offset, -128); |
2108 | EXPECT_NEAR(symmetricParams.scale, 5.0 / 255, 0.001); |
2109 | |
2110 | // Map float [-8.0; -2.0] to int [-128; 127]. |
2111 | // => [-8.0; 8.0] range for symmetric mode. |
2112 | symmetricParams = chooseQuantizationParams( |
2113 | {-8.0, -2.0}, quantization::Schema::SymmetricWithUnsigned); |
2114 | EXPECT_EQ(symmetricParams.offset, 0); |
2115 | EXPECT_NEAR(symmetricParams.scale, 16.0 / 255, 0.001); |
2116 | } |
2117 | |
2118 | /// Verify the SymmetricWithPower2Scale quantization schema. |
2119 | static void chooseQuantParamsPower2Scale(float min, float max, ElemKind qTy) { |
2120 | auto quantParams = quantization::chooseQuantizationParams( |
2121 | {min, max}, quantization::Schema::SymmetricWithPower2Scale, qTy); |
2122 | EXPECT_EQ(quantParams.offset, 0); |
2123 | EXPECT_TRUE(quantization::isFloatPowerOf2(quantParams.scale)); |
2124 | } |
2125 | |
TEST(Quantization, chooseQuantizationSymmetricWithPower2Scale) {
  // Cover a mixed-sign, a purely positive, and a purely negative range, one
  // per supported integer precision.
  chooseQuantParamsPower2Scale(-3.0, 6.0, ElemKind::Int8QTy);
  chooseQuantParamsPower2Scale(3.0, 6.0, ElemKind::Int16QTy);
  chooseQuantParamsPower2Scale(-6.0, 0.0, ElemKind::Int32QTy);
}
2131 | |
2132 | /// Check that LRN and Softmax are quantized. |
2133 | TEST(Quantization, quantizeSoftmaxAndLRN) { |
2134 | ExecutionEngine EE{}; |
2135 | PlaceholderBindings bindings; |
2136 | std::unique_ptr<Backend> backend(new MockQuantBackend); |
2137 | EE.setBackendName("Interpreter" ); |
2138 | |
2139 | auto &mod = EE.getModule(); |
2140 | Function *F = mod.createFunction("main" ); |
2141 | |
2142 | auto *input = |
2143 | mod.createPlaceholder(ElemKind::FloatTy, {1, 10}, "input" , true); |
2144 | auto *selected = |
2145 | mod.createPlaceholder(ElemKind::Int64ITy, {1, 10}, "selected" , true); |
2146 | auto *LRN = |
2147 | F->createLocalResponseNormalization("LRN" , input, 2, 1.0, 0.0001, 0.75); |
2148 | auto *SM = F->createSoftMax("softmax" , LRN, selected); |
2149 | auto *SN = F->createSave("ret" , SM); |
2150 | |
2151 | quantization::QuantizationConfiguration quantConfig{ |
2152 | {{input->getOutput().generateNodeOutputName(), {0.2f, 2.0f}}, |
2153 | {LRN->getResult().generateNodeOutputName(LRN->getName().str()), |
2154 | {0.3f, 3.0f}}, |
2155 | {SM->getResult().generateNodeOutputName(SM->getName().str()), |
2156 | {0.4f, 4.0f}}, |
2157 | {NodeValue::generateNodeOutputName(SN->getName().str()), {0.4f, 4.0f}}}}; |
2158 | |
2159 | quantConfig.assertAllNodesQuantized = true; |
2160 | quantization::quantizeFunction(F, quantConfig, *backend); |
2161 | |
2162 | auto qLRNIt = std::find_if( |
2163 | F->getNodes().begin(), F->getNodes().end(), [](const Node &node) -> bool { |
2164 | return llvm::isa<LocalResponseNormalizationNode>(&node) && |
2165 | node.getNthResult(LocalResponseNormalizationNode::ResultIdx) |
2166 | .getType() |
2167 | ->isQuantizedType(); |
2168 | }); |
2169 | ASSERT_NE(qLRNIt, F->getNodes().end()); |
2170 | auto qSMIt = std::find_if(F->getNodes().begin(), F->getNodes().end(), |
2171 | [](const Node &node) -> bool { |
2172 | return llvm::isa<SoftMaxNode>(&node) && |
2173 | node.getNthResult(SoftMaxNode::ResultIdx) |
2174 | .getType() |
2175 | ->isQuantizedType(); |
2176 | }); |
2177 | ASSERT_NE(qSMIt, F->getNodes().end()); |
2178 | |
2179 | // Make sure that SaveNode is not quantized. |
2180 | for (const auto &node : F->getNodes()) { |
2181 | if (auto *saveNode = llvm::dyn_cast<SaveNode>(&node)) { |
2182 | EXPECT_FALSE(saveNode->getInput().getType()->isQuantizedType()); |
2183 | } |
2184 | } |
2185 | } |
2186 | |
2187 | /// Check that Select is quantized. |
2188 | TEST(Quantization, quantizeSelect) { |
2189 | ExecutionEngine EE{}; |
2190 | PlaceholderBindings bindings; |
2191 | std::unique_ptr<Backend> backend(new MockQuantBackend); |
2192 | EE.setBackendName("Interpreter" ); |
2193 | |
2194 | auto &mod = EE.getModule(); |
2195 | Function *F = mod.createFunction("main" ); |
2196 | |
2197 | auto *LHS = mod.createPlaceholder(ElemKind::FloatTy, {1, 10}, "LHS" , false); |
2198 | auto *RHS = mod.createPlaceholder(ElemKind::FloatTy, {1, 10}, "RHS" , false); |
2199 | auto *cond = mod.createPlaceholder(ElemKind::BoolTy, {1, 10}, "cond" , false); |
2200 | auto *select = F->createSelect("select" , cond, LHS, RHS); |
2201 | F->createSave("save" , select); |
2202 | |
2203 | TensorProfilingParams LHSPP = {0.0, 1.0}; |
2204 | TensorProfilingParams RHSPP = {-1.3, 2.7}; |
2205 | TensorProfilingParams selectPP = {-2, 3.1}; |
2206 | |
2207 | quantization::QuantizationConfiguration quantConfig{ |
2208 | {{LHS->getOutput().generateNodeOutputName(), LHSPP}, |
2209 | {RHS->getOutput().generateNodeOutputName(), RHSPP}, |
2210 | {select->getResult().generateNodeOutputName(), selectPP}}}; |
2211 | |
2212 | quantConfig.assertAllNodesQuantized = true; |
2213 | quantization::quantizeFunction(F, quantConfig, *backend); |
2214 | |
2215 | // Get quantization parameters for verification. |
2216 | TensorQuantizationParams LHSQP = chooseQuantizationParams( |
2217 | LHSPP, quantConfig.schema, quantConfig.precision); |
2218 | TensorQuantizationParams RHSQP = chooseQuantizationParams( |
2219 | RHSPP, quantConfig.schema, quantConfig.precision); |
2220 | TensorQuantizationParams selectQP = chooseQuantizationParams( |
2221 | selectPP, quantConfig.schema, quantConfig.precision); |
2222 | |
2223 | auto it = std::find_if( |
2224 | F->getNodes().begin(), F->getNodes().end(), |
2225 | [](const Node &node) -> bool { return llvm::isa<SelectNode>(&node); }); |
2226 | ASSERT_NE(it, F->getNodes().end()); |
2227 | |
2228 | SelectNode *qSelect = llvm::cast<SelectNode>(&(*it)); |
2229 | TypeRef qSelectTy = qSelect->getResult().getType(); |
2230 | TypeRef qLHSTy = qSelect->getLHS().getType(); |
2231 | TypeRef qRHSTy = qSelect->getRHS().getType(); |
2232 | |
2233 | ASSERT_TRUE(qSelectTy->isQuantizedType()); |
2234 | EXPECT_EQ(qSelectTy->getScale(), selectQP.scale); |
2235 | EXPECT_EQ(qSelectTy->getOffset(), selectQP.offset); |
2236 | EXPECT_EQ(qLHSTy->getScale(), LHSQP.scale); |
2237 | EXPECT_EQ(qLHSTy->getOffset(), LHSQP.offset); |
2238 | EXPECT_EQ(qRHSTy->getScale(), RHSQP.scale); |
2239 | EXPECT_EQ(qRHSTy->getOffset(), RHSQP.offset); |
2240 | } |
2241 | |
2242 | /// Check that AvgPool is quantized, and its input and output have different |
2243 | /// scale and offset. |
2244 | TEST(Quantization, quantizeAvgPool) { |
2245 | ExecutionEngine EE{}; |
2246 | PlaceholderBindings bindings; |
2247 | std::unique_ptr<Backend> backend(new MockQuantBackend); |
2248 | EE.setBackendName("Interpreter" ); |
2249 | |
2250 | auto &mod = EE.getModule(); |
2251 | Function *F = mod.createFunction("main" ); |
2252 | |
2253 | auto *input = |
2254 | mod.createPlaceholder(ElemKind::FloatTy, {1, 3, 3, 1}, "input" , true); |
2255 | auto *pool = F->createAvgPool("pool" , input, {2, 2}, {1, 1}, {0, 0, 0, 0}); |
2256 | auto *s = F->createSave("save" , pool); |
2257 | |
2258 | quantization::QuantizationConfiguration quantConfig{{ |
2259 | {input->getOutput().generateNodeOutputName(), {-2.0f, 2.0f}}, |
2260 | {pool->getResult().generateNodeOutputName(), {0.3f, 3.0f}}, |
2261 | {NodeValue::generateNodeOutputName(s->getName().str()), {0.4f, 4.0f}}, |
2262 | }}; |
2263 | |
2264 | quantConfig.assertAllNodesQuantized = true; |
2265 | quantization::quantizeFunction(F, quantConfig, *backend); |
2266 | |
2267 | auto qPool = std::find_if(F->getNodes().begin(), F->getNodes().end(), |
2268 | [](const Node &node) -> bool { |
2269 | return llvm::isa<AvgPoolNode>(&node) && |
2270 | node.getNthResult(AvgPoolNode::ResultIdx) |
2271 | .getType() |
2272 | ->isQuantizedType(); |
2273 | }); |
2274 | ASSERT_NE(qPool, F->getNodes().end()); |
2275 | auto *avgPool = llvm::cast<AvgPoolNode>(qPool); |
2276 | ASSERT_NE(avgPool->getInput().getType()->getScale(), |
2277 | avgPool->getResult().getType()->getScale()); |
2278 | ASSERT_NE(avgPool->getInput().getType()->getOffset(), |
2279 | avgPool->getResult().getType()->getOffset()); |
2280 | } |
2281 | |
/// Test option to disable quantization of specific node kinds in the graph.
TEST(Quantization, quantizeGraphPartially) {
  ExecutionEngine EE{};
  PlaceholderBindings bindings;
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  // Build matmul(lhs, rhs) -> tanh -> save with randomly initialized inputs.
  auto *LHS = mod.createPlaceholder(ElemKind::FloatTy, {3, 3}, "lhs", true);
  auto *RHS = mod.createPlaceholder(ElemKind::FloatTy, {3, 3}, "rhs", true);
  bindings.allocate(LHS)->init(Tensor::InitKind::Xavier, 3, mod.getPRNG());
  bindings.allocate(RHS)->init(Tensor::InitKind::Xavier, 3, mod.getPRNG());

  auto *MMN = F->createMatMul("matmul", LHS, RHS);
  auto *TN = F->createTanh("tanh", MMN);
  auto *save = F->createSave("ret", TN);
  auto *result = save->getPlaceholder();
  bindings.allocate(result);

  // Note that we are creating profiling info even for nodes that will not be
  // quantized. This is how we expect quantizeFunction() to behave, as
  // quantization profiling will still get a profile for these nodes.
  quantization::QuantizationConfiguration quantConfig{{
      {LHS->getOutput().generateNodeOutputName(), {0.3f, 3.0f}},
      {RHS->getOutput().generateNodeOutputName(), {0.4f, 4.0f}},
      {MMN->getResult().generateNodeOutputName(), {0.6f, 6.0f}},
      {TN->getResult().generateNodeOutputName(), {0.5f, 5.0f}},
  }};

  // Do not quantize any tanh nodes.
  KindSet doNotQuantizeKinds;
  doNotQuantizeKinds.insert(Kinded::Kind::TanhNodeKind);

  quantConfig.assertAllNodesQuantized = true;
  std::unique_ptr<Backend> backend(createBackend(EE.getBackendName()));
  quantization::quantizeFunction(F, quantConfig, *backend,
                                 /* loweredMap */ {}, doNotQuantizeKinds);

  // Make sure that graph can be compiled and run.
  ::glow::convertPlaceholdersToConstants(F, bindings, {result});

  CompilationContext cctx;
  cctx.compMode = CompilationMode::Infer;
  // Do not perform any compile-time constant folding.
  cctx.optimizationOpts.enableConstantFolding = false;
  EE.compile(cctx);

  EE.run(bindings);

  // Walk the graph from the output placeholder back to the inputs, checking
  // that only the tanh was left in float while everything else is quantized.
  {
    // Verify that the output variable is not quantized, and that it has a
    // single save node writer, which is also not quantized.
    EXPECT_TRUE(!result->getType()->isQuantizedType());
    ASSERT_EQ(result->getUsers().size(), 1);
    auto *SN = llvm::dyn_cast<SaveNode>(result->getUsers().begin()->getUser());
    ASSERT_TRUE(SN);
    EXPECT_TRUE(!SN->getOutput().getType()->isQuantizedType());

    // Verify that the tanh is not quantized.
    auto *TN = llvm::dyn_cast<TanhNode>(SN->getInput());
    ASSERT_TRUE(TN);
    EXPECT_TRUE(!TN->getResult().getType()->isQuantizedType());

    // Verify that the input to the tanh is a dequantize node.
    auto *DN = llvm::dyn_cast<DequantizeNode>(TN->getInput());
    ASSERT_TRUE(DN);

    // Verify that the matmul is quantized.
    auto *MMN = llvm::dyn_cast<MatMulNode>(DN->getInput());
    ASSERT_TRUE(MMN);
    EXPECT_TRUE(MMN->getResult().getType()->isQuantizedType());

    // Verify that the variable inputs to the matmul are quantized.
    // (They became Constants via convertPlaceholdersToConstants above.)
    auto *LHS = llvm::dyn_cast<Constant>(MMN->getLHS());
    ASSERT_TRUE(LHS);
    EXPECT_TRUE(LHS->getType()->isQuantizedType());

    auto *RHS = llvm::dyn_cast<Constant>(MMN->getRHS());
    ASSERT_TRUE(RHS);
    EXPECT_TRUE(RHS->getType()->isQuantizedType());
  }
}
2363 | |
/// Test option to disable quantization of specific node kinds in the graph,
/// where there are multiple of that node kind.
TEST(Quantization, quantizeGraphPartiallyMultipleNodes) {
  ExecutionEngine EE{};
  PlaceholderBindings bindings;
  auto &mod = EE.getModule();
  Function *F = mod.createFunction("main");

  auto *LHS = mod.createPlaceholder(ElemKind::FloatTy, {3, 3}, "lhs", true);
  auto *RHS = mod.createPlaceholder(ElemKind::FloatTy, {3, 3}, "rhs", true);
  bindings.allocate(LHS)->init(Tensor::InitKind::Xavier, 3, mod.getPRNG());
  bindings.allocate(RHS)->init(Tensor::InitKind::Xavier, 3, mod.getPRNG());

  // Two tanh nodes: one feeding the matmul LHS, one consuming its result.
  // Both must be skipped by the do-not-quantize filter below.
  auto *TNLHS = F->createTanh("tanh", LHS);
  auto *MMN = F->createMatMul("matmul", TNLHS, RHS);
  auto *TN = F->createTanh("tanh", MMN);
  auto *save = F->createSave("ret", TN);
  auto *result = save->getPlaceholder();
  bindings.allocate(result);

  // Note that we are creating profiling info even for nodes that will not be
  // quantized. This is how we expect quantizeFunction() to behave, as
  // quantization profiling will still get a profile for these nodes.
  quantization::QuantizationConfiguration quantConfig{{
      {LHS->getOutput().generateNodeOutputName(), {0.3f, 3.0f}},
      {TNLHS->getResult().generateNodeOutputName(), {0.4f, 4.0f}},
      {RHS->getOutput().generateNodeOutputName(), {0.4f, 4.0f}},
      {MMN->getResult().generateNodeOutputName(), {0.6f, 6.0f}},
      {TN->getResult().generateNodeOutputName(), {0.5f, 5.0f}},
  }};

  // Do not quantize any tanh nodes.
  KindSet doNotQuantizeKinds;
  doNotQuantizeKinds.insert(Kinded::Kind::TanhNodeKind);

  quantConfig.assertAllNodesQuantized = true;
  std::unique_ptr<Backend> backend(createBackend(EE.getBackendName()));
  quantization::quantizeFunction(F, quantConfig, *backend,
                                 /* loweredMap */ {}, doNotQuantizeKinds);

  // Make sure that graph can be compiled and run.
  ::glow::convertPlaceholdersToConstants(F, bindings, {result});

  CompilationContext cctx;
  cctx.compMode = CompilationMode::Infer;
  // Do not perform any compile-time constant folding.
  cctx.optimizationOpts.enableConstantFolding = false;
  EE.compile(cctx);

  EE.run(bindings);

  // Walk the graph from the output placeholder back to the inputs, checking
  // that both tanh nodes stayed in float while the matmul was quantized.
  {
    // Verify that the output variable is not quantized, and that it has a
    // single save node writer, which is also not quantized.
    EXPECT_TRUE(!result->getType()->isQuantizedType());
    ASSERT_EQ(result->getUsers().size(), 1);
    auto *SN = llvm::dyn_cast<SaveNode>(result->getUsers().begin()->getUser());
    ASSERT_TRUE(SN);
    EXPECT_TRUE(!SN->getOutput().getType()->isQuantizedType());

    // Verify that the tanh is not quantized.
    auto *TN1 = llvm::dyn_cast<TanhNode>(SN->getInput());
    ASSERT_TRUE(TN1);
    EXPECT_TRUE(!TN1->getResult().getType()->isQuantizedType());

    // Verify that the input to the tanh is a dequantize node.
    auto *DN = llvm::dyn_cast<DequantizeNode>(TN1->getInput());
    ASSERT_TRUE(DN);

    // Verify that the matmul is quantized.
    auto *MMN = llvm::dyn_cast<MatMulNode>(DN->getInput());
    ASSERT_TRUE(MMN);
    EXPECT_TRUE(MMN->getResult().getType()->isQuantizedType());

    // Verify that the LHS input is a quantize node.
    auto *QN = llvm::dyn_cast<QuantizeNode>(MMN->getLHS());
    ASSERT_TRUE(QN);

    // Verify that the second tanh node is also not quantized.
    auto *TN2 = llvm::dyn_cast<TanhNode>(QN->getInput());
    ASSERT_TRUE(TN2);
    EXPECT_TRUE(!TN2->getResult().getType()->isQuantizedType());

    // Verify that the input variable to the tanh is not quantized.
    auto *varTN2 = llvm::dyn_cast<Constant>(TN2->getInput());
    ASSERT_TRUE(varTN2);
    EXPECT_TRUE(!varTN2->getType()->isQuantizedType());

    // Verify that the RHS input to the matmul is a quantized variable.
    auto *RHS = llvm::dyn_cast<Constant>(MMN->getRHS());
    ASSERT_TRUE(RHS);
    EXPECT_TRUE(RHS->getType()->isQuantizedType());
  }
}
2458 | |
2459 | /// Test option to disable quantization of multiple specific node kinds in the |
2460 | /// graph. |
2461 | TEST(Quantization, quantizeGraphPartiallyMultipleKinds) { |
2462 | ExecutionEngine EE{}; |
2463 | PlaceholderBindings bindings; |
2464 | auto &mod = EE.getModule(); |
2465 | Function *F = mod.createFunction("main" ); |
2466 | |
2467 | auto *LHS = mod.createPlaceholder(ElemKind::FloatTy, {3, 3}, "lhs" , true); |
2468 | auto *RHS = mod.createPlaceholder(ElemKind::FloatTy, {3, 3}, "rhs" , true); |
2469 | bindings.allocate(LHS)->init(Tensor::InitKind::Xavier, 3, mod.getPRNG()); |
2470 | bindings.allocate(RHS)->init(Tensor::InitKind::Xavier, 3, mod.getPRNG()); |
2471 | |
2472 | auto *MMN = F->createMatMul("matmul" , LHS, RHS); |
2473 | auto *CN = F->createAdd("concat" , LHS, MMN); |
2474 | auto *TN = F->createTanh("tanh" , CN); |
2475 | auto *save = F->createSave("ret" , TN); |
2476 | auto *result = save->getPlaceholder(); |
2477 | bindings.allocate(result); |
2478 | |
2479 | // Note that we are creating profiling info even for nodes that will not be |
2480 | // quantized. This is how we expect quantizeFunction() to behave, as |
2481 | // quantization profiling will still get a profile for these nodes. |
2482 | quantization::QuantizationConfiguration quantConfig{{ |
2483 | {LHS->getOutput().generateNodeOutputName(), {0.3f, 3.0f}}, |
2484 | {RHS->getOutput().generateNodeOutputName(), {0.4f, 4.0f}}, |
2485 | {MMN->getResult().generateNodeOutputName(), {0.6f, 6.0f}}, |
2486 | {CN->getResult().generateNodeOutputName(), {0.6f, 6.0f}}, |
2487 | {TN->getResult().generateNodeOutputName(), {0.5f, 5.0f}}, |
2488 | }}; |
2489 | |
2490 | // Do not quantize any tanh or add nodes. |
2491 | KindSet doNotQuantizeKinds; |
2492 | doNotQuantizeKinds.insert(Kinded::Kind::TanhNodeKind); |
2493 | doNotQuantizeKinds.insert(Kinded::Kind::AddNodeKind); |
2494 | |
2495 | quantConfig.assertAllNodesQuantized = true; |
2496 | std::unique_ptr<Backend> backend(createBackend(EE.getBackendName())); |
2497 | quantization::quantizeFunction(F, quantConfig, *backend, |
2498 | /* loweredMap */ {}, doNotQuantizeKinds); |
2499 | |
2500 | // Make sure that graph can be compiled and run. |
2501 | ::glow::convertPlaceholdersToConstants(F, bindings, {result}); |
2502 | |
2503 | CompilationContext cctx; |
2504 | cctx.compMode = CompilationMode::Infer; |
2505 | // Do not perform any compile-time constant folding. |
2506 | cctx.optimizationOpts.enableConstantFolding = false; |
2507 | EE.compile(cctx); |
2508 | |
2509 | EE.run(bindings); |
2510 | |
2511 | { |
2512 | // Verify that the output variable is not quantized, and that it has a |
2513 | // single save node writer, which is also not quantized. |
2514 | EXPECT_TRUE(!result->getType()->isQuantizedType()); |
2515 | ASSERT_EQ(result->getUsers().size(), 1); |
2516 | auto *SN = llvm::dyn_cast<SaveNode>(result->getUsers().begin()->getUser()); |
2517 | ASSERT_TRUE(SN); |
2518 | EXPECT_TRUE(!SN->getOutput().getType()->isQuantizedType()); |
2519 | |
2520 | // Verify that the tanh is not quantized. |
2521 | auto *TN = llvm::dyn_cast<TanhNode>(SN->getInput()); |
2522 | ASSERT_TRUE(TN); |
2523 | EXPECT_TRUE(!TN->getResult().getType()->isQuantizedType()); |
2524 | |
2525 | // Verify that the input to the tanh is a non-quantized add node. |
2526 | auto *AN = llvm::dyn_cast<AddNode>(TN->getInput()); |
2527 | ASSERT_TRUE(AN); |
2528 | EXPECT_TRUE(!TN->getResult().getType()->isQuantizedType()); |
2529 | |
2530 | // Verify that the LHS input to the AddNode is an unquantized variable. |
2531 | auto varANLHS = llvm::dyn_cast<Constant>(AN->getLHS()); |
2532 | ASSERT_TRUE(varANLHS); |
2533 | EXPECT_TRUE(!varANLHS->getType()->isQuantizedType()); |
2534 | |
2535 | // Verify that the RHS input to the AddNode is a dequantize node. |
2536 | auto *DN = llvm::dyn_cast<DequantizeNode>(AN->getRHS()); |
2537 | ASSERT_TRUE(DN); |
2538 | |
2539 | // Verify that the matmul is quantized. |
2540 | auto *MMN = llvm::dyn_cast<MatMulNode>(DN->getInput()); |
2541 | ASSERT_TRUE(MMN); |
2542 | EXPECT_TRUE(MMN->getResult().getType()->isQuantizedType()); |
2543 | |
2544 | // Verify that the variable inputs to the matmul are quantized. |
2545 | auto *LHS = llvm::dyn_cast<Constant>(MMN->getLHS()); |
2546 | ASSERT_TRUE(LHS); |
2547 | EXPECT_TRUE(LHS->getType()->isQuantizedType()); |
2548 | |
2549 | auto *RHS = llvm::dyn_cast<Constant>(MMN->getRHS()); |
2550 | ASSERT_TRUE(RHS); |
2551 | EXPECT_TRUE(RHS->getType()->isQuantizedType()); |
2552 | } |
2553 | } |
2554 | |
2555 | /// Check that quantizeFunction directly converts the constants |
2556 | /// instead of leaving quantize node around. |
2557 | TEST(Quantization, quantizeFunctionConvertConstant) { |
2558 | ExecutionEngine EE{}; |
2559 | PlaceholderBindings bindings; |
2560 | auto &mod = EE.getModule(); |
2561 | Function *F = mod.createFunction("main" ); |
2562 | |
2563 | auto *LHS = mod.createPlaceholder(ElemKind::FloatTy, {3, 3}, "lhs" , true); |
2564 | auto *RHS = mod.createConstant(ElemKind::FloatTy, {3, 3}, "rhs" ); |
2565 | bindings.allocate(LHS)->init(Tensor::InitKind::Xavier, 3, mod.getPRNG()); |
2566 | RHS->getPayloadMutable().init(Tensor::InitKind::Xavier, 3, mod.getPRNG()); |
2567 | |
2568 | auto *MMN = F->createMatMul("matmul" , LHS, RHS); |
2569 | auto *save = F->createSave("ret" , MMN); |
2570 | auto *result = save->getPlaceholder(); |
2571 | bindings.allocate(result); |
2572 | |
2573 | // Note that we are creating profiling info even for nodes that will not be |
2574 | // quantized. This is how we expect quantizeFunction() to behave, as |
2575 | // quantization profiling will still get a profile for these nodes. |
2576 | quantization::QuantizationConfiguration quantConfig{{ |
2577 | {LHS->getOutput().generateNodeOutputName(), {0.3f, 3.0f}}, |
2578 | {RHS->getOutput().generateNodeOutputName(), {0.4f, 4.0f}}, |
2579 | {MMN->getResult().generateNodeOutputName(), {0.6f, 6.0f}}, |
2580 | }}; |
2581 | |
2582 | quantConfig.assertAllNodesQuantized = true; |
2583 | std::unique_ptr<Backend> backend(createBackend(EE.getBackendName())); |
2584 | quantization::quantizeFunction(F, quantConfig, *backend); |
2585 | |
2586 | optimize(F, CompilationMode::Infer); |
2587 | CompilationContext cctx; |
2588 | convertQuantizedConstants(F, cctx); |
2589 | |
2590 | { |
2591 | // Verify that the output variable is not quantized, and that it has a |
2592 | // single save node writer, which is also not quantized. |
2593 | EXPECT_TRUE(!result->getType()->isQuantizedType()); |
2594 | ASSERT_EQ(result->getUsers().size(), 1); |
2595 | auto *SN = llvm::dyn_cast<SaveNode>(result->getUsers().begin()->getUser()); |
2596 | ASSERT_TRUE(SN); |
2597 | EXPECT_TRUE(!SN->getOutput().getType()->isQuantizedType()); |
2598 | |
2599 | // Verify that the input to save is a dequantize node. |
2600 | auto *DN = llvm::dyn_cast<DequantizeNode>(SN->getInput()); |
2601 | ASSERT_TRUE(DN); |
2602 | |
2603 | // Verify that the matmul is quantized. |
2604 | auto *MMN = llvm::dyn_cast<MatMulNode>(DN->getInput()); |
2605 | ASSERT_TRUE(MMN); |
2606 | EXPECT_TRUE(MMN->getResult().getType()->isQuantizedType()); |
2607 | |
2608 | // Verify that the variable inputs to the matmul are quantized. |
2609 | auto *LHSQuantize = llvm::dyn_cast<QuantizeNode>(MMN->getLHS()); |
2610 | ASSERT_TRUE(LHSQuantize); |
2611 | EXPECT_EQ(LHSQuantize->getInput().getNode(), LHS); |
2612 | |
2613 | auto *RHS = llvm::dyn_cast<Constant>(MMN->getRHS()); |
2614 | ASSERT_TRUE(RHS); |
2615 | EXPECT_TRUE(RHS->getType()->isQuantizedType()); |
2616 | } |
2617 | |
2618 | // Make sure that graph can be compiled and run. |
2619 | EE.compile(CompilationMode::Infer); |
2620 | |
2621 | EE.run(bindings); |
2622 | } |
2623 | |
2624 | /// Check that the slice node doesn't change the quantization parameters between |
2625 | /// its input and output. |
2626 | TEST(Quantization, quantizeSlice) { |
2627 | ExecutionEngine EE{}; |
2628 | PlaceholderBindings bindings; |
2629 | auto &mod = EE.getModule(); |
2630 | Function *F = mod.createFunction("main" ); |
2631 | |
2632 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {4}, "input" , true); |
2633 | bindings.allocate(input)->init(Tensor::InitKind::Xavier, 3, mod.getPRNG()); |
2634 | |
2635 | auto *slice = F->createSlice("slice" , input, {2}, {3}); |
2636 | auto *save = F->createSave("ret" , slice); |
2637 | auto *result = save->getPlaceholder(); |
2638 | bindings.allocate(result); |
2639 | |
2640 | quantization::QuantizationConfiguration quantConfig{{ |
2641 | {slice->getResult().generateNodeOutputName(), {0.2f, 2.0f}}, |
2642 | {input->getOutput().generateNodeOutputName(), {0.4f, 4.0f}}, |
2643 | }}; |
2644 | |
2645 | // Compute quantization parameters for verification. |
2646 | auto sliceInpTQP = chooseQuantizationParams({0.4, 4.0}, quantConfig.schema, |
2647 | quantConfig.precision); |
2648 | |
2649 | quantConfig.assertAllNodesQuantized = true; |
2650 | std::unique_ptr<Backend> backend(createBackend(EE.getBackendName())); |
2651 | quantization::quantizeFunction(F, quantConfig, *backend); |
2652 | |
2653 | optimize(F, CompilationMode::Infer); |
2654 | |
2655 | { |
2656 | // Verify that the output variable is not quantized, and that it has a |
2657 | // single save node writer, which is also not quantized. |
2658 | EXPECT_TRUE(!result->getType()->isQuantizedType()); |
2659 | ASSERT_EQ(result->getUsers().size(), 1); |
2660 | auto *SN = llvm::dyn_cast<SaveNode>(result->getUsers().begin()->getUser()); |
2661 | ASSERT_TRUE(SN); |
2662 | EXPECT_TRUE(!SN->getOutput().getType()->isQuantizedType()); |
2663 | |
2664 | // Verify that the input to save is a dequantize node. |
2665 | auto *DN = llvm::dyn_cast<DequantizeNode>(SN->getInput()); |
2666 | ASSERT_TRUE(DN); |
2667 | |
2668 | // Verify that the slice is rescaled after being quantized. |
2669 | // The reason we need a rescale is because slicing doesn't perform rescaling |
2670 | // by itself. |
2671 | // Note: after optimization, the RescaleQuantized node created for the Slice |
2672 | // gets merged with the dequantize node. |
2673 | auto *qslice = llvm::dyn_cast<SliceNode>(DN->getInput()); |
2674 | ASSERT_TRUE(qslice); |
2675 | ASSERT_TRUE(qslice->getResult().getType()->isQuantizedType()); |
2676 | EXPECT_EQ(qslice->getResult().getType()->getOffset(), sliceInpTQP.offset); |
2677 | EXPECT_EQ(qslice->getResult().getType()->getScale(), sliceInpTQP.scale); |
2678 | |
2679 | // Verify that the variable inputs to the matmul are quantized. |
2680 | auto *qinput = llvm::dyn_cast<QuantizeNode>(qslice->getInput()); |
2681 | ASSERT_TRUE(qinput); |
2682 | EXPECT_EQ(qinput->getResult().getType()->getOffset(), |
2683 | qslice->getResult().getType()->getOffset()); |
2684 | EXPECT_EQ(qinput->getResult().getType()->getScale(), |
2685 | qslice->getResult().getType()->getScale()); |
2686 | EXPECT_EQ(qinput->getInput().getNode(), input); |
2687 | } |
2688 | |
2689 | // Make sure that graph can be compiled and run. |
2690 | EE.compile(CompilationMode::Infer); |
2691 | |
2692 | EE.run(bindings); |
2693 | } |
2694 | |
2695 | /// Check that the reshape node doesn't change the quantization parameters |
2696 | /// between its input and output. |
2697 | TEST(Quantization, quantizeReshape) { |
2698 | ExecutionEngine EE{}; |
2699 | PlaceholderBindings bindings; |
2700 | auto &mod = EE.getModule(); |
2701 | Function *F = mod.createFunction("main" ); |
2702 | |
2703 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {3, 3}, "input" , true); |
2704 | bindings.allocate(input)->init(Tensor::InitKind::Xavier, 3, mod.getPRNG()); |
2705 | |
2706 | auto *reshape = F->createReshape("reshape" , input, {9}); |
2707 | auto *save = F->createSave("ret" , reshape); |
2708 | auto *result = save->getPlaceholder(); |
2709 | bindings.allocate(result); |
2710 | |
2711 | quantization::QuantizationConfiguration quantConfig{{ |
2712 | {reshape->getResult().generateNodeOutputName(), {0.2f, 2.0f}}, |
2713 | {input->getOutput().generateNodeOutputName(), {0.4f, 4.0f}}, |
2714 | }}; |
2715 | |
2716 | // Compute quantization parameters for verification. |
2717 | auto reshapeInpTQP = chooseQuantizationParams({0.4, 4.0}, quantConfig.schema, |
2718 | quantConfig.precision); |
2719 | |
2720 | quantConfig.assertAllNodesQuantized = true; |
2721 | std::unique_ptr<Backend> backend(createBackend(EE.getBackendName())); |
2722 | quantization::quantizeFunction(F, quantConfig, *backend); |
2723 | |
2724 | { |
2725 | // Verify that the output variable is not quantized, and that it has a |
2726 | // single save node writer, which is also not quantized. |
2727 | EXPECT_TRUE(!result->getType()->isQuantizedType()); |
2728 | ASSERT_EQ(result->getUsers().size(), 1); |
2729 | auto *SN = llvm::dyn_cast<SaveNode>(result->getUsers().begin()->getUser()); |
2730 | ASSERT_TRUE(SN); |
2731 | EXPECT_TRUE(!SN->getOutput().getType()->isQuantizedType()); |
2732 | |
2733 | // Verify that the input to save is a dequantize node. |
2734 | auto *DN = llvm::dyn_cast<DequantizeNode>(SN->getInput()); |
2735 | ASSERT_TRUE(DN); |
2736 | |
2737 | // Verify that the reshape is rescaled after being quantized. |
2738 | // The reason we need a rescale is because reshaping doesn't perform |
2739 | // rescaling by itself. |
2740 | auto *RQ = llvm::dyn_cast<RescaleQuantizedNode>(DN->getInput()); |
2741 | ASSERT_TRUE(RQ); |
2742 | auto *qreshape = llvm::dyn_cast<ReshapeNode>(RQ->getInput()); |
2743 | ASSERT_TRUE(qreshape); |
2744 | ASSERT_TRUE(qreshape->getResult().getType()->isQuantizedType()); |
2745 | EXPECT_EQ(qreshape->getResult().getType()->getOffset(), |
2746 | reshapeInpTQP.offset); |
2747 | EXPECT_EQ(qreshape->getResult().getType()->getScale(), reshapeInpTQP.scale); |
2748 | |
2749 | // Verify that the input to the reshape is quantized. |
2750 | auto *qinput = llvm::dyn_cast<QuantizeNode>(qreshape->getInput()); |
2751 | ASSERT_TRUE(qinput); |
2752 | EXPECT_EQ(qinput->getResult().getType()->getOffset(), |
2753 | qreshape->getResult().getType()->getOffset()); |
2754 | EXPECT_EQ(qinput->getResult().getType()->getScale(), |
2755 | qreshape->getResult().getType()->getScale()); |
2756 | EXPECT_EQ(qinput->getInput().getNode(), input); |
2757 | } |
2758 | |
2759 | // Make sure that graph can be compiled and run. |
2760 | EE.compile(CompilationMode::Infer); |
2761 | |
2762 | EE.run(bindings); |
2763 | } |
2764 | |
2765 | /// Mock backend that does not lower FC nodes. |
2766 | class MockBackendUnloweredFC : public MockBackend { |
2767 | bool shouldLower(const Node *N) const override { |
2768 | if (N->getKind() == Kinded::Kind::FullyConnectedNodeKind) { |
2769 | return false; |
2770 | } |
2771 | return true; |
2772 | } |
2773 | bool isOpSupported(const NodeInfo &NI) const override { return true; } |
2774 | }; |
2775 | |
2776 | /// Mock backend that does lower FC nodes. |
class MockBackendLoweredFC : public MockBackend {
  // Lower every node kind, including FullyConnected.
  bool shouldLower(const Node *N) const override { return true; }
  // Report every operation as supported so quantization is never blocked.
  bool isOpSupported(const NodeInfo &NI) const override { return true; }
};
2781 | |
2782 | /// Create a simple network with an FC given \p bindings, \p EE, and \p F. |
2783 | /// \returns the FC node. |
2784 | static FullyConnectedNode *createSimpleFCNet(PlaceholderBindings &bindings, |
2785 | ExecutionEngine &EE, Function &F) { |
2786 | auto &mod = EE.getModule(); |
2787 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {1, 3}, "input" , true); |
2788 | auto *W = mod.createPlaceholder(ElemKind::FloatTy, {3, 3}, "weights" , true); |
2789 | auto *B = mod.createPlaceholder(ElemKind::FloatTy, {3}, "bias" , true); |
2790 | |
2791 | bindings.allocate(input)->getHandle().randomize(-1.0, 1.0, mod.getPRNG()); |
2792 | bindings.allocate(W)->init(Tensor::InitKind::Xavier, 3, mod.getPRNG()); |
2793 | bindings.allocate(B)->init(Tensor::InitKind::Broadcast, 0.1, mod.getPRNG()); |
2794 | |
2795 | auto *FC = F.createFullyConnected("FC" , input, W, B); |
2796 | auto *S = F.createSave("ret" , FC); |
2797 | ::glow::convertPlaceholdersToConstants(&F, bindings, |
2798 | {input, S->getPlaceholder()}); |
2799 | bindings.allocate(S->getPlaceholder()); |
2800 | |
2801 | return FC; |
2802 | } |
2803 | |
2804 | /// Helper to look for a node with kind \p NodeClass in \p F. If found, \returns |
2805 | /// a pointer to the node. Otherwise \returns a nullptr. |
2806 | template <class NodeClass> |
2807 | static NodeClass *findNodeKindOrReturnNull(Function *F) { |
2808 | auto it = std::find_if( |
2809 | F->getNodes().begin(), F->getNodes().end(), |
2810 | [](const Node &node) -> bool { return llvm::isa<NodeClass>(&node); }); |
2811 | if (it == F->getNodes().end()) { |
2812 | return nullptr; |
2813 | } |
2814 | return &llvm::cast<NodeClass>(*it); |
2815 | } |
2816 | |
2817 | /// Profile and quantize a graph with an FC, and make sure that we find the |
2818 | /// correct quantization parameters, whether the \p BackendClass does or does |
2819 | /// not lower the FC given \p expectLoweredFC. Note that in this test we |
2820 | /// replicate the logic from optimizeFunction(), wherein we lower and then call |
2821 | /// profileQuantization(), in order to ensure each stage of the compilation |
2822 | /// pipeline for profiling/quantization is correct. |
template <class BackendClass>
static void testProfileQuantizationOfFC(bool expectLoweredFC,
                                        bool rowwiseQuantizeFC) {
  // Build the profiling network: a simple FC net on the default backend.
  ExecutionEngine profileEE{};
  Function *profileF = profileEE.getModule().createFunction("profile");
  PlaceholderBindings profilebindings;
  FullyConnectedNode *FC =
      createSimpleFCNet(profilebindings, profileEE, *profileF);
  // Record the output names of the FC and of its inputs now, so that the
  // corresponding profiling infos can be located after the FC is lowered away.
  auto outputNameFC = FC->getResult().generateNodeOutputName();
  auto weightsNameFC = FC->getWeights().generateNodeOutputName();
  auto biasNameFC = FC->getBias().generateNodeOutputName();
  auto inputNameFC = FC->getInput().generateNodeOutputName();

  // Lower everything and keep track of the lowered components source nodes via
  // the loweredMap.
  LoweredInfoMap loweredMapForProf;
  CompilationContext cctx(/* bindings */ nullptr, &loweredMapForProf);
  lower(profileF, cctx);

  // Check that the lowered graph only contains the lowered components of the
  // FC (MM and BA) and not the FC itself.
  auto *loweredFC = findNodeKindOrReturnNull<FullyConnectedNode>(profileF);
  auto *loweredMM = findNodeKindOrReturnNull<MatMulNode>(profileF);
  auto *loweredBA = findNodeKindOrReturnNull<BatchedAddNode>(profileF);
  ASSERT_FALSE(loweredFC);
  ASSERT_TRUE(loweredMM);
  ASSERT_TRUE(loweredBA);
  auto outputNameMM = loweredMM->getResult().generateNodeOutputName();
  auto outputNameBA = loweredBA->getResult().generateNodeOutputName();

  // Instrument the lowered graph with profiling nodes.
  glow::profileQuantization(profilebindings, profileF,
                            cctx.precisionConfig.profConfig);

  // Compile/run to capture profile.
  profileEE.compile(CompilationMode::Infer);
  profileEE.run(profilebindings);

  // Get profiling infos and build new quantized graph, passing in the
  // loweredMapForProf to include the unlowered components in QI.
  profileF = profileEE.getModule().getFunctions().front();
  quantization::QuantizationConfiguration quantConfig{
      quantization::generateNodeProfilingInfos(profilebindings, profileF,
                                               loweredMapForProf)};

  // Verify that we have node profiling infos for the FC and the lowered
  // components of the FC (MM and BA).
  NodeProfilingInfo *FCPI = nullptr, *MMPI = nullptr, *BAPI = nullptr,
                    *FCWPI = nullptr, *FCBPI = nullptr, *FCIPI = nullptr;
  for (NodeProfilingInfo &NPI : quantConfig.infos) {
    if (NPI.nodeOutputName_ == outputNameFC) {
      FCPI = &NPI;
    } else if (NPI.nodeOutputName_ == outputNameMM) {
      MMPI = &NPI;
    } else if (NPI.nodeOutputName_ == outputNameBA) {
      BAPI = &NPI;
    } else if (NPI.nodeOutputName_ == weightsNameFC) {
      FCWPI = &NPI;
    } else if (NPI.nodeOutputName_ == biasNameFC) {
      FCBPI = &NPI;
    } else if (NPI.nodeOutputName_ == inputNameFC) {
      FCIPI = &NPI;
    }
  }
  ASSERT_TRUE(FCPI);
  ASSERT_TRUE(MMPI);
  ASSERT_TRUE(BAPI);
  ASSERT_TRUE(FCWPI);
  ASSERT_TRUE(FCBPI);
  ASSERT_TRUE(FCIPI);

  // Compute quantization parameters for verification. Note that the bias uses
  // quantConfig.precisionBias while all other tensors use quantConfig.precision.
  auto FCTQP = chooseQuantizationParams(
      FCPI->tensorProfilingParams_, quantConfig.schema, quantConfig.precision);
  auto MMTQP = chooseQuantizationParams(
      MMPI->tensorProfilingParams_, quantConfig.schema, quantConfig.precision);
  auto BATQP = chooseQuantizationParams(
      BAPI->tensorProfilingParams_, quantConfig.schema, quantConfig.precision);
  auto FCWTQP = chooseQuantizationParams(
      FCWPI->tensorProfilingParams_, quantConfig.schema, quantConfig.precision);
  auto FCBTQP =
      chooseQuantizationParams(FCBPI->tensorProfilingParams_,
                               quantConfig.schema, quantConfig.precisionBias);
  auto FCITQP = chooseQuantizationParams(
      FCIPI->tensorProfilingParams_, quantConfig.schema, quantConfig.precision);

  // Now create the same original function in the backend we're testing.
  ExecutionEngine backendEE;
  BackendClass backend;
  Backend *backendPtr = &backend;
  // Note: backendEE keeps its default backend; "backend" is only passed
  // explicitly to lower() and quantizeFunction() below.
  Function *backendF = backendEE.getModule().createFunction("quantized");
  PlaceholderBindings backendbindings;
  createSimpleFCNet(backendbindings, backendEE, *backendF);

  // Lower the function given the backend's preferences for lowering.
  LoweredInfoMap loweredMapForQuant;
  CompilationContext cctx2(/* bindings */ nullptr, &loweredMapForQuant);
  lower(backendF, cctx2, backendPtr);

  // Check that the backend lowered the function as expected.
  auto *floatFC = findNodeKindOrReturnNull<FullyConnectedNode>(backendF);
  auto *floatMM = findNodeKindOrReturnNull<MatMulNode>(backendF);
  auto *floatBA = findNodeKindOrReturnNull<BatchedAddNode>(backendF);
  if (expectLoweredFC) {
    ASSERT_FALSE(floatFC);
    ASSERT_TRUE(floatMM);
    ASSERT_TRUE(floatBA);
  } else {
    ASSERT_TRUE(floatFC);
    ASSERT_FALSE(floatMM);
    ASSERT_FALSE(floatBA);
  }

  // Quantize the function given the current backend we're testing along with
  // the quantization infos gathered.
  quantConfig.enableRowwise = rowwiseQuantizeFC;
  quantConfig.assertAllNodesQuantized = true;
  quantization::quantizeFunction(backendF, quantConfig, *backendPtr,
                                 loweredMapForQuant);

  // Optimize the graph to remove dead code and optimize away unnecessary
  // quantize nodes. Note that we do not do a full compile call here, as we have
  // already lowered.
  ::glow::optimize(backendF, CompilationMode::Infer);

  // Check that the graph is still structured as expected, and that the
  // scales/offsets are set as found in TQP.
  auto *quantFC = findNodeKindOrReturnNull<FullyConnectedNode>(backendF);
  auto *quantMM = findNodeKindOrReturnNull<MatMulNode>(backendF);
  auto *quantBA = findNodeKindOrReturnNull<BatchedAddNode>(backendF);
  auto *quantRowwiseFC =
      findNodeKindOrReturnNull<RowwiseQuantizedFullyConnectedNode>(backendF);

  if (rowwiseQuantizeFC) {
    // Rowwise path: whether or not the backend lowered, the FC (or its
    // lowered MM/BA pair) must have become a single rowwise-quantized FC.
    EXPECT_FALSE(quantMM);
    EXPECT_FALSE(quantBA);
    EXPECT_FALSE(quantFC);

    ASSERT_TRUE(quantRowwiseFC);
    EXPECT_EQ(quantRowwiseFC->getResult().getType()->getScale(), FCTQP.scale);
    EXPECT_EQ(quantRowwiseFC->getResult().getType()->getOffset(), FCTQP.offset);

    EXPECT_EQ(quantRowwiseFC->getBias().getElementType(), ElemKind::Int32QTy);
    // Bias scale was changed with the product inputScale * weightsScale only
    // if the product was larger.
    if (FCWTQP.scale * FCITQP.scale > FCBTQP.scale) {
      EXPECT_EQ(quantRowwiseFC->getBias().getType()->getScale(),
                FCWTQP.scale * FCITQP.scale);
      EXPECT_EQ(quantRowwiseFC->getBias().getType()->getOffset(), 0);
    } else {
      EXPECT_EQ(quantRowwiseFC->getBias().getType()->getScale(), FCBTQP.scale);
      EXPECT_EQ(quantRowwiseFC->getBias().getType()->getOffset(), 0);
    }
  } else if (expectLoweredFC) {
    // Lowered, non-rowwise path: expect quantized MatMul + BatchedAdd.
    ASSERT_FALSE(quantFC);
    ASSERT_FALSE(quantRowwiseFC);

    ASSERT_TRUE(quantMM);
    EXPECT_EQ(quantMM->getResult().getType()->getScale(), MMTQP.scale);
    EXPECT_EQ(quantMM->getResult().getType()->getOffset(), MMTQP.offset);

    ASSERT_TRUE(quantBA);
    EXPECT_EQ(quantBA->getResult().getType()->getScale(), BATQP.scale);
    EXPECT_EQ(quantBA->getResult().getType()->getOffset(), BATQP.offset);

    EXPECT_EQ(quantBA->getSlice().getElementType(), ElemKind::Int32QTy);
    // Bias scale was changed with the product inputScale * weightsScale only
    // if the product was larger.
    if (FCWTQP.scale * FCITQP.scale > FCBTQP.scale) {
      EXPECT_EQ(quantBA->getSlice().getType()->getScale(),
                FCWTQP.scale * FCITQP.scale);
      EXPECT_EQ(quantBA->getSlice().getType()->getOffset(), 0);
    } else {
      EXPECT_EQ(quantBA->getSlice().getType()->getScale(), FCBTQP.scale);
      EXPECT_EQ(quantBA->getSlice().getType()->getOffset(), 0);
    }
  } else {
    // Unlowered, non-rowwise path: expect a single quantized FC node.
    ASSERT_FALSE(quantRowwiseFC);

    ASSERT_TRUE(quantFC);
    EXPECT_EQ(quantFC->getResult().getType()->getScale(), FCTQP.scale);
    EXPECT_EQ(quantFC->getResult().getType()->getOffset(), FCTQP.offset);

    ASSERT_FALSE(quantMM);
    ASSERT_FALSE(quantBA);

    EXPECT_EQ(quantFC->getBias().getElementType(), ElemKind::Int32QTy);
    // Bias scale was changed with the product inputScale * weightsScale only
    // if the product was larger.
    if (FCWTQP.scale * FCITQP.scale > FCBTQP.scale) {
      EXPECT_EQ(quantFC->getBias().getType()->getScale(),
                FCWTQP.scale * FCITQP.scale);
      EXPECT_EQ(quantFC->getBias().getType()->getOffset(), 0);
    } else {
      EXPECT_EQ(quantFC->getBias().getType()->getScale(), FCBTQP.scale);
      EXPECT_EQ(quantFC->getBias().getType()->getOffset(), 0);
    }
  }
}
3022 | |
3023 | /// Test that backends that do not lower FCs can find the quantization |
3024 | /// parameters of their nodes. |
TEST(Quantization, TestProfileQuantizationOfUnloweredFC) {
  // Backend keeps the FC whole; plain (non-rowwise) quantization.
  testProfileQuantizationOfFC<MockBackendUnloweredFC>(
      /* expectLoweredFC */ false, /* rowwiseQuantizeFC */ false);
}
3029 | |
3030 | /// Test that backends that do lower FCs can find the quantization parameters of |
3031 | /// their nodes. |
TEST(Quantization, TestProfileQuantizationOfLoweredFC) {
  // Backend lowers the FC into MatMul + BatchedAdd; plain quantization.
  testProfileQuantizationOfFC<MockBackendLoweredFC>(
      /* expectLoweredFC */ true, /* rowwiseQuantizeFC */ false);
}
3036 | |
3037 | /// Test that backends that do not lower FCs can find the quantization |
3038 | /// parameters of their nodes and correctly rowwise quantize. |
TEST(Quantization, TestProfileQuantizationOfUnloweredFCRowwise) {
  // Backend keeps the FC whole; rowwise quantization requested.
  testProfileQuantizationOfFC<MockBackendUnloweredFC>(
      /* expectLoweredFC */ false, /* rowwiseQuantizeFC */ true);
}
3043 | |
3044 | /// Test that backends that do lower FCs can find the quantization parameters of |
3045 | /// their nodes and correctly rowwise quantize even when lowering the FC. |
TEST(Quantization, TestProfileQuantizationOfLoweredFCRowwise) {
  // Backend lowers the FC; rowwise quantization must still apply.
  testProfileQuantizationOfFC<MockBackendLoweredFC>(
      /* expectLoweredFC */ true, /* rowwiseQuantizeFC */ true);
}
3050 | |
3051 | /// Check that asserting quantization for the quantizer works as expected. |
3052 | TEST(Quantization, CheckAssertQuantization) { |
3053 | ExecutionEngine EE{}; |
3054 | std::unique_ptr<Backend> backend(createBackend(EE.getBackendName())); |
3055 | auto &mod = EE.getModule(); |
3056 | Function *F = mod.createFunction("main" ); |
3057 | auto *input = mod.createPlaceholder(ElemKind::FloatTy, {1, 3}, "input" , true); |
3058 | auto *relu = F->createRELU("ReLU" , input); |
3059 | PlaceholderBindings bindings; |
3060 | auto *save = F->createSave("ret" , relu); |
3061 | bindings.allocate(save->getPlaceholder()); |
3062 | |
3063 | quantization::QuantizationConfiguration quantConfig{ |
3064 | {{input->getOutput().generateNodeOutputName(), {0.2f, 2.0f}}, |
3065 | {relu->getResult().generateNodeOutputName(), {0.2f, 3.0f}}}}; |
3066 | quantConfig.precision = ElemKind::Int16QTy; |
3067 | quantConfig.assertAllNodesQuantized = true; |
3068 | |
3069 | // Expect this to die because quantizeFunction() is passed with |
3070 | // assertAllNodesQuantized true, and the Interpreter backend does not support |
3071 | // Int16QTy ReLU. |
3072 | Function *QF = F->clone("quant_clone1" ); |
3073 | EXPECT_DEATH(quantization::quantizeFunction(QF, quantConfig, *backend), "" ); |
3074 | |
3075 | { |
3076 | Function *QF = F->clone("quant_clone2" ); |
3077 | quantConfig.assertAllNodesQuantized = false; |
3078 | |
3079 | // This works fine because quantizeFunction() is passed with |
3080 | // assertAllNodesQuantized false, and so the ReLU will not be quantized as |
3081 | // the Interpreter does not support Int16QTy ReLU. |
3082 | quantization::quantizeFunction(QF, quantConfig, *backend); |
3083 | |
3084 | auto *saveNode = |
3085 | llvm::dyn_cast<SaveNode>(QF->getNodeByName(save->getName())); |
3086 | ASSERT_TRUE(saveNode); |
3087 | auto *reluNode = llvm::dyn_cast<ReluNode>(saveNode->getInput().getNode()); |
3088 | ASSERT_TRUE(reluNode); |
3089 | EXPECT_TRUE(!reluNode->getResult().getType()->isQuantizedType()); |
3090 | } |
3091 | |
3092 | { |
3093 | Function *QF = F->clone("quant_clone3" ); |
3094 | quantConfig.assertAllNodesQuantized = true; |
3095 | KindSet doNotQuantizeKinds; |
3096 | doNotQuantizeKinds.insert(Kinded::Kind::ReluNodeKind); |
3097 | |
3098 | // This works fine because quantizeFunction() is passed with |
3099 | // assertAllNodesQuantized true, but we explicitly tell the quantizer to |
3100 | // keep ReLU in its original precision. |
3101 | quantization::quantizeFunction(QF, quantConfig, *backend, |
3102 | /* loweredMap */ {}, doNotQuantizeKinds); |
3103 | |
3104 | auto *saveNode = |
3105 | llvm::dyn_cast<SaveNode>(QF->getNodeByName(save->getName())); |
3106 | ASSERT_TRUE(saveNode); |
3107 | auto *reluNode = llvm::dyn_cast<ReluNode>(saveNode->getInput().getNode()); |
3108 | ASSERT_TRUE(reluNode); |
3109 | EXPECT_TRUE(!reluNode->getResult().getType()->isQuantizedType()); |
3110 | } |
3111 | } |
3112 | |
3113 | /// Check that we can quantize nodes that have some quantized outputs as unused, |
3114 | /// e.g. a TopK node where values is unused but indices is. |
3115 | TEST(Quantization, QuantizationZeroUsersResult) { |
3116 | ExecutionEngine EE{}; |
3117 | auto &mod = EE.getModule(); |
3118 | PlaceholderBindings bindings; |
3119 | Function *F = mod.createFunction("main" ); |
3120 | auto *input = |
3121 | mod.createPlaceholder(ElemKind::FloatTy, {3, 1, 5}, "input" , false); |
3122 | |
3123 | bindings.allocate(input)->getHandle() = { |
3124 | 28, 4, 411, 19, 42, 0.4f, 0.4f, 0.4f, -0.4f, 0.45f, 7, 5, 9, 8, 100, |
3125 | }; |
3126 | |
3127 | // Note we intentionally do not save the topk's values result. |
3128 | auto *TK = F->createTopK("TopK" , input, 3); |
3129 | auto *SN = F->createSave("save_indices" , TK->getIndices()); |
3130 | bindings.allocate(SN->getPlaceholder()); |
3131 | |
3132 | quantization::QuantizationConfiguration quantConfig{ |
3133 | {{input->getOutput().generateNodeOutputName(), {0.2f, 2.0f}}, |
3134 | {TK->getValues().generateNodeOutputName(), {0.2f, 3.0f}}}}; |
3135 | quantConfig.assertAllNodesQuantized = true; |
3136 | |
3137 | std::unique_ptr<Backend> backend(createBackend(EE.getBackendName())); |
3138 | quantization::quantizeFunction(F, quantConfig, *backend); |
3139 | |
3140 | auto *qSN = llvm::dyn_cast<SaveNode>(F->getNodeByName(SN->getName())); |
3141 | ASSERT_TRUE(qSN); |
3142 | auto *qTK = llvm::dyn_cast<TopKNode>(qSN->getInput().getNode()); |
3143 | ASSERT_TRUE(qTK); |
3144 | EXPECT_TRUE(qTK->getValues().getType()->isQuantizedType()); |
3145 | } |
3146 | |
#ifdef GLOW_WITH_CPU

// When the CPU backend is built in, run the Operator tests across all
// (profiling backend, quantization backend) combinations of Interpreter/CPU.
GLOW_INSTANTIATE_TEST_SUITE_P(
    InterpAndCPUProfAndQuant, Operator,
    ::testing::Combine(::testing::Values("Interpreter", "CPU"),
                       ::testing::Values("Interpreter", "CPU")));

#else
// Without the CPU backend, profile and quantize on the Interpreter only.
GLOW_INSTANTIATE_TEST_SUITE_P(
    InterpreterProfAndQuant, Operator,
    ::testing::Combine(::testing::Values("Interpreter"),
                       ::testing::Values("Interpreter")));

#endif // GLOW_WITH_CPU

#ifdef GLOW_WITH_OPENCL
// Profile on the Interpreter, then quantize for the OpenCL backend.
GLOW_INSTANTIATE_TEST_SUITE_P(
    InterpProfOpenCLQuant, Operator,
    ::testing::Combine(::testing::Values("Interpreter"),
                       ::testing::Values("OpenCL")));
#endif // GLOW_WITH_OPENCL
3168 | |
3169 | } // namespace glow |
3170 | |