1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/core/kernels/quantization_utils.h" |
17 | |
18 | namespace tensorflow { |
19 | |
// Chooses the float range in which two arguments are added together and writes
// it through `output_min` and `output_max`.
//
// The range is symmetric about zero: `*output_max` is the largest magnitude
// found among the four supplied bounds, scaled up by 2^17, and `*output_min`
// is the negation of `*output_max`.
void GetOutputMinAndMaxForQuantizedAdd(float input_min, float input_max,
                                       float smaller_input_min,
                                       float smaller_input_max,
                                       float* output_min, float* output_max) {
  // Picking this range is trickier than it looks, because it has to:
  //  - be symmetrical around zero, so that 0 + 0 = 0;
  //  - hold the largest of the argument ranges;
  //  - leave enough range that the bits of the lowest and highest arguments
  //    overlap where possible, without the lower one getting truncated;
  //  - keep some headroom so that there is no overflow;
  //  - be signed.
  // Assuming eight-bit inputs and a 32-bit output, scaling the widest input
  // magnitude by 2^17 keeps the accumulated results in the bottom
  // 32 - 17 = 15 bits, which satisfies every requirement above.
  constexpr int kHeadroomBits = 17;

  // Fold the four bounds into one magnitude, innermost pair first. Each lower
  // bound is negated so that it is compared as a distance from zero.
  const float smaller_magnitude =
      std::max(smaller_input_max, -smaller_input_min);
  const float lower_or_smaller_magnitude =
      std::max(-input_min, smaller_magnitude);
  const float widest_magnitude =
      std::max(input_max, lower_or_smaller_magnitude);

  *output_max = widest_magnitude * (1 << kHeadroomBits);
  // Mirror the upper bound to keep the range symmetric.
  *output_min = -(*output_max);
}
41 | |
42 | } // namespace tensorflow |
43 | |