1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/array_ops.cc. |
17 | |
18 | #define EIGEN_USE_THREADS |
19 | |
#include <math.h>

#include <algorithm>
#include <cstdint>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/type_traits.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/meta_support.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/errors.h"
30 | |
31 | namespace tensorflow { |
32 | |
33 | typedef Eigen::ThreadPoolDevice CPUDevice; |
34 | |
35 | template <class T1, class T2> |
36 | class QuantizeDownAndShrinkRangeOp : public OpKernel { |
37 | public: |
38 | explicit QuantizeDownAndShrinkRangeOp(OpKernelConstruction* ctx) |
39 | : OpKernel(ctx) {} |
40 | |
41 | void Compute(OpKernelContext* ctx) override { |
42 | const Tensor& input = ctx->input(0); |
43 | const Tensor& input_min = ctx->input(1); |
44 | const Tensor& input_max = ctx->input(2); |
45 | |
46 | OP_REQUIRES( |
47 | ctx, TensorShapeUtils::IsScalar(input_min.shape()), |
48 | errors::InvalidArgument("`input_min` must be rank 0 but is rank " , |
49 | input_min.dims())); |
50 | OP_REQUIRES( |
51 | ctx, TensorShapeUtils::IsScalar(input_max.shape()), |
52 | errors::InvalidArgument("`input_max` must be rank 0 but is rank " , |
53 | input_max.dims())); |
54 | |
55 | const float input_min_float = input_min.scalar<float>()(); |
56 | const float input_max_float = input_max.scalar<float>()(); |
57 | Tensor* output = nullptr; |
58 | OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); |
59 | Tensor* output_min = nullptr; |
60 | OP_REQUIRES_OK(ctx, ctx->allocate_output(1, TensorShape({}), &output_min)); |
61 | Tensor* output_max = nullptr; |
62 | OP_REQUIRES_OK(ctx, ctx->allocate_output(2, TensorShape({}), &output_max)); |
63 | |
64 | // See QuantizationRangeOp as well, which has a copy of this logic. |
65 | auto input_array = input.flat<T1>(); |
66 | const int32_t input_lowest_quantized = |
67 | static_cast<int32>(Eigen::NumTraits<T1>::lowest()); |
68 | const int32_t input_highest_quantized = |
69 | static_cast<int32>(Eigen::NumTraits<T1>::highest()); |
70 | T1 actual_min_quantized = input_highest_quantized; |
71 | T1 actual_max_quantized = input_lowest_quantized; |
72 | for (int i = 0; i < input_array.size(); ++i) { |
73 | const T1 value = input_array(i); |
74 | actual_min_quantized = std::min(actual_min_quantized, value); |
75 | actual_max_quantized = std::max(actual_max_quantized, value); |
76 | } |
77 | // We want to make sure that the minimum is no larger than zero, so that the |
78 | // convolution operation can run efficiently. |
79 | const float actual_min_float = |
80 | std::min(0.0f, QuantizedToFloat(actual_min_quantized, input_min_float, |
81 | input_max_float)); |
82 | const float actual_max_float = QuantizedToFloat( |
83 | actual_max_quantized, input_min_float, input_max_float); |
84 | |
85 | #if 0 |
86 | // This is the reference, non-eigen implementation: |
87 | auto output_array = output->flat<T2>(); |
88 | RequantizeManyInNewRange<T1, T2>(input_array.data(), input_array.size(), |
89 | input_min_float, input_max_float, |
90 | actual_min_float, actual_max_float, |
91 | output_array.data()); |
92 | #endif |
93 | |
94 | if (input_array.size() > 0) { |
95 | if (meta::IsSupportedAndEnabled() && std::is_same<T1, qint32>() && |
96 | std::is_same<T2, quint8>()) { |
97 | auto input_i32_array = input.flat<qint32>(); |
98 | meta::Requantize(ctx, input_i32_array.data(), input_i32_array.size(), |
99 | input_min_float, input_max_float, actual_min_float, |
100 | actual_max_float, output->flat<quint8>().data()); |
101 | } else { |
102 | RequantizeManyInNewRangeUsingEigen<T1, T2>( |
103 | ctx->eigen_device<CPUDevice>(), input, input_min_float, |
104 | input_max_float, actual_min_float, actual_max_float, output); |
105 | } |
106 | } |
107 | |
108 | output_min->flat<float>().setConstant(actual_min_float); |
109 | output_max->flat<float>().setConstant(actual_max_float); |
110 | } |
111 | }; |
112 | |
// Register the CPU kernel for the only supported type combination:
// 32-bit quantized input down to 8-bit unsigned quantized output.
REGISTER_KERNEL_BUILDER(Name("QuantizeDownAndShrinkRange")
                            .Device(DEVICE_CPU)
                            .TypeConstraint<qint32>("Tinput")
                            .TypeConstraint<quint8>("out_type"),
                        QuantizeDownAndShrinkRangeOp<qint32, quint8>);
118 | |
119 | } // namespace tensorflow |
120 | |