1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
// See docs in ../ops/math_ops.cc.
17 | |
18 | #define EIGEN_USE_THREADS |
19 | |
20 | #include <math.h> |
21 | |
22 | #include "tensorflow/core/framework/op.h" |
23 | #include "tensorflow/core/framework/op_kernel.h" |
24 | #include "tensorflow/core/framework/tensor.h" |
25 | #include "tensorflow/core/framework/tensor_shape.h" |
26 | #include "tensorflow/core/framework/type_traits.h" |
27 | #include "tensorflow/core/framework/types.h" |
28 | #include "tensorflow/core/kernels/meta_support.h" |
29 | #include "tensorflow/core/kernels/quantization_utils.h" |
30 | #include "tensorflow/core/lib/core/errors.h" |
31 | |
32 | namespace tensorflow { |
33 | |
34 | typedef Eigen::ThreadPoolDevice CPUDevice; |
35 | |
36 | template <class T1, class T2> |
37 | class RequantizeOp : public OpKernel { |
38 | public: |
39 | explicit RequantizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} |
40 | |
41 | void Compute(OpKernelContext* ctx) override { |
42 | const Tensor& input = ctx->input(0); |
43 | |
44 | const Tensor& input_min = ctx->input(1); |
45 | const Tensor& input_max = ctx->input(2); |
46 | const Tensor& requested_output_min = ctx->input(3); |
47 | const Tensor& requested_output_max = ctx->input(4); |
48 | OP_REQUIRES( |
49 | ctx, TensorShapeUtils::IsScalar(input_min.shape()), |
50 | errors::InvalidArgument("`input_min` must be rank 0 but is rank " , |
51 | input_min.dims())); |
52 | OP_REQUIRES( |
53 | ctx, TensorShapeUtils::IsScalar(input_max.shape()), |
54 | errors::InvalidArgument("`input_max` must be rank 0 but is rank " , |
55 | input_max.dims())); |
56 | OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(requested_output_min.shape()), |
57 | errors::InvalidArgument( |
58 | "`requested_output_min` must be rank 0 but is rank " , |
59 | requested_output_min.dims())); |
60 | OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(requested_output_max.shape()), |
61 | errors::InvalidArgument( |
62 | "`requested_output_max` must be rank 0 but is rank " , |
63 | requested_output_max.dims())); |
64 | |
65 | const float input_min_float = input_min.flat<float>()(0); |
66 | const float input_max_float = input_max.flat<float>()(0); |
67 | const float requested_output_min_float = |
68 | requested_output_min.flat<float>()(0); |
69 | const float requested_output_max_float = |
70 | requested_output_max.flat<float>()(0); |
71 | |
72 | Tensor* output = nullptr; |
73 | OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); |
74 | Tensor* output_min = nullptr; |
75 | OP_REQUIRES_OK(ctx, ctx->allocate_output(1, TensorShape({}), &output_min)); |
76 | Tensor* output_max = nullptr; |
77 | OP_REQUIRES_OK(ctx, ctx->allocate_output(2, TensorShape({}), &output_max)); |
78 | |
79 | OP_REQUIRES( |
80 | ctx, requested_output_min_float <= 0.0f, |
81 | errors::InvalidArgument("requested_output_min must be <= 0, but got " , |
82 | requested_output_min_float)); |
83 | OP_REQUIRES( |
84 | ctx, requested_output_max_float >= requested_output_min_float, |
85 | errors::InvalidArgument( |
86 | "requested_output_max must be >= requested_output_min, but got " , |
87 | requested_output_max_float, " and " , requested_output_min_float)); |
88 | |
89 | auto input_array = input.flat<T1>(); |
90 | |
91 | #if 0 |
92 | // This is the reference, non-eigen implementation: |
93 | auto output_array = output->flat<T2>(); |
94 | RequantizeManyInNewRange<T1, T2>( |
95 | input_array.data(), input_array.size(), |
96 | input_min_float, input_max_float, |
97 | requested_output_min_float, requested_output_max_float, |
98 | output_array.data()); |
99 | #endif |
100 | |
101 | if (input_array.size() > 0) { |
102 | if (meta::IsSupportedAndEnabled() && std::is_same<T1, qint32>() && |
103 | std::is_same<T2, quint8>()) { |
104 | auto input_i32_array = input.flat<qint32>(); |
105 | meta::Requantize(ctx, input_i32_array.data(), input_i32_array.size(), |
106 | input_min_float, input_max_float, |
107 | requested_output_min_float, requested_output_max_float, |
108 | output->flat<quint8>().data()); |
109 | } else { |
110 | RequantizeManyInNewRangeUsingEigen<T1, T2>( |
111 | ctx->eigen_device<CPUDevice>(), input, input_min_float, |
112 | input_max_float, requested_output_min_float, |
113 | requested_output_max_float, output); |
114 | } |
115 | } |
116 | |
117 | output_min->flat<float>().setConstant(requested_output_min_float); |
118 | output_max->flat<float>().setConstant(requested_output_max_float); |
119 | } |
120 | }; |
121 | |
122 | REGISTER_KERNEL_BUILDER(Name("Requantize" ) |
123 | .Device(DEVICE_CPU) |
124 | .TypeConstraint<qint32>("Tinput" ) |
125 | .TypeConstraint<quint8>("out_type" ), |
126 | RequantizeOp<qint32, quint8>); |
127 | |
128 | REGISTER_KERNEL_BUILDER(Name("Requantize" ) |
129 | .Device(DEVICE_CPU) |
130 | .TypeConstraint<qint32>("Tinput" ) |
131 | .TypeConstraint<qint8>("out_type" ), |
132 | RequantizeOp<qint32, qint8>); |
133 | |
134 | } // namespace tensorflow |
135 | |