1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // Implements a quantized eight-bit version of the bias addition operation. |
17 | |
18 | #define EIGEN_USE_THREADS |
19 | |
20 | #include "tensorflow/core/framework/numeric_op.h" |
21 | #include "tensorflow/core/framework/op_kernel.h" |
22 | #include "tensorflow/core/framework/tensor.h" |
23 | #include "tensorflow/core/framework/tensor_shape.h" |
24 | #include "tensorflow/core/kernels/meta_support.h" |
25 | #include "tensorflow/core/kernels/ops_util.h" |
26 | #include "tensorflow/core/kernels/quantization_utils.h" |
27 | #include "tensorflow/core/lib/core/errors.h" |
28 | |
29 | namespace tensorflow { |
30 | |
31 | typedef Eigen::ThreadPoolDevice CPUDevice; |
32 | |
33 | template <class T1, class T2, class T3> |
34 | class QuantizedBiasAddOp : public OpKernel { |
35 | public: |
36 | explicit QuantizedBiasAddOp(OpKernelConstruction* context) |
37 | : OpKernel(context) {} |
38 | |
39 | void Compute(OpKernelContext* context) override { |
40 | const Tensor& input = context->input(0); |
41 | const Tensor& bias = context->input(1); |
42 | |
43 | const Tensor& min_input = context->input(2); |
44 | const Tensor& max_input = context->input(3); |
45 | const Tensor& min_bias = context->input(4); |
46 | const Tensor& max_bias = context->input(5); |
47 | OP_REQUIRES( |
48 | context, TensorShapeUtils::IsScalar(min_input.shape()), |
49 | errors::InvalidArgument("`min_input` must be rank 0 but is rank " , |
50 | min_input.dims())); |
51 | OP_REQUIRES( |
52 | context, TensorShapeUtils::IsScalar(max_input.shape()), |
53 | errors::InvalidArgument("`max_input` must be rank 0 but is rank " , |
54 | max_input.dims())); |
55 | OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_bias.shape()), |
56 | errors::InvalidArgument( |
57 | "`min_bias` must be rank 0 but is rank " , min_bias.dims())); |
58 | OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_bias.shape()), |
59 | errors::InvalidArgument( |
60 | "`max_bias` must be rank 0 but is rank " , max_bias.dims())); |
61 | |
62 | const float input_min = min_input.flat<float>()(0); |
63 | const float input_max = max_input.flat<float>()(0); |
64 | const float bias_min = min_bias.flat<float>()(0); |
65 | const float bias_max = max_bias.flat<float>()(0); |
66 | |
67 | OP_REQUIRES(context, TensorShapeUtils::IsMatrixOrHigher(input.shape()), |
68 | errors::InvalidArgument("Input tensor must be at least 2D: " , |
69 | input.shape().DebugString())); |
70 | OP_REQUIRES(context, TensorShapeUtils::IsVector(bias.shape()), |
71 | errors::InvalidArgument("Biases must be 1D: " , |
72 | bias.shape().DebugString())); |
73 | const auto last_dim = input.shape().dims() - 1; |
74 | OP_REQUIRES( |
75 | context, bias.shape().dim_size(0) == input.shape().dim_size(last_dim), |
76 | errors::InvalidArgument( |
77 | "Must provide as many biases as the last dimension " |
78 | "of the input tensor: " , |
79 | bias.shape().DebugString(), " vs. " , input.shape().DebugString())); |
80 | OP_REQUIRES(context, bias.NumElements() > 0, |
81 | errors::InvalidArgument("Must provide at least 1 bias" )); |
82 | |
83 | Tensor* output = nullptr; |
84 | OP_REQUIRES_OK(context, |
85 | context->allocate_output(0, input.shape(), &output)); |
86 | |
87 | float total_min; |
88 | float total_max; |
89 | |
90 | if (meta::IsSupportedAndEnabled() && std::is_same<T1, quint8>() && |
91 | std::is_same<T2, quint8>() && std::is_same<T3, qint32>()) { |
92 | auto input_ui8_array = input.flat<quint8>(); |
93 | auto bias_ui8_array = bias.flat<quint8>(); |
94 | GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, bias_min, |
95 | bias_max, &total_min, &total_max); |
96 | meta::QuantizedBiasAdd(context, input_ui8_array.data(), |
97 | input_ui8_array.size(), bias_ui8_array.data(), |
98 | bias_ui8_array.size(), input_min, input_max, |
99 | bias_min, bias_max, total_min, total_max, |
100 | output->flat<qint32>().data()); |
101 | } else { |
102 | QuantizedAddUsingEigen<T1, T2, T3>( |
103 | context->template eigen_device<CPUDevice>(), input, input_min, |
104 | input_max, bias, bias_min, bias_max, output, &total_min, &total_max); |
105 | } |
106 | |
107 | Tensor* output_min = nullptr; |
108 | OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); |
109 | output_min->flat<float>()(0) = total_min; |
110 | |
111 | Tensor* output_max = nullptr; |
112 | OP_REQUIRES_OK(context, context->allocate_output(2, {}, &output_max)); |
113 | output_max->flat<float>()(0) = total_max; |
114 | } |
115 | }; |
116 | |
// Register the CPU kernels for the "QuantizedBiasAdd" op: one instantiation
// for unsigned eight-bit input/bias, one for signed eight-bit input/bias.
// Both accumulate into 32-bit signed quantized output to avoid overflow.
REGISTER_KERNEL_BUILDER(Name("QuantizedBiasAdd")
                            .Device(DEVICE_CPU)
                            .TypeConstraint<quint8>("T1")
                            .TypeConstraint<quint8>("T2")
                            .TypeConstraint<qint32>("out_type"),
                        QuantizedBiasAddOp<quint8, quint8, qint32>);
REGISTER_KERNEL_BUILDER(Name("QuantizedBiasAdd")
                            .Device(DEVICE_CPU)
                            .TypeConstraint<qint8>("T1")
                            .TypeConstraint<qint8>("T2")
                            .TypeConstraint<qint32>("out_type"),
                        QuantizedBiasAddOp<qint8, qint8, qint32>);
}  // namespace tensorflow
130 | |