/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/nn_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/relu_op.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

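// Registration of the CPU implementations.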
#define REGISTER_RELU_KERNELS(type) \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      ReluOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      ReluGradOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu6").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      Relu6Op<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      Relu6GradOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      LeakyReluGradOp<CPUDevice, type>);

TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS);
#undef REGISTER_RELU_KERNELS

// Register LeakyRelu here for all types except bfloat16; the bfloat16 kernel
// is registered in cwise_op_leakyrelu_bf16.cc.
#define REGISTER_LEAKYRELU_KERNELS(type) \
  REGISTER_KERNEL_BUILDER( \
      Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      LeakyReluOp<CPUDevice, type>);

TF_CALL_INTEGRAL_TYPES(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_half(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_float(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_double(REGISTER_LEAKYRELU_KERNELS)
#undef REGISTER_LEAKYRELU_KERNELS

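// Registration of the CPU implementations of Elu, Selu, and their gradients.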
#define REGISTER_ELU_KERNELS(type) \
  REGISTER_KERNEL_BUILDER( \
      Name("Elu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      EluOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("EluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      EluGradOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Selu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      SeluOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("SeluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      SeluGradOp<CPUDevice, type>)

// Elu and Selu only make sense with float or double.
TF_CALL_FLOAT_TYPES(REGISTER_ELU_KERNELS);
#undef REGISTER_ELU_KERNELS

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)

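// Forward declarations of the forward-pass functor specializations (Relu,
// Elu, Selu) for GPU. These are only needed when the MLIR-generated GPU
// kernels are not enabled; the gradient functors are declared further below.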
namespace functor {
#define DECLARE_GPU_NO_MLIR_SPEC(T) \
  template <> \
  void Relu<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor activations); \
  extern template struct Relu<GPUDevice, T>; \
\
  template <> \
  void Elu<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor activations); \
  extern template struct Elu<GPUDevice, T>; \
\
  template <> \
  void Selu<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor activations); \
  extern template struct Selu<GPUDevice, T>;

TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_NO_MLIR_SPEC);
}  // namespace functor

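// Registration of the GPU forward kernels for Relu, Elu, and Selu, likewise
// only needed when the MLIR-generated GPU kernels are not enabled.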
#define REGISTER_GPU_NO_MLIR_KERNELS(type) \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      ReluOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Elu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      EluOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      SeluOp<GPUDevice, type>);

TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_NO_MLIR_KERNELS);
#undef REGISTER_GPU_NO_MLIR_KERNELS
#endif  // !MLIR_GENERATED_GPU_KERNELS_ENABLED

// Forward declarations of the functor specializations for GPU.
namespace functor {
#define DECLARE_GPU_SPEC(T) \
  template <> \
  void ReluGrad<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor backprops); \
  extern template struct ReluGrad<GPUDevice, T>; \
\
  template <> \
  void Relu6<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor activations); \
  extern template struct Relu6<GPUDevice, T>; \
\
  template <> \
  void Relu6Grad<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor backprops); \
  extern template struct Relu6Grad<GPUDevice, T>; \
\
  template <> \
  void LeakyRelu<GPUDevice, T>::operator()(LeakyReluArgs args); \
  extern template struct LeakyRelu<GPUDevice, T>; \
\
  template <> \
  void LeakyReluGrad<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor features, T alpha, \
      typename TTypes<T>::Tensor backprops); \
  extern template struct LeakyReluGrad<GPUDevice, T>; \
\
  template <> \
  void EluGrad<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor activations, \
      typename TTypes<T>::Tensor backprops); \
  extern template struct EluGrad<GPUDevice, T>; \
\
  template <> \
  void SeluGrad<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor activations, \
      typename TTypes<T>::Tensor backprops); \
  extern template struct SeluGrad<GPUDevice, T>;

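// Declaration of the forward Relu functor specialization for qint8, used by
// the ReluOp<GPUDevice, qint8> specialization registered below.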
template <>
void Relu<GPUDevice, qint8>::operator()(
    const GPUDevice& d, typename TTypes<qint8>::ConstTensor features,
    typename TTypes<qint8>::Tensor activations);
extern template struct Relu<GPUDevice, qint8>;

TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
}  // namespace functor

// Registration of the GPU implementations.
#define REGISTER_GPU_KERNELS(type) \
  REGISTER_KERNEL_BUILDER( \
      Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      ReluGradOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu6").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      Relu6Op<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      Relu6GradOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      LeakyReluOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      LeakyReluGradOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      EluGradOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      SeluGradOp<GPUDevice, type>)

TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
#undef REGISTER_GPU_KERNELS

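// A specialization of ReluOp for qint8. The GPU kernel processes the int8
// values four at a time, so the flat input size must be a multiple of 4
// (see the check below and relu_op_gpu.cu.cc).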
template <typename Device>
class ReluOp<Device, qint8>
    : public UnaryElementWiseOp<qint8, ReluOp<Device, qint8>> {
 public:
  using UnaryElementWiseOp<qint8, ReluOp<Device, qint8>>::UnaryElementWiseOp;

  void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
    auto flat_input = input.flat<qint8>();
    OP_REQUIRES(context, (flat_input.size() % 4) == 0,
                errors::InvalidArgument(
                    "Tensor size must be a multiple of 4 for Relu<qint8>. Got ",
                    flat_input.size()));
    functor::Relu<Device, qint8> func;
    func(context->eigen_device<Device>(), flat_input, output->flat<qint8>());
  }
};

REGISTER_KERNEL_BUILDER(
    Name("Relu").Device(DEVICE_GPU).TypeConstraint<qint8>("T"),
    ReluOp<GPUDevice, qint8>);

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

}  // namespace tensorflow