1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/nn_ops.cc. |
17 | |
18 | #define EIGEN_USE_THREADS |
19 | |
20 | #include "tensorflow/core/kernels/relu_op.h" |
21 | |
22 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" |
23 | #include "tensorflow/core/framework/numeric_op.h" |
24 | #include "tensorflow/core/framework/op_kernel.h" |
25 | #include "tensorflow/core/framework/register_types.h" |
26 | #include "tensorflow/core/framework/tensor.h" |
27 | #include "tensorflow/core/lib/core/errors.h" |
28 | |
29 | namespace tensorflow { |
30 | |
31 | typedef Eigen::ThreadPoolDevice CPUDevice; |
32 | typedef Eigen::GpuDevice GPUDevice; |
33 | |
34 | #define REGISTER_RELU_KERNELS(type) \ |
35 | REGISTER_KERNEL_BUILDER( \ |
36 | Name("Relu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ |
37 | ReluOp<CPUDevice, type>); \ |
38 | REGISTER_KERNEL_BUILDER( \ |
39 | Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ |
40 | ReluGradOp<CPUDevice, type>); \ |
41 | REGISTER_KERNEL_BUILDER( \ |
42 | Name("Relu6").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ |
43 | Relu6Op<CPUDevice, type>); \ |
44 | REGISTER_KERNEL_BUILDER( \ |
45 | Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ |
46 | Relu6GradOp<CPUDevice, type>) \ |
47 | REGISTER_KERNEL_BUILDER( \ |
48 | Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ |
49 | LeakyReluGradOp<CPUDevice, type>); |
50 | |
51 | TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS); |
52 | #undef REGISTER_RELU_KERNELS |
53 | |
// Registers the CPU LeakyRelu forward kernel for one element type.
// Register LeakyRelu here for all types except bfloat16 — the bfloat16
// kernel lives in cwise_op_leakyrelu_bf16.cc, so it must not be registered
// a second time from this file.
#define REGISTER_LEAKYRELU_KERNELS(type)                              \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      LeakyReluOp<CPUDevice, type>);

// Instantiate for every integral type plus half/float/double individually
// (instead of a single TF_CALL_* umbrella macro) precisely so that bfloat16
// is skipped.
TF_CALL_INTEGRAL_TYPES(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_half(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_float(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_double(REGISTER_LEAKYRELU_KERNELS)
#undef REGISTER_LEAKYRELU_KERNELS
66 | |
67 | #define REGISTER_ELU_KERNELS(type) \ |
68 | REGISTER_KERNEL_BUILDER( \ |
69 | Name("Elu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ |
70 | EluOp<CPUDevice, type>); \ |
71 | REGISTER_KERNEL_BUILDER( \ |
72 | Name("EluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ |
73 | EluGradOp<CPUDevice, type>); \ |
74 | REGISTER_KERNEL_BUILDER( \ |
75 | Name("Selu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ |
76 | SeluOp<CPUDevice, type>); \ |
77 | REGISTER_KERNEL_BUILDER( \ |
78 | Name("SeluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ |
79 | SeluGradOp<CPUDevice, type>) |
80 | |
81 | // Elu and Selu only make sense with float or double. |
82 | TF_CALL_FLOAT_TYPES(REGISTER_ELU_KERNELS); |
83 | #undef REGISTER_ELU_KERNELS |
84 | |
85 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
86 | |
87 | #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) |
88 | |
namespace functor {
// Forward declarations of the GPU functor specializations for the forward
// activations (Relu, Elu, Selu), needed only when the MLIR-generated GPU
// kernels are disabled.  The `extern template` lines suppress implicit
// instantiation in this translation unit — the definitions are compiled
// elsewhere (presumably relu_op_gpu.cu.cc; confirm against the build).
#define DECLARE_GPU_NO_MLIR_SPEC(T)                                            \
  template <>                                                                  \
  void Relu<GPUDevice, T>::operator()(                                         \
      const GPUDevice& d, typename TTypes<T>::ConstTensor features,            \
      typename TTypes<T>::Tensor activations);                                 \
  extern template struct Relu<GPUDevice, T>;                                   \
                                                                               \
  template <>                                                                  \
  void Elu<GPUDevice, T>::operator()(const GPUDevice& d,                       \
                                     typename TTypes<T>::ConstTensor features, \
                                     typename TTypes<T>::Tensor activations);  \
  extern template struct Elu<GPUDevice, T>;                                    \
                                                                               \
  template <>                                                                  \
  void Selu<GPUDevice, T>::operator()(                                         \
      const GPUDevice& d, typename TTypes<T>::ConstTensor features,            \
      typename TTypes<T>::Tensor activations);                                 \
  extern template struct Selu<GPUDevice, T>;

TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_NO_MLIR_SPEC);
}  // namespace functor
111 | |
112 | #define REGISTER_GPU_NO_MLIR_KERNELS(type) \ |
113 | REGISTER_KERNEL_BUILDER( \ |
114 | Name("Relu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ |
115 | ReluOp<GPUDevice, type>); \ |
116 | REGISTER_KERNEL_BUILDER( \ |
117 | Name("Elu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ |
118 | EluOp<GPUDevice, type>); \ |
119 | REGISTER_KERNEL_BUILDER( \ |
120 | Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ |
121 | SeluOp<GPUDevice, type>); |
122 | |
123 | TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_NO_MLIR_KERNELS); |
124 | #undef REGISTER_RELU_KERNEL |
125 | #endif |
126 | |
// Forward declarations of the functor specializations for GPU.  The
// `extern template` lines suppress implicit instantiation in this
// translation unit; the definitions are compiled elsewhere (presumably
// relu_op_gpu.cu.cc — confirm against the build).
namespace functor {
// Declares, for one element type T, the GPU specializations of the gradient
// functors plus Relu6 and LeakyRelu.  Note that EluGrad/SeluGrad take the
// op's *activations* (outputs) as their second tensor, whereas the
// Relu-family gradients take the original *features* (inputs).
#define DECLARE_GPU_SPEC(T)                                          \
  template <>                                                        \
  void ReluGrad<GPUDevice, T>::operator()(                           \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor features,                      \
      typename TTypes<T>::Tensor backprops);                         \
  extern template struct ReluGrad<GPUDevice, T>;                     \
                                                                     \
  template <>                                                        \
  void Relu6<GPUDevice, T>::operator()(                              \
      const GPUDevice& d, typename TTypes<T>::ConstTensor features,  \
      typename TTypes<T>::Tensor activations);                       \
  extern template struct Relu6<GPUDevice, T>;                        \
                                                                     \
  template <>                                                        \
  void Relu6Grad<GPUDevice, T>::operator()(                          \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor features,                      \
      typename TTypes<T>::Tensor backprops);                         \
  extern template struct Relu6Grad<GPUDevice, T>;                    \
                                                                     \
  template <>                                                        \
  void LeakyRelu<GPUDevice, T>::operator()(LeakyReluArgs args);      \
  extern template struct LeakyRelu<GPUDevice, T>;                    \
                                                                     \
  template <>                                                        \
  void LeakyReluGrad<GPUDevice, T>::operator()(                      \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor features, T alpha,             \
      typename TTypes<T>::Tensor backprops);                         \
  extern template struct LeakyReluGrad<GPUDevice, T>;                \
                                                                     \
  template <>                                                        \
  void EluGrad<GPUDevice, T>::operator()(                            \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor activations,                   \
      typename TTypes<T>::Tensor backprops);                         \
  extern template struct EluGrad<GPUDevice, T>;                      \
                                                                     \
  template <>                                                        \
  void SeluGrad<GPUDevice, T>::operator()(                           \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor activations,                   \
      typename TTypes<T>::Tensor backprops);                         \
  extern template struct SeluGrad<GPUDevice, T>;

// qint8 has a forward-Relu GPU specialization only; it is declared outside
// the macro because qint8 is not covered by TF_CALL_GPU_NUMBER_TYPES.
template <>
void Relu<GPUDevice, qint8>::operator()(
    const GPUDevice& d, typename TTypes<qint8>::ConstTensor features,
    typename TTypes<qint8>::Tensor activations);
extern template struct Relu<GPUDevice, qint8>;

TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
}  // namespace functor
183 | |
184 | // Registration of the GPU implementations. |
185 | #define REGISTER_GPU_KERNELS(type) \ |
186 | REGISTER_KERNEL_BUILDER( \ |
187 | Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ |
188 | ReluGradOp<GPUDevice, type>); \ |
189 | REGISTER_KERNEL_BUILDER( \ |
190 | Name("Relu6").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ |
191 | Relu6Op<GPUDevice, type>); \ |
192 | REGISTER_KERNEL_BUILDER( \ |
193 | Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ |
194 | Relu6GradOp<GPUDevice, type>); \ |
195 | REGISTER_KERNEL_BUILDER( \ |
196 | Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ |
197 | LeakyReluOp<GPUDevice, type>); \ |
198 | REGISTER_KERNEL_BUILDER( \ |
199 | Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ |
200 | LeakyReluGradOp<GPUDevice, type>); \ |
201 | REGISTER_KERNEL_BUILDER( \ |
202 | Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ |
203 | EluGradOp<GPUDevice, type>); \ |
204 | REGISTER_KERNEL_BUILDER( \ |
205 | Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \ |
206 | SeluGradOp<GPUDevice, type>) |
207 | |
208 | TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); |
209 | #undef REGISTER_GPU_KERNELS |
210 | |
211 | template <typename Device> |
212 | class ReluOp<Device, qint8> |
213 | : public UnaryElementWiseOp<qint8, ReluOp<Device, qint8>> { |
214 | public: |
215 | using UnaryElementWiseOp<qint8, ReluOp<Device, qint8>>::UnaryElementWiseOp; |
216 | |
217 | void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { |
218 | auto flat_input = input.flat<qint8>(); |
219 | OP_REQUIRES(context, (flat_input.size() % 4) == 0, |
220 | errors::InvalidArgument( |
221 | "Tensor size must be a multiple of 4 for Relu<qint8>. Got " , |
222 | flat_input.size())); |
223 | functor::Relu<Device, qint8> func; |
224 | func(context->eigen_device<Device>(), flat_input, output->flat<qint8>()); |
225 | } |
226 | }; |
227 | |
228 | REGISTER_KERNEL_BUILDER( |
229 | Name("Relu" ).Device(DEVICE_GPU).TypeConstraint<qint8>("T" ), |
230 | ReluOp<GPUDevice, qint8>); |
231 | |
232 | #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
233 | |
234 | } // namespace tensorflow |
235 | |