/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/nn_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/relu_op.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

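// Registration of the CPU implementations.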
#define REGISTER_RELU_KERNELS(type) \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      ReluOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      ReluGradOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu6").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      Relu6Op<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      Relu6GradOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      LeakyReluGradOp<CPUDevice, type>);

TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS);
#undef REGISTER_RELU_KERNELS

// Register LeakyRelu here for all types except bfloat16; the bfloat16 kernel
// is registered in cwise_op_leakyrelu_bf16.cc.
#define REGISTER_LEAKYRELU_KERNELS(type) \
  REGISTER_KERNEL_BUILDER( \
      Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      LeakyReluOp<CPUDevice, type>);

TF_CALL_INTEGRAL_TYPES(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_half(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_float(REGISTER_LEAKYRELU_KERNELS)
TF_CALL_double(REGISTER_LEAKYRELU_KERNELS)
#undef REGISTER_LEAKYRELU_KERNELS

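// Registration of the CPU implementations of Elu, Selu, and their gradients.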
#define REGISTER_ELU_KERNELS(type) \
  REGISTER_KERNEL_BUILDER( \
      Name("Elu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      EluOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("EluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      EluGradOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Selu").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      SeluOp<CPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("SeluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
      SeluGradOp<CPUDevice, type>)

// Elu and Selu only make sense with float or double.
TF_CALL_FLOAT_TYPES(REGISTER_ELU_KERNELS);
#undef REGISTER_ELU_KERNELS

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)

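// Forward declarations of the forward-pass functor specializations (Relu,
// Elu, Selu) for GPU. These are only needed when the MLIR-generated GPU
// kernels are not enabled; the gradient functors are declared further below.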
namespace functor {
#define DECLARE_GPU_NO_MLIR_SPEC(T) \
  template <> \
  void Relu<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor activations); \
  extern template struct Relu<GPUDevice, T>; \
\
  template <> \
  void Elu<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor activations); \
  extern template struct Elu<GPUDevice, T>; \
\
  template <> \
  void Selu<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor activations); \
  extern template struct Selu<GPUDevice, T>;

TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_NO_MLIR_SPEC);
}  // namespace functor

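// Registration of the GPU forward kernels for Relu, Elu, and Selu, likewise
// only needed when the MLIR-generated GPU kernels are not enabled.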
#define REGISTER_GPU_NO_MLIR_KERNELS(type) \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      ReluOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Elu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      EluOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Selu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      SeluOp<GPUDevice, type>);

TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_NO_MLIR_KERNELS);
#undef REGISTER_GPU_NO_MLIR_KERNELS
#endif  // !MLIR_GENERATED_GPU_KERNELS_ENABLED

// Forward declarations of the functor specializations for GPU.
namespace functor {
#define DECLARE_GPU_SPEC(T) \
  template <> \
  void ReluGrad<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor backprops); \
  extern template struct ReluGrad<GPUDevice, T>; \
\
  template <> \
  void Relu6<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor activations); \
  extern template struct Relu6<GPUDevice, T>; \
\
  template <> \
  void Relu6Grad<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor features, \
      typename TTypes<T>::Tensor backprops); \
  extern template struct Relu6Grad<GPUDevice, T>; \
\
  template <> \
  void LeakyRelu<GPUDevice, T>::operator()(LeakyReluArgs args); \
  extern template struct LeakyRelu<GPUDevice, T>; \
\
  template <> \
  void LeakyReluGrad<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor features, T alpha, \
      typename TTypes<T>::Tensor backprops); \
  extern template struct LeakyReluGrad<GPUDevice, T>; \
\
  template <> \
  void EluGrad<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor activations, \
      typename TTypes<T>::Tensor backprops); \
  extern template struct EluGrad<GPUDevice, T>; \
\
  template <> \
  void SeluGrad<GPUDevice, T>::operator()( \
      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
      typename TTypes<T>::ConstTensor activations, \
      typename TTypes<T>::Tensor backprops); \
  extern template struct SeluGrad<GPUDevice, T>;

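// Declaration of the forward Relu functor specialization for qint8, used by
// the ReluOp<GPUDevice, qint8> specialization registered below.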
template <>
void Relu<GPUDevice, qint8>::operator()(
    const GPUDevice& d, typename TTypes<qint8>::ConstTensor features,
    typename TTypes<qint8>::Tensor activations);
extern template struct Relu<GPUDevice, qint8>;

TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
}  // namespace functor

// Registration of the GPU implementations.
#define REGISTER_GPU_KERNELS(type) \
  REGISTER_KERNEL_BUILDER( \
      Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      ReluGradOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu6").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      Relu6Op<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      Relu6GradOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      LeakyReluOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      LeakyReluGradOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("EluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      EluGradOp<GPUDevice, type>); \
  REGISTER_KERNEL_BUILDER( \
      Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
      SeluGradOp<GPUDevice, type>)

TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
#undef REGISTER_GPU_KERNELS

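// A specialization of ReluOp for qint8. The GPU kernel processes the int8
// values four at a time, so the flat input size must be a multiple of 4
// (see the check below and relu_op_gpu.cu.cc).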
template <typename Device>
class ReluOp<Device, qint8>
    : public UnaryElementWiseOp<qint8, ReluOp<Device, qint8>> {
 public:
  using UnaryElementWiseOp<qint8, ReluOp<Device, qint8>>::UnaryElementWiseOp;

  void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
    auto flat_input = input.flat<qint8>();
    OP_REQUIRES(context, (flat_input.size() % 4) == 0,
                errors::InvalidArgument(
                    "Tensor size must be a multiple of 4 for Relu<qint8>. Got ",
                    flat_input.size()));
    functor::Relu<Device, qint8> func;
    func(context->eigen_device<Device>(), flat_input, output->flat<qint8>());
  }
};

REGISTER_KERNEL_BUILDER(
    Name("Relu").Device(DEVICE_GPU).TypeConstraint<qint8>("T"),
    ReluOp<GPUDevice, qint8>);

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

}  // namespace tensorflow