batch_norm_op.cc source code [tensorflow/tensorflow/core/kernels/batch_norm_op.cc]

1	/ Copyright 2015 The TensorFlow Authors. All Rights Reserved.*
2
3	Licensed under the Apache License, Version 2.0 (the "License");
4	you may not use this file except in compliance with the License.
5	You may obtain a copy of the License at
6
7	http://www.apache.org/licenses/LICENSE-2.0
8
9	Unless required by applicable law or agreed to in writing, software
10	distributed under the License is distributed on an "AS IS" BASIS,
11	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	See the License for the specific language governing permissions and
13	limitations under the License.
14	==============================================================================/*
15
16	// See docs in ../ops/nn_ops.cc.
17
18	#define EIGEN_USE_THREADS
19
20	#include "tensorflow/core/kernels/batch_norm_op.h"
21	#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
22	#include "tensorflow/core/framework/numeric_op.h"
23	#include "tensorflow/core/framework/op_kernel.h"
24	#include "tensorflow/core/framework/register_types.h"
25	#include "tensorflow/core/framework/tensor.h"
26
27	namespace tensorflow {
28
29	typedef Eigen::ThreadPoolDevice CPUDevice;
30	typedef Eigen::GpuDevice GPUDevice;
31
32	template <typename Device, typename T>
33	class BatchNormOp : public OpKernel {
34	public:
35	explicit BatchNormOp(OpKernelConstruction* context) : OpKernel(context) {
36	float variance_epsilon;
37	OP_REQUIRES_OK(context,
38	context->GetAttr("variance_epsilon", &variance_epsilon));
39	variance_epsilon_ = T(variance_epsilon);
40	OP_REQUIRES_OK(context, context->GetAttr("scale_after_normalization",
41	&scale_after_normalization_));
42	}
43
44	void Compute(OpKernelContext* context) override {
45	const Tensor& input = context->input(`0`);
46	const Tensor& mean = context->input(`1`);
47	const Tensor& var = context->input(`2`);
48	const Tensor& beta = context->input(`3`);
49	const Tensor& gamma = context->input(`4`);
50
51	OP_REQUIRES(context, input.dims() == `4`,
52	errors::InvalidArgument("input must be 4-dimensional",
53	input.shape().DebugString()));
54	OP_REQUIRES(context, mean.dims() == `1`,
55	errors::InvalidArgument("mean must be 1-dimensional",
56	mean.shape().DebugString()));
57	OP_REQUIRES(context, var.dims() == `1`,
58	errors::InvalidArgument("var must be 1-dimensional",
59	var.shape().DebugString()));
60	OP_REQUIRES(context, beta.dims() == `1`,
61	errors::InvalidArgument("beta must be 1-dimensional",
62	beta.shape().DebugString()));
63	OP_REQUIRES(context, gamma.dims() == `1`,
64	errors::InvalidArgument("gamma must be 1-dimensional",
65	gamma.shape().DebugString()));
66
67	Tensor* output = nullptr;
68	OP_REQUIRES_OK(context,
69	context->allocate_output(`0`, input.shape(), &output));
70
71	functor::BatchNorm<Device, T>()(
72	context->eigen_device<Device>(), input.tensor<T, `4`>(), mean.vec<T>(),
73	var.vec<T>(), beta.vec<T>(), gamma.vec<T>(), variance_epsilon_,
74	scale_after_normalization_, output->tensor<T, `4`>());
75	}
76
77	private:
78	T variance_epsilon_;
79	bool scale_after_normalization_;
80	};
81
82	template <typename Device, typename T>
83	class BatchNormGradOp : public OpKernel {
84	public:
85	explicit BatchNormGradOp(OpKernelConstruction* context) : OpKernel(context) {
86	float variance_epsilon;
87	OP_REQUIRES_OK(context,
88	context->GetAttr("variance_epsilon", &variance_epsilon));
89	variance_epsilon_ = T(variance_epsilon);
90	OP_REQUIRES_OK(context, context->GetAttr("scale_after_normalization",
91	&scale_after_normalization_));
92	}
93
94	void Compute(OpKernelContext* context) override {
95	const Tensor& input = context->input(`0`);
96	const Tensor& mean = context->input(`1`);
97	const Tensor& var = context->input(`2`);
98	const Tensor& gamma = context->input(`3`);
99	const Tensor& out_backprop = context->input(`4`);
100
101	OP_REQUIRES(context, input.dims() == `4`,
102	errors::InvalidArgument("input must be 4-dimensional",
103	input.shape().DebugString()));
104	OP_REQUIRES(context, mean.dims() == `1`,
105	errors::InvalidArgument("mean must be 1-dimensional",
106	mean.shape().DebugString()));
107	OP_REQUIRES(context, var.dims() == `1`,
108	errors::InvalidArgument("var must be 1-dimensional",
109	var.shape().DebugString()));
110	OP_REQUIRES(context, gamma.dims() == `1`,
111	errors::InvalidArgument("gamma must be 1-dimensional",
112	gamma.shape().DebugString()));
113	OP_REQUIRES(context, out_backprop.dims() == `4`,
114	errors::InvalidArgument("out_backprop must be 4-dimensional",
115	out_backprop.shape().DebugString()));
116
117	Tensor* dx = nullptr;
118	OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
119	{`0`, `4`}, `0`, input.shape(), &dx));
120	Tensor* dm = nullptr;
121	OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
122	{`1`}, `1`, mean.shape(), &dm));
123	Tensor* dv = nullptr;
124	OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
125	{`2`}, `2`, var.shape(), &dv));
126	Tensor* db = nullptr;
127	if (scale_after_normalization_) {
128	OP_REQUIRES_OK(context, context->allocate_output(`3`, mean.shape(), &db));
129	} else {
130	OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
131	{`3`}, `3`, mean.shape(), &db));
132	}
133	Tensor* dg = nullptr;
134	OP_REQUIRES_OK(context, context->allocate_output(`4`, gamma.shape(), &dg));
135
136	// Scratch buffer of [depth] dimension, aka the 4th dimension of input,
137	// which is dim_size(3), for calculating various combinations of
138	// (var + epsilon).
139	Tensor scratch1;
140	OP_REQUIRES_OK(context, context->allocate_temp(
141	DataTypeToEnum<T>::value,
142	TensorShape ({input.dim_size(`3`)}), &scratch1));
143
144	// Scratch buffer of [depth] dimension for saving intermediate calculation
145	// values.
146	Tensor scratch2;
147	OP_REQUIRES_OK(context, context->allocate_temp(
148	DataTypeToEnum<T>::value,
149	TensorShape ({input.dim_size(`3`)}), &scratch2));
150
151	functor::BatchNormGrad<Device, T>()(
152	context->eigen_device<Device>(), input.tensor<T, `4`>(), mean.vec<T>(),
153	var.vec<T>(), gamma.vec<T>(), out_backprop.tensor<T, `4`>(),
154	variance_epsilon_, scale_after_normalization_, dx->tensor<T, `4`>(),
155	dm->vec<T>(), dv->vec<T>(), db->vec<T>(), dg->vec<T>(),
156	scratch1.vec<T>(), scratch2.vec<T>());
157	}
158
159	private:
160	T variance_epsilon_;
161	bool scale_after_normalization_;
162	};
163
164	#define REGISTER_KERNEL(T) \
165	REGISTER_KERNEL_BUILDER(Name("BatchNormWithGlobalNormalization") \
166	.Device(DEVICE_CPU) \
167	.TypeConstraint<T>("T"), \
168	BatchNormOp<CPUDevice, T>);
169
170	TF_CALL_half(REGISTER_KERNEL);
171	TF_CALL_float(REGISTER_KERNEL);
172	TF_CALL_double(REGISTER_KERNEL);
173	#undef REGISTER_KERNEL
174
175	#if GOOGLE_CUDA \|\| TENSORFLOW_USE_ROCM
176	// Forward declarations of the functor specializations for GPU.
177	namespace functor {
178	#define DECLARE_GPU_SPEC(T) \
179	template <> \
180	void BatchNorm<GPUDevice, T>::operator()( \
181	const GPUDevice& d, typename TTypes<T, 4>::ConstTensor input, \
182	typename TTypes<T>::ConstVec mean, typename TTypes<T>::ConstVec var, \
183	typename TTypes<T>::ConstVec beta, typename TTypes<T>::ConstVec gamma, \
184	T variance_epsilon, bool scale_after_normalization, \
185	typename TTypes<T, 4>::Tensor output); \
186	extern template struct BatchNorm<GPUDevice, T>;
187
188	#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
189
190	TF_CALL_half(DECLARE_GPU_SPECS);
191	TF_CALL_float(DECLARE_GPU_SPECS);
192	#undef DECLARE_GPU_SPEC
193	} // namespace functor
194
195	// Registration of the GPU implementations.
196	#define REGISTER_GPU_KERNEL(T) \
197	REGISTER_KERNEL_BUILDER(Name("BatchNormWithGlobalNormalization") \
198	.Device(DEVICE_GPU) \
199	.TypeConstraint<T>("T"), \
200	BatchNormOp<GPUDevice, T>);
201
202	TF_CALL_half(REGISTER_GPU_KERNEL);
203	TF_CALL_float(REGISTER_GPU_KERNEL);
204	#undef REGISTER_GPU_KERNEL
205
206	#endif // GOOGLE_CUDA \|\| TENSORFLOW_USE_ROCM
207
208
209	#define REGISTER_KERNEL(T) \
210	REGISTER_KERNEL_BUILDER(Name("BatchNormWithGlobalNormalizationGrad") \
211	.Device(DEVICE_CPU) \
212	.TypeConstraint<T>("T"), \
213	BatchNormGradOp<CPUDevice, T>);
214
215	TF_CALL_half(REGISTER_KERNEL);
216	TF_CALL_float(REGISTER_KERNEL);
217	TF_CALL_double(REGISTER_KERNEL);
218	#undef REGISTER_KERNEL
219
220	#if GOOGLE_CUDA \|\| TENSORFLOW_USE_ROCM
221	// Forward declarations of the functor specializations for GPU.
222	namespace functor {
223	#define DECLARE_GPU_SPEC(T) \
224	template <> \
225	void BatchNormGrad<GPUDevice, T>::operator()( \
226	const GPUDevice& d, typename TTypes<T, 4>::ConstTensor input, \
227	typename TTypes<T>::ConstVec mean, typename TTypes<T>::ConstVec var, \
228	typename TTypes<T>::ConstVec gamma, \
229	typename TTypes<T, 4>::ConstTensor out_backprop, T variance_epsilon, \
230	bool scale_after_normalization, typename TTypes<T, 4>::Tensor dx, \
231	typename TTypes<T>::Vec dm, typename TTypes<T>::Vec dv, \
232	typename TTypes<T>::Vec db, typename TTypes<T>::Vec dg, \
233	typename TTypes<T>::Vec scratch1, typename TTypes<T>::Vec scratch2); \
234	extern template struct BatchNormGrad<GPUDevice, T>;
235
236	#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPEC(T);
237
238	TF_CALL_half(DECLARE_GPU_SPECS);
239	TF_CALL_float(DECLARE_GPU_SPECS);
240	#undef DECLARE_GPU_SPEC
241	} // namespace functor
242
243	// Registration of the GPU implementations.
244	#define REGISTER_GPU_KERNEL(T) \
245	REGISTER_KERNEL_BUILDER(Name("BatchNormWithGlobalNormalizationGrad") \
246	.Device(DEVICE_GPU) \
247	.TypeConstraint<T>("T"), \
248	BatchNormGradOp<GPUDevice, T>);
249
250	TF_CALL_half(REGISTER_GPU_KERNEL);
251	TF_CALL_float(REGISTER_GPU_KERNEL);
252	#undef REGISTER_GPU_KERNEL
253
254	#endif // GOOGLE_CUDA \|\| TENSORFLOW_USE_ROCM
255
256
257	} // namespace tensorflow
258

Browse the source code of tensorflow/tensorflow/core/kernels/batch_norm_op.cc