1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | // See docs in ../ops/array_ops.cc. |
17 | |
18 | // clang-format off |
19 | #include "tensorflow/core/platform/bfloat16.h" |
20 | |
21 | #include <math.h> // NOLINT |
22 | #include <algorithm> // NOLINT |
23 | #include <numeric> // NOLINT |
24 | // clang-format on |
25 | |
26 | #include "tensorflow/core/framework/op_kernel.h" |
27 | #include "tensorflow/core/framework/register_types.h" |
28 | #include "tensorflow/core/framework/tensor.h" |
29 | #include "tensorflow/core/framework/tensor_reference.h" |
30 | #include "tensorflow/core/framework/types.h" |
31 | |
32 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
33 | #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" |
34 | #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
35 | |
36 | #if GOOGLE_CUDA |
37 | #include "tensorflow/compiler/xla/stream_executor/cuda/cuda_activation.h" |
38 | #elif TENSORFLOW_USE_ROCM |
39 | #include "tensorflow/core/platform/rocm.h" |
40 | #endif |
41 | namespace tensorflow { |
42 | |
43 | typedef Eigen::ThreadPoolDevice CPUDevice; |
44 | typedef Eigen::GpuDevice GPUDevice; |
45 | |
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// Host-side handle for the GPU kernel that scans `data` for non-finite
// values.  `abnormal_detected` is a device-side array of two int flags; the
// host-side reader (checkForAnomalies below) interprets them as
// [0] = NaN seen, [1] = Inf seen.  The definition lives in the companion
// .cu.cc translation unit; only the declaration appears here.
template <typename T>
struct CheckNumericsLaunch {
  void Run(const GPUDevice& d, const T* data, int size,
           int abnormal_detected[2]);
};

// Instantiations are provided by the CUDA/ROCm translation unit.
extern template struct CheckNumericsLaunch<Eigen::half>;
extern template struct CheckNumericsLaunch<float>;
extern template struct CheckNumericsLaunch<double>;

// V2 launcher: distinguishes the sign of infinities.  The three flags are
// read back as [0] = NaN, [1] = -Inf, [2] = +Inf (see the v2
// checkForAnomalies below).
template <typename T>
struct CheckNumericsLaunchV2 {
  void Run(const GPUDevice& d, const T* data, int size,
           int abnormal_detected[3]);
};

extern template struct CheckNumericsLaunchV2<Eigen::half>;
extern template struct CheckNumericsLaunchV2<float>;
extern template struct CheckNumericsLaunchV2<double>;
#endif
67 | |
68 | namespace { |
69 | |
// Bit flags OR-ed together per element to record which kinds of abnormal
// floating-point values were observed.  The v1 op uses kInfBit/kNaNBit; the
// v2 op uses kNegativeInfBit/kPositiveInfBit in place of kInfBit so it can
// report the sign of infinities separately.
const int kInfBit = 0x01;
const int kNaNBit = 0x02;
const int kNegativeInfBit = 0x04;
const int kPositiveInfBit = 0x08;

// Primary template; only the CPU and GPU partial specializations below are
// defined.
template <typename Device, typename T>
class CheckNumericsOp;
77 | |
78 | // Partial specialization for CPU |
79 | // TODO(jeff,rmlarsen): We should make this variant be an AsyncOpKernel, as |
80 | // was done for the GPU case below. |
81 | template <typename T> |
82 | class CheckNumericsOp<CPUDevice, T> : public OpKernel { |
83 | public: |
84 | explicit CheckNumericsOp(OpKernelConstruction* context) : OpKernel(context) { |
85 | // message_ is used as the prefix for the assertion error message. For |
86 | // instance, this can be the name of the input op that produced the tensor. |
87 | OP_REQUIRES_OK(context, context->GetAttr("message" , &message_)); |
88 | } |
89 | |
90 | void Compute(OpKernelContext* context) override { |
91 | // pass along the input to the output |
92 | context->set_output(0, context->input(0)); |
93 | |
94 | auto in = context->input(0).flat<T>(); |
95 | const T* data = in.data(); |
96 | const int64_t size = in.size(); |
97 | // Check to see if any element of the tensor is NaN or Inf. |
98 | int fp_props = std::accumulate( |
99 | data, data + size, 0, |
100 | [this](const int x, const T& y) { return checkFloatingElement(x, y); }); |
101 | if (fp_props != 0) { |
102 | const string& status = getErrorString(fp_props); |
103 | if (!status.empty()) { |
104 | context->SetStatus(errors::InvalidArgument(message_, " : Tensor had " , |
105 | status, " values" )); |
106 | } |
107 | } |
108 | } |
109 | |
110 | protected: |
111 | virtual int checkFloatingElement(const int x, const T& y) { |
112 | int result = x; |
113 | if (TF_PREDICT_TRUE(Eigen::numext::isfinite(y))) { |
114 | // Do nothing: common case. |
115 | } else { |
116 | if (Eigen::numext::isinf(y)) { |
117 | result |= kInfBit; |
118 | } else if (Eigen::numext::isnan(y)) { |
119 | result |= kNaNBit; |
120 | } |
121 | } |
122 | return result; |
123 | } |
124 | |
125 | virtual const string getErrorString(const int fp_props) { |
126 | string status; |
127 | if ((fp_props & kInfBit) && (fp_props & kNaNBit)) { |
128 | status = "Inf and NaN" ; |
129 | } else { |
130 | if (fp_props & kInfBit) { |
131 | status = "Inf" ; |
132 | } |
133 | if (fp_props & kNaNBit) { |
134 | status = "NaN" ; |
135 | } |
136 | } |
137 | return status; |
138 | } |
139 | |
140 | private: |
141 | string message_; |
142 | }; |
143 | |
144 | template <typename Device, typename T> |
145 | class CheckNumericsV2Op; |
146 | |
147 | // Partial specialization for CPU: v2. |
148 | // The v2 op differs from the v1 in that it distinguishes -inf and +inf. |
149 | template <typename T> |
150 | class CheckNumericsV2Op<CPUDevice, T> : public CheckNumericsOp<CPUDevice, T> { |
151 | public: |
152 | explicit CheckNumericsV2Op(OpKernelConstruction* context) |
153 | : CheckNumericsOp<CPUDevice, T>(context) {} |
154 | |
155 | protected: |
156 | int checkFloatingElement(const int x, const T& y) override { |
157 | int result = x; |
158 | if (TF_PREDICT_TRUE(Eigen::numext::isfinite(y))) { |
159 | // Do nothing: common case. |
160 | } else { |
161 | if (Eigen::numext::isinf(y)) { |
162 | result |= y < static_cast<T>(0.) ? kNegativeInfBit : kPositiveInfBit; |
163 | } else if (Eigen::numext::isnan(y)) { |
164 | result |= kNaNBit; |
165 | } |
166 | } |
167 | return result; |
168 | } |
169 | |
170 | const string getErrorString(const int fp_props) override { |
171 | std::vector<string> anomalies; |
172 | if (fp_props & kNegativeInfBit) { |
173 | anomalies.push_back("-Inf" ); |
174 | } |
175 | if (fp_props & kPositiveInfBit) { |
176 | anomalies.push_back("+Inf" ); |
177 | } |
178 | if (fp_props & kNaNBit) { |
179 | anomalies.push_back("NaN" ); |
180 | } |
181 | if (anomalies.size() == 3) { |
182 | return strings::StrCat(anomalies[0], ", " , anomalies[1], ", and " , |
183 | anomalies[2]); |
184 | } else if (anomalies.size() == 2) { |
185 | return strings::StrCat(anomalies[0], " and " , anomalies[1]); |
186 | } else { |
187 | return anomalies[0]; |
188 | } |
189 | } |
190 | }; |
191 | |
192 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
193 | // Partial specialization for GPU |
194 | template <typename T> |
195 | class CheckNumericsOp<GPUDevice, T> : public AsyncOpKernel { |
196 | public: |
197 | typedef GPUDevice Device; |
198 | |
199 | explicit CheckNumericsOp(OpKernelConstruction* context) |
200 | : AsyncOpKernel(context) { |
201 | // message_ is used as the prefix for the assertion error message. For |
202 | // instance, this can be the name of the input op that produced the tensor. |
203 | OP_REQUIRES_OK(context, context->GetAttr("message" , &message_)); |
204 | } |
205 | |
206 | void ComputeAsync(OpKernelContext* context, DoneCallback done) override { |
207 | // pass along the input to the output |
208 | context->set_output(0, context->input(0)); |
209 | if (context->input(0).NumElements() == 0) { |
210 | done(); |
211 | return; |
212 | } |
213 | auto input = context->input(0).flat<T>(); |
214 | |
215 | // Allocate and initialize the elements to hold the check results |
216 | Tensor abnormal_detected; |
217 | const int abnormal_detected_size = getAnomalyIndicatorSize(); |
218 | OP_REQUIRES_OK(context, context->allocate_temp( |
219 | DT_INT32, TensorShape({abnormal_detected_size}), |
220 | &abnormal_detected)); |
221 | |
222 | auto* stream = context->op_device_context()->stream(); |
223 | OP_REQUIRES_ASYNC(context, stream != nullptr, |
224 | errors::Internal("No GPU stream available." ), done); |
225 | |
226 | se::DeviceMemoryBase abnormal_detected_ptr( |
227 | abnormal_detected.flat<int>().data(), |
228 | abnormal_detected.flat<int>().size()); |
229 | stream->ThenMemset32(&abnormal_detected_ptr, 0, |
230 | abnormal_detected.flat<int>().size() * sizeof(int)); |
231 | |
232 | // Call the GPU kernels for the numerical checks |
233 | const Device& d = context->eigen_device<Device>(); |
234 | RunKernel(d, input.data(), input.size(), |
235 | abnormal_detected.flat<int>().data()); |
236 | |
237 | // Copy the results from device to host |
238 | AllocatorAttributes attr; |
239 | attr.set_on_host(true); |
240 | attr.set_gpu_compatible(true); |
241 | Tensor abnormal_detected_host; |
242 | OP_REQUIRES_OK_ASYNC( |
243 | context, |
244 | context->allocate_temp(DT_INT32, TensorShape({abnormal_detected_size}), |
245 | &abnormal_detected_host, attr), |
246 | done); |
247 | OP_REQUIRES_ASYNC( |
248 | context, |
249 | stream |
250 | ->ThenMemcpy(abnormal_detected_host.flat<int>().data(), |
251 | abnormal_detected_ptr, |
252 | abnormal_detected_size * sizeof(int)) |
253 | .ok(), |
254 | errors::Internal("GPU memcpy from device to host failed" ), done); |
255 | |
256 | // We have observed crashes on some network stacks when not holding |
257 | // this tensor reference. |
258 | TensorReference abnormal_detected_ref(abnormal_detected); |
259 | auto check_cb = [this, stream, abnormal_detected_ref, |
260 | abnormal_detected_host, context, done]() { |
261 | #if GOOGLE_CUDA |
262 | se::cuda::ScopedActivateExecutorContext scoped_activation{ |
263 | stream->parent()}; |
264 | #elif TENSORFLOW_USE_ROCM |
265 | se::rocm::ScopedActivateExecutorContext scoped_activation{ |
266 | stream->parent()}; |
267 | #endif |
268 | TTypes<const int>::Vec abnormal_detected_host_flat = |
269 | abnormal_detected_host.flat<int>(); |
270 | abnormal_detected_ref.Unref(); |
271 | checkForAnomalies(context, abnormal_detected_host_flat); |
272 | done(); |
273 | }; |
274 | context->device() |
275 | ->tensorflow_accelerator_device_info() |
276 | ->event_mgr->ThenExecute(stream, std::move(check_cb)); |
277 | } |
278 | |
279 | protected: |
280 | virtual int getAnomalyIndicatorSize() { return 2; } |
281 | |
282 | virtual void RunKernel(const GPUDevice& d, const T* data, int size, |
283 | int* abnormal_detected) { |
284 | CheckNumericsLaunch<T>().Run(d, data, size, abnormal_detected); |
285 | } |
286 | |
287 | virtual void checkForAnomalies( |
288 | OpKernelContext* context, |
289 | const TTypes<const int>::Vec& abnormality_indicators) { |
290 | const int is_nan = abnormality_indicators(0); |
291 | const int is_inf = abnormality_indicators(1); |
292 | if (is_nan || is_inf) { |
293 | LOG(ERROR) << "abnormal_detected_host @" << abnormality_indicators.data() |
294 | << " = {" << is_nan << ", " << is_inf << "} " << message_; |
295 | |
296 | string anomalies; |
297 | if (is_nan && is_inf) { |
298 | anomalies = "Inf and NaN" ; |
299 | } else if (is_nan) { |
300 | anomalies = "NaN" ; |
301 | } else if (is_inf) { |
302 | anomalies = "Inf" ; |
303 | } |
304 | context->SetStatus(errors::InvalidArgument(message_, " : Tensor had " , |
305 | anomalies, " values" )); |
306 | } |
307 | } |
308 | |
309 | string message_; |
310 | }; |
311 | |
312 | template <typename T> |
313 | class CheckNumericsV2Op<GPUDevice, T> : public CheckNumericsOp<GPUDevice, T> { |
314 | public: |
315 | CheckNumericsV2Op(OpKernelConstruction* context) |
316 | : CheckNumericsOp<GPUDevice, T>(context) {} |
317 | |
318 | protected: |
319 | int getAnomalyIndicatorSize() override { return 3; } |
320 | |
321 | void RunKernel(const GPUDevice& d, const T* data, int size, |
322 | int* abnormal_detected) override { |
323 | CheckNumericsLaunchV2<T>().Run(d, data, size, abnormal_detected); |
324 | } |
325 | |
326 | void checkForAnomalies( |
327 | OpKernelContext* context, |
328 | const TTypes<const int>::Vec& abnormality_indicators) override { |
329 | const int is_nan = abnormality_indicators(0); |
330 | const int is_negative_inf = abnormality_indicators(1); |
331 | const int is_positive_inf = abnormality_indicators(2); |
332 | if (is_negative_inf || is_positive_inf || is_nan) { |
333 | std::vector<string> anomalies; |
334 | if (is_negative_inf) { |
335 | anomalies.push_back("-Inf" ); |
336 | } |
337 | if (is_positive_inf) { |
338 | anomalies.push_back("+Inf" ); |
339 | } |
340 | if (is_nan) { |
341 | anomalies.push_back("NaN" ); |
342 | } |
343 | string all_anomalies; |
344 | if (anomalies.size() == 3) { |
345 | all_anomalies = strings::StrCat(anomalies[0], ", " , anomalies[1], |
346 | ", and " , anomalies[2]); |
347 | } else if (anomalies.size() == 2) { |
348 | all_anomalies = strings::StrCat(anomalies[0], " and " , anomalies[1]); |
349 | } else { |
350 | all_anomalies = anomalies[0]; |
351 | } |
352 | context->SetStatus(errors::InvalidArgument( |
353 | this->message_, " : Tensor had " , all_anomalies, " values" )); |
354 | } |
355 | } |
356 | |
357 | static constexpr int abnormal_detected_size = 3; |
358 | }; |
359 | |
360 | #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
361 | |
362 | } // namespace |
363 | |
// Registers the v1 CheckNumerics CPU kernel for floating-point type T.
#define REGISTER_CPU_KERNEL(T)                                      \
  REGISTER_KERNEL_BUILDER(                                          \
      Name("CheckNumerics").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      CheckNumericsOp<CPUDevice, T>);
TF_CALL_half(REGISTER_CPU_KERNEL);
TF_CALL_bfloat16(REGISTER_CPU_KERNEL);
TF_CALL_float(REGISTER_CPU_KERNEL);
TF_CALL_double(REGISTER_CPU_KERNEL);

// Registers the v2 (sign-of-infinity aware) CheckNumerics CPU kernel for T.
#define REGISTER_V2_CPU_KERNEL(T)                                       \
  REGISTER_KERNEL_BUILDER(                                              \
      Name("CheckNumericsV2").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      CheckNumericsV2Op<CPUDevice, T>);
TF_CALL_half(REGISTER_V2_CPU_KERNEL);
TF_CALL_bfloat16(REGISTER_V2_CPU_KERNEL);
TF_CALL_float(REGISTER_V2_CPU_KERNEL);
TF_CALL_double(REGISTER_V2_CPU_KERNEL);

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// GPU registrations: half/float/double only (no bfloat16 — the GPU launchers
// above are instantiated only for these three types).
REGISTER_KERNEL_BUILDER(
    Name("CheckNumerics").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
    CheckNumericsOp<GPUDevice, Eigen::half>);
REGISTER_KERNEL_BUILDER(
    Name("CheckNumerics").Device(DEVICE_GPU).TypeConstraint<float>("T"),
    CheckNumericsOp<GPUDevice, float>);
REGISTER_KERNEL_BUILDER(
    Name("CheckNumerics").Device(DEVICE_GPU).TypeConstraint<double>("T"),
    CheckNumericsOp<GPUDevice, double>);

REGISTER_KERNEL_BUILDER(
    Name("CheckNumericsV2").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
    CheckNumericsV2Op<GPUDevice, Eigen::half>);
REGISTER_KERNEL_BUILDER(
    Name("CheckNumericsV2").Device(DEVICE_GPU).TypeConstraint<float>("T"),
    CheckNumericsV2Op<GPUDevice, float>);
REGISTER_KERNEL_BUILDER(
    Name("CheckNumericsV2").Device(DEVICE_GPU).TypeConstraint<double>("T"),
    CheckNumericsV2Op<GPUDevice, double>);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
403 | |
404 | } // namespace tensorflow |
405 | |