compare.cpp source code [oneDNN/tests/benchdnn/utils/compare.cpp]

1	/*******************************************************************************
2	* Copyright 2020-2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#include <algorithm>
18	#include <atomic>
19	#include <cmath>
20	#include <sstream>
21	#include <string>
22
23	#include "utils/parallel.hpp"
24
25	#include "common.hpp"
26	#include "utils/compare.hpp"
27	#include "utils/norm.hpp"
28
29	#include "eltwise/eltwise.hpp"
30
31	namespace compare {
32
33	static void dump_point_values(const dnnl_memory_desc_t &md, data_kind_t kind,
34	int64_t l_offset, float exp_f32, float exp, float got, float diff,
35	float rel_diff) {
36	std::stringstream ss;
37	dims_t l_dims = md2dims(md);
38	dims_t dims_idx = off2dims_idx(l_dims, l_offset);
39	ss << dims_idx;
40	std::string ind_str = ss.str();
41
42	std::string skind;
43	if (kind != DAT_TOTAL) skind = "[" + std::string(data_kind2str(kind)) + "]";
44
45	BENCHDNN_PRINT(`0`,
46	"[%4ld]%s[%s] exp_f32:%12g exp:%12g got:%12g diff:%8g rdiff:%8g\n",
47	(long)l_offset, skind.c_str(), ind_str.c_str(), exp_f32, exp, got,
48	diff, rel_diff);
49	}
50
51	static void dump_norm_values(const diff_norm_t &diff_norm, data_kind_t kind) {
52	std::string skind;
53	if (kind != DAT_TOTAL) skind = "[" + std::string(data_kind2str(kind)) + "]";
54
55	BENCHDNN_PRINT(`0`,
56	"%s[L0] = %g\n"
57	"%s[L1] exp:%8g got:%8g diff:%8g rel_diff:%8g\n"
58	"%s[L2] exp:%8g got:%8g diff:%8g rel_diff:%8g\n"
59	"%s[L8] exp:%8g got:%8g diff:%8g rel_diff:%8g\n",
60	skind.c_str(), diff_norm.rel_diff(norm_t::L0), skind.c_str(),
61	diff_norm.a_[norm_t::L1], diff_norm.b_[norm_t::L1],
62	diff_norm.diff_[norm_t::L1], diff_norm.rel_diff(norm_t::L1),
63	skind.c_str(), diff_norm.a_[norm_t::L2], diff_norm.b_[norm_t::L2],
64	diff_norm.diff_[norm_t::L2], diff_norm.rel_diff(norm_t::L2),
65	skind.c_str(), diff_norm.a_[norm_t::L8], diff_norm.b_[norm_t::L8],
66	diff_norm.diff_[norm_t::L8], diff_norm.rel_diff(norm_t::L8));
67	}
68
69	static bool has_binary_comparison_po(const attr_t &attr) {
70	const auto &po = attr.post_ops;
71	if (po.is_def()) return false;
72
73	using alg_t = attr_t::post_ops_t::kind_t;
74	static const std::vector<alg_t> cmp_alg = {alg_t::MAX, alg_t::MIN,
75	alg_t::GE, alg_t::GT, alg_t::LE, alg_t::LT, alg_t::EQ, alg_t::NE};
76
77	for (int idx = `0`; idx < po.len(); ++idx) {
78	const auto &e = po.entry[idx];
79	if (!e.is_binary_kind()) continue;
80
81	if (std::any_of(cmp_alg.cbegin(), cmp_alg.cend(),
82	[&](const alg_t alg) { return e.kind == alg; }))
83	return true;
84	}
85	return false;
86	}
87
// Returns true when `a` and `b` are both NaN, or when both are infinities
// carrying the same sign. Any other pair yields false.
bool compare_extreme_values(float a, float b) {
    const bool both_nan = std::isnan(a) && std::isnan(b);
    const bool same_sign_inf = std::isinf(a) && std::isinf(b)
            && std::signbit(a) == std::signbit(b);
    return both_nan || same_sign_inf;
}
94
95	compare_t::driver_check_func_args_t::driver_check_func_args_t(
96	const dnn_mem_t &exp_mem, const dnn_mem_t &got_f32, const int64_t i,
97	const dnnl_data_type_t data_type, const float trh)
98	: dt(data_type)
99	, idx(i)
100	, exp_f32(exp_mem.get_elem(idx))
101	, exp(round_to_nearest_representable(dt, exp_f32))
102	, got(got_f32.get_elem(idx))
103	, diff(fabsf(exp - got))
104	, rel_diff(diff / (fabsf(exp) > FLT_MIN ? fabsf(exp) : `1`))
105	, trh(trh) {}
106
107	int compare_t::compare_norm(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem,
108	const attr_t &attr, res_t res) const* {
109	const auto nelems = got_mem.nelems();
110	if (nelems == `0`) {
111	if (res->state == EXECUTED) res->state = PASSED;
112	return OK;
113	}
114
115	res->total = nelems;
116
117	dnn_mem_t got_f32(got_mem, dnnl_f32, tag::abx, get_cpu_engine());
118	const auto dt = got_mem.dt();
119
120	diff_norm_t diff_norm;
121	const bool need_dump = verbose >= `99`;
122	for (int64_t i = `0`; i < nelems; ++i) {
123	driver_check_func_args_t args(exp_mem, got_f32, i, dt, trh_);
124
125	if (std::isnan(args.exp_f32) && is_integral_dt(dt)) {
126	// Don't include integer max values into norm as they make it
127	// irrelevant for validation.
128	;
129	} else if (is_cpu() && dt == dnnl_s32 && args.exp == max_dt(dnnl_s32)
130	&& args.got >= BENCHDNN_S32_TO_F32_SAT_CONST
131	&& args.got < max_dt(dnnl_s32)) {
132	// Don't include f32->s32 saturation values into norm as they make
133	// it irrelevant for validation.
134	;
135	} else {
136	diff_norm.update(args.exp, args.got);
137	}
138
139	if (need_dump)
140	dump_point_values(got_mem.md_, kind_, i, args.exp_f32, args.exp,
141	args.got, args.diff, args.rel_diff);
142	}
143	diff_norm.done();
144
145	bool ok = diff_norm.rel_diff(norm_t::L2) <= trh_;
146	if (!ok) res->errors = `1`;
147
148	const bool dump = need_dump \|\| !ok;
149	if (dump) dump_norm_values(diff_norm, kind_);
150
151	if (res->errors) res->state = FAILED;
152	if (res->state == EXECUTED) res->state = PASSED;
153
154	return res->state == FAILED ? FAIL : OK;
155	}
156
157	int compare_t::compare_p2p(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem,
158	const attr_t &attr, res_t res) const* {
159	const auto nelems = got_mem.nelems();
160	if (nelems == `0`) {
161	if (res->state == EXECUTED) res->state = PASSED;
162	return OK;
163	}
164
165	res->total = nelems;
166
167	dnn_mem_t got_f32(got_mem, dnnl_f32, tag::abx, get_cpu_engine());
168	const auto dt = got_mem.dt();
169	const bool has_eltwise = attr.post_ops.eltwise_index() != -`1`;
170	const bool has_exp_eltwise
171	= attr.post_ops.find(attr_t::post_ops_t::kind_t::EXP) >= `0`;
172	const bool has_dst_scale = !attr.scales.get(DNNL_ARG_DST).is_def();
173
174	// Atomics to be updated in parallel section, non-atomics - in sequential.
175	std::atomic<bool> all_ok(true);
176	std::atomic<int64_t> zeros(`0`);
177	int64_t n_errors = `0`;
178	volatile bool from_parallel = true;
179	const bool need_dump = verbose >= `99`;
180
181	const auto compare_point_values = [&](int64_t i) {
182	driver_check_func_args_t args(exp_mem, got_f32, i, dt, trh_);
183
184	bool ok = args.diff == `0.f`;
185	if (std::isnan(args.exp_f32) && is_integral_dt(dt)) {
186	// Relax output requirements for this case, since different backends
187	// may implement NaN fp32 -> int32 conversion in a different manner.
188	ok = true;
189	}
190	// If fast check failed, go through all of them.
191	if (!ok) {
192	// Standard check for relative diff is under set threshold...
193	ok = (fabsf(args.exp) > `1e-5f` ? args.rel_diff : args.diff) <= trh_;
194	// If not, check that both are NaNs or infinity with same sign...
195	if (!ok) ok = compare::compare_extreme_values(args.exp, args.got);
196	// If not, use hack to check not fully correct s32 saturation on
197	// cpu...
198	if (!ok && is_cpu() && dt == dnnl_s32
199	&& args.exp == max_dt(dnnl_s32))
200	ok = args.got >= BENCHDNN_S32_TO_F32_SAT_CONST
201	&& args.got < max_dt(dnnl_s32);
202	// If not, check driver additional checks if set...
203	if (!ok && driver_check_func_) ok = driver_check_func_(args);
204	// If not, check if there are eltwise post-ops, use very relaxed
205	// comparison since we can't control inputs for each driver finely
206	// or validate if the output value from operation satisfies the
207	// check for catastrophic cancellation (see eltwise additional check
208	// function). We rely on validation of pure eltwise and let some
209	// big rdiff errors slip away hoping that absolute error is good
210	// enough.
211	if (!ok && has_eltwise) {
212	const float experimental_tolerated_trh
213	= std::max(epsilon_dt(dt), `2e-5f`);
214	ok = args.diff <= experimental_tolerated_trh;
215	}
216	// For eltwise it also may happen that threshold is really small,
217	// but absolute difference is really big. Also exponent is a special
218	// transcendental post-op that has accuracy issues with older isa.
219	if (!ok && has_eltwise
220	&& (fabsf(args.exp) > `1e+5f` \|\| has_exp_eltwise)) {
221	ok = args.rel_diff <= std::max(epsilon_dt(dt), `5e-6f`);
222	}
223	// Attr dst scale is used as a divisor to quantize data to dt.
224	// Implementation might decide to pre-compute inverse value and
225	// multiply on it in kernel. This difference might result in a
226	// slight error comparing to a division operation.
227	if (!ok && has_dst_scale) {
228	const float experimental_tolerated_trh
229	= std::max(epsilon_dt(dt), `1e-5f`);
230	ok = args.rel_diff <= experimental_tolerated_trh;
231	}
232	// Binary MAX, MIN and comparison operations post-ops may return
233	// different results for different backends when NaN is one of
234	// inputs. Depending on its position and implementation, either
235	// first or second operand may be returned.
236	if (!ok && has_binary_comparison_po(attr) && op_output_has_nans_)
237	ok = true;
238	// Some drivers (like pooling or resampling) on integer data types
239	// may result in sporadic order of operations. This may cause a
240	// difference around `x.5f` value, and can be rounded either way to
241	// `x` or `x + 1` which can't be fixed by filling.
242	if (!ok && is_integral_dt(args.dt)) {
243	// Check that original value is close to x.5f.
244	static constexpr float small_eps = `9e-6`;
245	const float floor_val = floorf(args.exp_f32);
246	const float ceil_val = ceilf(args.exp_f32);
247	if (fabsf((floor_val + `0.5f`) - args.exp_f32) < small_eps) {
248	// If it is, check exp and got values are on opposite sides.
249	if (args.exp == floor_val) {
250	ok = args.got == ceil_val;
251	} else if (args.exp == ceil_val) {
252	ok = args.got == floor_val;
253	}
254	}
255	}
256	}
257	// Update zero stats for mistrust testing.
258	if (from_parallel && fabsf(args.got) == `0`) zeros++;
259
260	if (!ok && all_ok) all_ok = false;
261	if (!ok && !from_parallel) n_errors++;
262
263	const bool dump
264	= need_dump \|\| (!ok && (n_errors < `10` \|\| verbose >= `10`));
265	if (!from_parallel && dump)
266	dump_point_values(got_mem.md_, kind_, i, args.exp_f32, args.exp,
267	args.got, args.diff, args.rel_diff);
268	};
269
270	// parallel comparison to speed up the process
271	benchdnn_parallel_nd(nelems, compare_point_values);
272
273	// serial comparison with enabled dumping when needed for nicer output.
274	if (!all_ok \|\| need_dump) {
275	from_parallel = false;
276	for (int64_t i = `0`; i < nelems; ++i)
277	compare_point_values (i);
278	}
279
280	// Set state to FAILED in case of any errors.
281	if (n_errors) res->errors = n_errors, res->state = FAILED;
282	// State could be already FAILED, check zero trust for non-FAILED only.
283	if (res->state != FAILED) {
284	const auto zeros_percent = `100.f` * zeros / nelems;
285	if (nelems >= `10` && zeros_percent > zero_trust_percent_) {
286	res->state = MISTRUSTED;
287	std::string skind;
288	if (kind_ != DAT_TOTAL)
289	skind = "[" + std::string(data_kind2str(kind_)) + "]";
290	BENCHDNN_PRINT(`2`,
291	"No trust stats [%s]: z:%2.0f%% (>%2.0f%%) (z: %ld, "
292	"total: %ld)\n",
293	skind.c_str(), zeros_percent, zero_trust_percent_,
294	(long)zeros.load(), (long)nelems);
295	}
296	}
297	// Set PASSED if no failure in current or previous checks happened and test
298	// can be trusted.
299	if (res->state == EXECUTED) res->state = PASSED;
300
301	return res->state == FAILED ? FAIL : OK;
302	}
303
304	int compare_t::compare(const dnn_mem_t &exp_mem, const dnn_mem_t &got_mem,
305	const attr_t &attr, res_t res) const* {
306	if (use_norm_) return compare_norm(exp_mem, got_mem, attr, res);
307	return compare_p2p(exp_mem, got_mem, attr, res);
308	}
309
310	} // namespace compare
311

Browse the source code of oneDNN/tests/benchdnn/utils/compare.cpp