1 | /******************************************************************************* |
2 | * Copyright 2020-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include <math.h> |
18 | |
19 | #include <random> |
20 | #include <sstream> |
21 | |
22 | #include "utils/parallel.hpp" |
23 | |
24 | #include "dnnl_common.hpp" |
25 | #include "dnnl_memory.hpp" |
26 | |
27 | #include "binary/binary.hpp" |
28 | #include "reduction/reduction.hpp" |
29 | |
30 | namespace reduction { |
31 | |
32 | dnnl_status_t init_pd(init_pd_args_t<prb_t> &init_pd_args) { |
33 | const prb_t *prb = init_pd_args.prb; |
34 | |
35 | auto src_desc = dnn_mem_t::init_md( |
36 | prb->ndims, prb->vdims[0].data(), prb->sdt, prb->stag); |
37 | auto dst_desc = dnn_mem_t::init_md( |
38 | prb->ndims, prb->vdims[1].data(), prb->ddt, prb->dtag); |
39 | |
40 | attr_args_t attr_args; |
41 | attr_args.prepare_post_ops_mds(prb->attr, prb->ndims, prb->vdims[1].data()); |
42 | const auto dnnl_attr = make_benchdnn_dnnl_wrapper( |
43 | create_dnnl_attr(prb->attr, attr_args)); |
44 | |
45 | DNN_SAFE_STATUS(dnnl_reduction_primitive_desc_create(&init_pd_args.pd, |
46 | init_pd_args.engine, alg2alg_kind(prb->alg), src_desc, dst_desc, |
47 | prb->p, prb->eps, dnnl_attr)); |
48 | |
49 | return dnnl_success; |
50 | } |
51 | |
52 | bool is_norm_alg(const alg_t alg) { |
53 | return alg == alg_t::norm_lp_max || alg == alg_t::norm_lp_sum |
54 | || alg == alg_t::norm_lp_power_p_max |
55 | || alg == alg_t::norm_lp_power_p_sum; |
56 | } |
57 | |
// Fills `mem_fp` (f32 reference memory) with mostly neutral values plus a
// sparse set of non-neutral ones, then reorders the result into `mem_dt`.
//
// prb                  - problem descriptor (algorithm and data types).
// mem_dt               - library memory, filled via reorder from `mem_fp`.
// mem_fp               - f32 reference memory, filled directly.
// non_neutral_prob     - probability an element gets a non-neutral value.
// use_reduced_range    - shrink the value range to limit accumulation error.
// only_positive_values - suppress sign flips (e.g. for norm algorithms).
// Returns OK on success; reorder failures propagate through SAFE.
int fill_mem(const prb_t *prb, dnn_mem_t &mem_dt, dnn_mem_t &mem_fp,
        float non_neutral_prob, bool use_reduced_range,
        bool only_positive_values) {
    const auto sdt = mem_dt.dt();
    const auto nelems = mem_fp.nelems();
    // `mul` is the only algorithm whose neutral element is 1, not 0.
    const float neutral_value = prb->alg == alg_t::mul ? 1.0f : 0.0f;
    // Shift `mean` inputs by 1 so the expected answer is not trivially 0.
    const float mean_shift = prb->alg == alg_t::mean ? 1.0f : 0.0f;
    const bool is_signed = sdt != dnnl_u8;
    const bool is_int = is_integral_dt(sdt);

    // Integer data saturates quickly, so its range is narrowed harder.
    int value_range = use_reduced_range ? 16 : 1000;
    if (is_int) value_range = use_reduced_range ? 3 : max_dt(dnnl_s8);

    // Fill in parallel over fixed-size chunks; each chunk seeds its own
    // RNG from its start index so the fill is deterministic regardless of
    // the number of worker threads.
    const int64_t n_chunks = 16;
    const int64_t chunk_size = div_up(nelems, n_chunks);

    benchdnn_parallel_nd(n_chunks, [&](int64_t idx_chunk) {
        const int64_t idx_start = idx_chunk * chunk_size;
        const int64_t idx_end = MIN2(idx_start + chunk_size, nelems);

        std::minstd_rand msr(idx_start + 1);
        msr.discard(1);
        std::uniform_int_distribution<> igen(1, value_range);

        for (int64_t idx = idx_start; idx < idx_end; ++idx) {
            float value = neutral_value;
            if (flip_coin(idx, non_neutral_prob)) {
                const int gen = igen(msr);
                // Integers are used as-is; floats get a fractional part.
                value = is_int ? gen : gen / 8.f;
                if (!only_positive_values && is_signed && flip_coin(gen, 0.5f))
                    value = -value;
            }
            value += mean_shift;
            mem_fp.set_elem(idx, round_to_nearest_representable(sdt, value));
        }
    });
    // Propagate reference values into the library memory object.
    SAFE(mem_dt.reorder(mem_fp), WARN);
    return OK;
}
97 | |
98 | int fill_src(const prb_t *prb, dnn_mem_t &mem_dt, dnn_mem_t &mem_fp) { |
99 | const auto nelems = mem_fp.nelems(); |
100 | const auto ddt = prb->ddt; |
101 | if (!nelems) return OK; |
102 | |
103 | int nelems_to_reduce = 1; |
104 | for (int dim = 0; dim < prb->ndims; dim++) { |
105 | if (prb->vdims[0][dim] != prb->vdims[1][dim]) { |
106 | nelems_to_reduce *= prb->vdims[0][dim]; |
107 | } |
108 | } |
109 | // There is no accumulation error in case of min or max algorithm |
110 | const bool is_min_or_max = prb->alg == alg_t::min || prb->alg == alg_t::max; |
111 | // Number of elements that should not exceed datatype limit after reduction |
112 | int safe_to_reduce_elems = nelems_to_reduce; |
113 | if (!is_min_or_max) { // Other algs do computations, reduce final values |
114 | safe_to_reduce_elems = prb->alg == alg_t::mul ? 10 : 1000; |
115 | // Integral values easily reach border values, |
116 | // shrink their final values more |
117 | if (is_integral_dt(ddt)) |
118 | safe_to_reduce_elems = prb->alg == alg_t::mul ? 3 : 10; |
119 | } |
120 | const float non_neutral_prob |
121 | = 1.f * safe_to_reduce_elems / nelems_to_reduce; |
122 | |
123 | return fill_mem( |
124 | prb, mem_dt, mem_fp, non_neutral_prob, !is_min_or_max, false); |
125 | } |
126 | |
127 | int fill_dst(const prb_t *prb, dnn_mem_t &mem_dt, dnn_mem_t &mem_fp) { |
128 | const bool only_positive_values = is_norm_alg(prb->alg); |
129 | return fill_mem(prb, mem_dt, mem_fp, 1.0f, false, only_positive_values); |
130 | } |
131 | |
// Marks `res` as skipped when the problem relies on features the current
// build/backend does not implement: unsupported data types for this
// direction, or an unsupported `sum` post-op configuration.
void skip_unimplemented_prb(const prb_t *prb, res_t *res) {
    skip_unimplemented_data_type({prb->sdt, prb->ddt}, prb->dir, res);
    skip_unimplemented_sum_po(prb->attr, res);
}
136 | |
137 | void skip_invalid_prb(const prb_t *prb, res_t *res) { |
138 | // Normalization algorithms don't make sense for integer data type. |
139 | // They also can't have `p` parameter less than one. |
140 | const bool is_invalid = is_norm_alg(prb->alg) |
141 | && (is_integral_dt(prb->sdt) || prb->p < 1.f); |
142 | |
143 | if (is_invalid) { |
144 | res->state = SKIPPED, res->reason = INVALID_CASE; |
145 | return; |
146 | } |
147 | } |
148 | |
149 | void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, |
150 | const args_t &ref_args) { |
151 | // `5` is a temporary magic const for GPU to pass norm algs. |
152 | // TODO: consider change the filling with power-of-two values for better |
153 | // answer precision. |
154 | cmp.set_threshold(5 * epsilon_dt(prb->ddt)); |
155 | if (is_amd_gpu()) { |
156 | // MIOpen implementation is less accurate for f16 data type therefore |
157 | // adjust the threshold. |
158 | if (prb->sdt == dnnl_f16 || prb->ddt == dnnl_f16) |
159 | cmp.set_threshold(1.5e-4 * 4); |
160 | } |
161 | } |
162 | |
// Driver entry point: creates the reduction primitive for `prb`, prepares
// and fills all memory arguments, executes the primitive, optionally checks
// correctness against the f32 reference, and measures performance.
// Returns OK (or an early OK when the case is listed/skipped/unimplemented);
// failures propagate through SAFE/measure_perf.
int doit(const prb_t *prb, res_t *res) {
    if (bench_mode == LIST) return res->state = LISTED, OK;

    benchdnn_dnnl_wrapper_t<dnnl_primitive_t> prim;
    SAFE(init_prim(prb->ctx_init, prim, init_pd, prb, res), WARN);
    if (res->state == SKIPPED || res->state == UNIMPLEMENTED) return OK;

    auto const_pd = query_pd(prim);

    // Reference path always runs in plain-layout f32 on the CPU engine.
    const auto fp_dt = dnnl_f32;
    const auto abx_tag = tag::abx;

    const auto &test_engine = get_test_engine();
    const auto &ref_engine = get_cpu_engine();

    const auto &src_md = query_md(const_pd, DNNL_ARG_SRC);
    dnn_mem_t src_fp(src_md, fp_dt, abx_tag, ref_engine);
    dnn_mem_t src_dt(src_md, test_engine);
    SAFE(fill_src(prb, src_dt, src_fp), WARN);

    const auto &dst_md = query_md(const_pd, DNNL_ARG_DST);
    dnn_mem_t dst_fp(dst_md, fp_dt, abx_tag, ref_engine);
    dnn_mem_t dst_dt(dst_md, test_engine);
    // Destination is an input only when a `sum` post-op reads it.
    if (prb->attr.post_ops.find(attr_t::post_ops_t::kind_t::SUM) >= 0)
        SAFE(fill_dst(prb, dst_dt, dst_fp), WARN);

    // Norm algorithms produce non-negative outputs, so binary post-op
    // operands are filled with positive values to keep results comparable.
    const bool binary_po_only_positive_vals = is_norm_alg(prb->alg);
    std::vector<dnn_mem_t> binary_po_fp, binary_po_dt;
    std::vector<int> binary_po_args;
    SAFE(binary::setup_binary_po(const_pd, binary_po_args, binary_po_dt,
                 binary_po_fp, binary_po_only_positive_vals),
            WARN);

    args_t args, ref_args;

    args.set(DNNL_ARG_SRC, src_dt);
    args.set(DNNL_ARG_DST, dst_dt);
    args.set(binary_po_args, binary_po_dt);

    SAFE(execute_and_wait(prim, args, res), WARN);

    if (is_bench_mode(CORR)) {
        ref_args.set(DNNL_ARG_SRC, src_fp);
        ref_args.set(DNNL_ARG_DST, dst_fp);
        ref_args.set(binary_po_args, binary_po_fp);

        check_correctness(prb, {DST}, args, ref_args, setup_cmp, res);
    }

    return measure_perf(prb->ctx_exe, res, prim, args);
}
214 | |
215 | } // namespace reduction |
216 | |