ref_reduction.cpp source code [oneDNN/src/cpu/ref_reduction.cpp]

1	/*******************************************************************************
2	* Copyright 2020-2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#include <math.h>
18
19	#include "common/c_types_map.hpp"
20	#include "common/dnnl_thread.hpp"
21	#include "common/nstl.hpp"
22
23	#include "cpu/simple_q10n.hpp"
24
25	#include "cpu/ref_reduction.hpp"
26
27	namespace dnnl {
28	namespace impl {
29	namespace cpu {
30
31	template <data_type_t src_type, data_type_t dst_type, data_type_t acc_type>
32	void ref_reduction_t<src_type, dst_type, acc_type>::init_acc(
33	acc_t &acc, alg_kind_t alg) const {
34	using namespace alg_kind;
35	using namespace nstl;
36
37	switch (alg) {
38	case reduction_max:
39	acc = static_cast<acc_t>(numeric_limits<src_t>::lowest());
40	break;
41	case reduction_min:
42	acc = static_cast<acc_t>(numeric_limits<src_t>::max());
43	break;
44	case reduction_mean:
45	case reduction_sum: acc = acc_t(`0`); break;
46	case reduction_mul: acc = acc_t(`1`); break;
47	case reduction_norm_lp_max:
48	case reduction_norm_lp_sum:
49	case reduction_norm_lp_power_p_max:
50	case reduction_norm_lp_power_p_sum: acc = acc_t(`0`); break;
51	default: assert(!"unknown alg");
52	}
53	}
54
55	template <data_type_t src_type, data_type_t dst_type, data_type_t acc_type>
56	void ref_reduction_t<src_type, dst_type, acc_type>::accumulate(
57	acc_t &acc, const src_t &src, alg_kind_t alg, float p) const {
58	using namespace alg_kind;
59
60	acc_t src_ = static_cast<acc_t>(src);
61
62	switch (alg) {
63	case reduction_max: acc = nstl::max(acc, src_); break;
64	case reduction_min: acc = nstl::min(acc, src_); break;
65	case reduction_mean:
66	case reduction_sum: acc += src_; break;
67	case reduction_mul: acc = src_; break*;
68	case reduction_norm_lp_max:
69	case reduction_norm_lp_sum:
70	case reduction_norm_lp_power_p_max:
71	case reduction_norm_lp_power_p_sum:
72	acc += powf(nstl::abs(src_), p);
73	break;
74	default: assert(!"unknown alg");
75	}
76	}
77
78	template <data_type_t src_type, data_type_t dst_type, data_type_t acc_type>
79	void ref_reduction_t<src_type, dst_type, acc_type>::finalize(
80	float &acc_f32, alg_kind_t alg, float p, float eps, dim_t n) const {
81	using namespace alg_kind;
82
83	switch (alg) {
84	case reduction_mean: acc_f32 /= n; break;
85	case reduction_norm_lp_max:
86	acc_f32 = nstl::max(acc_f32, eps);
87	acc_f32 = powf(acc_f32, `1.0f` / p);
88	break;
89	case reduction_norm_lp_sum:
90	acc_f32 += eps;
91	acc_f32 = powf(acc_f32, `1.0f` / p);
92	break;
93	case reduction_norm_lp_power_p_max:
94	acc_f32 = nstl::max(acc_f32, eps);
95	break;
96	case reduction_norm_lp_power_p_sum: acc_f32 += eps; break;
97	default: break;
98	}
99	}
100
101	template <data_type_t src_type, data_type_t dst_type, data_type_t acc_type>
102	status_t ref_reduction_t<src_type, dst_type, acc_type>::execute_ref(
103	const exec_ctx_t &ctx) const {
104	status_t status = status::success;
105	auto src = CTX_IN_MEM(const src_t *, DNNL_ARG_SRC);
106	auto dst = CTX_OUT_CLEAN_MEM(dst_t *, DNNL_ARG_DST, status);
107	CHECK(status);
108
109	const memory_desc_wrapper src_mdw(pd()->src_md());
110	const memory_desc_wrapper dst_mdw(pd()->dst_md());
111
112	const int ndims = src_mdw.ndims();
113	const auto &src_dims = src_mdw.dims();
114	const auto &dst_dims = dst_mdw.dims();
115
116	const auto alg = pd()->desc()->alg_kind;
117	const auto p = pd()->desc()->p;
118	const auto eps = pd()->desc()->eps;
119
120	dims_t reduce_dims;
121	dim_t reduce_size {`1`}, idle_size = dst_mdw.nelems();
122
123	for (int d = `0`; d < ndims; ++d) {
124	reduce_dims[d] = dim_t {`1`};
125	const bool is_reduction_dim = src_dims[d] != dst_dims[d];
126	if (is_reduction_dim) {
127	reduce_dims[d] = src_dims[d];
128	reduce_size *= reduce_dims[d];
129	}
130	}
131
132	parallel_nd(idle_size, [&](dim_t l_offset) {
133	dims_t idle_pos, reduce_pos;
134	utils::l_dims_by_l_offset(idle_pos, l_offset, dst_mdw.dims(), ndims);
135	const dim_t dst_off = dst_mdw.off_v(idle_pos);
136	const dim_t src_idle_off = src_mdw.off_v(idle_pos);
137	acc_t acc {`0`};
138	init_acc(acc, alg);
139	for (dim_t r = `0`; r < reduce_size; ++r) {
140	utils::l_dims_by_l_offset(reduce_pos, r, reduce_dims, ndims);
141	const dim_t src_reduce_off = src_mdw.off_v(reduce_pos);
142	const dim_t src_off = src_idle_off + src_reduce_off;
143	accumulate(acc, src[src_off], alg, p);
144	}
145	float acc_f32 = static_cast<float>(acc);
146	finalize(acc_f32, alg, p, eps, reduce_size);
147
148	ref_post_ops_t::args_t args;
149	args.dst_val = dst[dst_off];
150	args.ctx = &ctx;
151	args.l_offset = l_offset;
152	args.dst_md = pd()->dst_md();
153	ref_post_ops->execute(acc_f32, args);
154
155	dst[dst_off] = saturate_and_round<dst_t>(acc_f32);
156	});
157
158	return status::success;
159	}
160
161	using namespace data_type;
162	template struct ref_reduction_t<f32, f32, f32>;
163	template struct ref_reduction_t<bf16, bf16, f32>;
164	template struct ref_reduction_t<bf16, f32, f32>;
165	template struct ref_reduction_t<f16, f16, f32>;
166	template struct ref_reduction_t<f16, f32, f32>;
167	template struct ref_reduction_t<s8, s8, s32>;
168	template struct ref_reduction_t<s8, s32, s32>;
169	template struct ref_reduction_t<s8, f32, s32>;
170	template struct ref_reduction_t<u8, u8, s32>;
171	template struct ref_reduction_t<u8, s32, s32>;
172	template struct ref_reduction_t<u8, f32, s32>;
173
174	} // namespace cpu
175	} // namespace impl
176	} // namespace dnnl
177

Browse the source code of oneDNN/src/cpu/ref_reduction.cpp