gen9_simple_sum.hpp source code [oneDNN/src/gpu/jit/gen9_simple_sum.hpp]

1	/*******************************************************************************
2	* Copyright 2019-2021 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#ifndef GPU_JIT_GEN9_SIMPLE_SUM_HPP
18	#define GPU_JIT_GEN9_SIMPLE_SUM_HPP
19
20	#include "common/c_types_map.hpp"
21	#include "gpu/compute/compute.hpp"
22	#include "gpu/gpu_primitive.hpp"
23	#include "gpu/gpu_sum_pd.hpp"
24
25	namespace dnnl {
26	namespace impl {
27	namespace gpu {
28	namespace jit {
29
30	struct gen9_simple_sum_t : public gpu_primitive_t {
31	struct pd_t : public gpu_sum_pd_t {
32	using gpu_sum_pd_t::gpu_sum_pd_t;
33
34	DECLARE_SUM_PD_T("ngen:simple:any", gen9_simple_sum_t);
35
36	status_t init(engine_t *engine) {
37	auto *compute_engine
38	= utils::downcast<compute::compute_engine_t *>(engine);
39	if (!compute_engine->mayiuse_ngen_kernels())
40	return status::unimplemented;
41
42	const int n = n_inputs();
43
44	constexpr auto data_type = data_type::f32;
45
46	bool ok = gpu_sum_pd_t::init(engine) == status::success;
47	if (!ok) return status::unimplemented;
48
49	const memory_desc_wrapper o_d(dst_md());
50	ok = ok && o_d.data_type() == data_type && o_d.is_dense();
51	if (!ok) return status::unimplemented;
52
53	for (int i = `0`; i < n; ++i) {
54	const memory_desc_wrapper i_d(src_md(i));
55	if (i_d != o_d) return status::unimplemented;
56	}
57
58	return status::success;
59	}
60	};
61
62	gen9_simple_sum_t(const pd_t *apd) : gpu_primitive_t(apd) {}
63
64	virtual status_t init(engine_t *engine);
65
66	virtual status_t execute(const exec_ctx_t &ctx) const {
67	status_t status = status::success;
68	auto &output = CTX_OUT_CLEAN_STORAGE(DNNL_ARG_DST, status);
69	CHECK(status);
70
71	const int num_arrs = pd()->n_inputs();
72	const memory_desc_wrapper o_d(pd()->dst_md());
73	const size_t nelems = o_d.nelems();
74
75	for (int a = `0`; a < num_arrs; ++a) {
76	auto &input = CTX_IN_STORAGE(DNNL_ARG_MULTIPLE_SRC + a);
77	const float scale = pd()->scales()[a];
78
79	compute::kernel_arg_list_t arg_list;
80	arg_list.set(`0`, input);
81	arg_list.set(`1`, output);
82	arg_list.set(`2`, scale);
83	arg_list.set(`3`, a);
84
85	size_t gws[`3`] = {nelems, `1`, `1`};
86	size_t lws[`3`] = {`1`, `1`, `1`};
87	auto nd_range = compute::nd_range_t (gws, lws);
88	status = parallel_for(ctx, nd_range, kernel_, arg_list);
89	if (status != status::success) return status;
90	}
91	return status::success;
92	}
93
94	private:
95	const pd_t pd() const* { return (const pd_t *)gpu_primitive_t::pd().get(); }
96
97	compute::kernel_t kernel_;
98	};
99
100	} // namespace jit
101	} // namespace gpu
102	} // namespace impl
103	} // namespace dnnl
104
105	#endif // GPU_JIT_GEN9_SIMPLE_SUM_HPP
106

Browse the source code of oneDNN/src/gpu/jit/gen9_simple_sum.hpp