1/*******************************************************************************
2* Copyright 2021-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef GPU_GEN9_REDUCTION_HPP
18#define GPU_GEN9_REDUCTION_HPP
19
20#include "common/c_types_map.hpp"
21#include "common/primitive.hpp"
22#include "common/type_helpers.hpp"
23#include "common/utils.hpp"
24#include "gpu/compute/compute.hpp"
25#include "gpu/gpu_primitive.hpp"
26#include "gpu/gpu_reduction_pd.hpp"
27#include "gpu/gpu_resource.hpp"
28#include "gpu/primitive_conf.hpp"
29
30namespace dnnl {
31namespace impl {
32namespace gpu {
33namespace ocl {
34
35// Requirements for this implementation:
36// - only N and C can be blocked (NCx or nCx)
37// - C is blocked by 16 or 32
38// - src/dst blocking structures have to match
39// - either all or none of the HWD dims are being reduced
40// - padded C is a multiple of 16
41
// This implementation combines all HWD dimensions into a single index,
// leaving N, C, and the combined HWD dimension (plus blocked portions of
// C and possibly N). Each of these dimensions is broken into chunks, and
// a work item is assigned one of each of these chunks in the initial step.
46
47// The final phase finishes the reduction by reducing all of the chunks
48// belonging to reduction dimensions.
49
50struct gen9_reduction_t : public gpu_primitive_t {
51 using gpu_primitive_t::gpu_primitive_t;
52 struct pd_t : public gpu_reduction_pd_t {
53 using gpu_reduction_pd_t::gpu_reduction_pd_t;
54
55 DECLARE_COMMON_PD_T("ocl:gen9", gen9_reduction_t);
56
57 status_t init(engine_t *engine) {
58 using smask_t = primitive_attr_t::skip_mask_t;
59 bool ok = set_default_params() == status::success
60 && attr_.has_default_values(
61 smask_t::post_ops | smask_t::gpu_attr)
62 && !memory_desc_ndims_ok(src_md(), dst_md())
63 && post_ops_with_binary_ok(attr(), dst_md()->data_type, 5)
64 && attr_.set_default_formats(dst_md(0)) == status::success;
65 if (!ok) return status::unimplemented;
66
67 CHECK(init_conf(engine));
68 init_scratchpad();
69
70 return status::success;
71 }
72
73 status_t init_conf(engine_t *engine);
74 status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
75 void init_scratchpad();
76
77 reduction_conf_t conf;
78 };
79
80 status_t init(engine_t *engine) override {
81 compute::kernel_ctx_t kernel_ctx(pd()->attr());
82
83 status_t status = pd()->init_kernel_ctx(kernel_ctx);
84 CHECK(status);
85
86 status = create_kernel(
87 engine, &initial_kernel, "gen9_initial_reduce", kernel_ctx);
88 CHECK(status);
89 if (!pd()->conf.skip_final_phase) {
90 status = create_kernel(
91 engine, &final_kernel, "gen9_final_reduce", kernel_ctx);
92 CHECK(status);
93 }
94
95 return status::success;
96 }
97
98 virtual status_t execute(const exec_ctx_t &ctx) const override {
99 return execute_gen9(ctx);
100 }
101
102private:
103 status_t execute_gen9(const exec_ctx_t &ctx) const;
104 const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
105
106 compute::kernel_t initial_kernel;
107 compute::kernel_t final_kernel;
108};
109
110} // namespace ocl
111} // namespace gpu
112} // namespace impl
113} // namespace dnnl
114
115#endif
116