gen9_binary.hpp source code [oneDNN/src/gpu/ocl/gen9_binary.hpp]

1	/*******************************************************************************
2	* Copyright 2020-2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#ifndef GPU_OCL_GEN9_BINARY_HPP
18	#define GPU_OCL_GEN9_BINARY_HPP
19
20	#include "common/c_types_map.hpp"
21	#include "common/primitive.hpp"
22	#include "gpu/compute/compute.hpp"
23	#include "gpu/gpu_binary_pd.hpp"
24	#include "gpu/gpu_primitive.hpp"
25	#include "gpu/gpu_resource.hpp"
26	#include "gpu/ocl/ocl_stream.hpp"
27	#include "gpu/ocl/ocl_utils.hpp"
28	#include "gpu/primitive_conf.hpp"
29
30	namespace dnnl {
31	namespace impl {
32	namespace gpu {
33	namespace ocl {
34
35	struct gen9_binary_t : public gpu_primitive_t {
36	using gpu_primitive_t::gpu_primitive_t;
37	struct pd_t : public gpu_binary_pd_t {
38	using gpu_binary_pd_t::gpu_binary_pd_t;
39
40	DECLARE_COMMON_PD_T("ocl:gen9", gen9_binary_t);
41
42	status_t init(engine_t *engine) {
43	using namespace data_type;
44	using namespace format_tag;
45	using sm = primitive_attr_t::skip_mask_t;
46
47	auto *compute_engine
48	= utils::downcast<compute::compute_engine_t *>(engine);
49
50	const auto attr_skip_mask = sm::post_ops \| sm::scales_runtime;
51	const memory_desc_wrapper dst_d(dst_md());
52	format_tag_t dst_tag
53	= dst_d.matches_one_of_tag(nc, ncw, nchw, ncdhw);
54	bool is_plain_layout = dst_d.matches_tag(dst_tag);
55	bool ok = set_default_params() == status::success
56	&& IMPLICATION(is_broadcast(), is_plain_layout)
57	&& !memory_desc_ndims_ok(src_md(`0`), src_md(`1`), dst_md())
58	&& ((utils::everyone_is(bf16, src_md(`0`)->data_type,
59	src_md(`1`)->data_type)
60	&& utils::one_of(dst_md()->data_type, bf16, u8))
61	\|\| (utils::one_of(
62	src_md(`0`)->data_type, f16, f32, s8, u8)
63	&& utils::one_of(src_md(`1`)->data_type, f16,
64	f32, s8, u8)
65	&& utils::one_of(dst_md()->data_type, f16,
66	f32, s8, u8)))
67	&& IMPLICATION(!attr()->scales_.has_default_values(),
68	utils::one_of(dst_md()->data_type, s8, u8)
69	&& utils::everyone_is(
70	attr()->scales_.get(DNNL_ARG_SRC_0)
71	.mask_,
72	attr()->scales_.get(DNNL_ARG_SRC_1)
73	.mask_,
74	`0`))
75	&& attr()->has_default_values(attr_skip_mask)
76	&& compute_engine->mayiuse(
77	compute::device_ext_t::intel_subgroups)
78	&& IMPLICATION(
79	utils::one_of(f16, src_md(`1`)->data_type,
80	src_md(`0`)->data_type, dst_md()->data_type),
81	compute_engine->mayiuse(
82	compute::device_ext_t::khr_fp16)
83	&& compute_engine->mayiuse(
84	compute::device_ext_t::
85	intel_subgroups_short))
86	&& post_ops_with_binary_ok(
87	attr(), dst_md()->data_type, MAX_NDIMS)
88	&& attr_.set_default_formats(dst_md(`0`)) == status::success
89	&& !(attr()->post_ops_.len() > `0`
90	&& src_md(`0`)->data_type == bf16
91	&& src_md(`1`)->data_type == bf16
92	&& dst_md()->data_type == u8);
93
94	if (!ok) return status::unimplemented;
95
96	return init_conf(engine);
97	}
98
99	status_t init_conf(engine_t *engine);
100
101	status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
102
103	bool is_broadcast() {
104	auto bcast_dims = broadcast_dims();
105	for (int i = `0`; i < src_md(`0`)->ndims; ++i) {
106	if (bcast_dims[i] != `0`) { return true; }
107	}
108	return false;
109	}
110
111	binary_conf_t conf;
112	};
113
114	status_t init(engine_t *engine) override {
115	compute::kernel_ctx_t kernel_ctx;
116
117	auto status = pd()->init_kernel_ctx(kernel_ctx);
118	if (status != status::success) return status;
119
120	create_kernel(engine, &kernel_, "gen9_binary", kernel_ctx);
121	if (!kernel_) return status::runtime_error;
122
123	return status::success;
124	}
125
126	status_t execute(const exec_ctx_t &ctx) const override {
127
128	auto &src0 = CTX_IN_STORAGE(DNNL_ARG_SRC_0);
129	auto &src1 = CTX_IN_STORAGE(DNNL_ARG_SRC_1);
130	auto &dst = CTX_OUT_STORAGE(DNNL_ARG_DST);
131
132	const auto &conf = pd()->conf;
133
134	auto &src0_scale
135	= CTX_IN_STORAGE(DNNL_ARG_SRC_0 \| DNNL_ARG_ATTR_SCALES);
136	auto &src1_scale
137	= CTX_IN_STORAGE(DNNL_ARG_SRC_1 \| DNNL_ARG_ATTR_SCALES);
138
139	compute::kernel_arg_list_t arg_list;
140	arg_list.set(`0`, src0);
141	arg_list.set(`1`, src1);
142	arg_list.set(`2`, dst);
143
144	unsigned arg_idx = append_post_ops_to_arg_list(
145	ctx, arg_list, `3`, pd()->attr()->post_ops_);
146
147	arg_list.set(arg_idx++, src0_scale);
148	arg_list.set(arg_idx, src1_scale);
149
150	auto nd_range = conf.dispatch.nd_range();
151
152	return parallel_for(ctx, nd_range, kernel_, arg_list);
153	}
154
155	private:
156	const pd_t pd() const* { return (const pd_t *)primitive_t::pd().get(); }
157	compute::kernel_t kernel_;
158	};
159
160	} // namespace ocl
161	} // namespace gpu
162	} // namespace impl
163	} // namespace dnnl
164
165	#endif
166

Browse the source code of oneDNN/src/gpu/ocl/gen9_binary.hpp