ref_reorder.cpp source code [oneDNN/src/gpu/ocl/ref_reorder.cpp]

1	/*******************************************************************************
2	* Copyright 2019-2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#include <algorithm>
18	#include "gpu/ocl/ref_reorder.hpp"
19
20	#include "common/utils.hpp"
21	#include "gpu/ocl/ocl_stream.hpp"
22	#include "gpu/ocl/ocl_utils.hpp"
23	namespace dnnl {
24	namespace impl {
25	namespace gpu {
26	namespace ocl {
27
28	using namespace dnnl::impl::memory_tracking::names;
29
30	status_t ref_reorder_t::pd_t::init_conf(engine_t *engine) {
31	using namespace format_tag;
32
33	const memory_desc_wrapper src_mdw(src_md());
34	const memory_desc_wrapper dst_mdw(dst_md());
35
36	conf.src_md_info = memory_desc_info_t::create(src_mdw);
37	conf.dst_md_info = memory_desc_info_t::create(dst_mdw);
38
39	status_t status = status::success;
40
41	const auto &padded_dims = dst_mdw.padded_dims();
42	conf.src_quant = {attr(), src_mdw, DNNL_ARG_SRC};
43	conf.dst_quant = {attr(), dst_mdw, DNNL_ARG_DST};
44	conf.sum_quant = {attr()};
45	conf.has_padding = !src_mdw.is_dense() \|\| !dst_mdw.is_dense();
46	conf.ndims = src_mdw.ndims();
47	conf.nelems = utils::array_product(padded_dims, conf.ndims);
48
49	conf.sub_group_size = `1`;
50
51	if (conf.nelems == `0`) return status::success;
52
53	auto compute_engine = utils::downcast<compute::compute_engine_t >(engine);
54
55	dim_t blocks[MAX_NDIMS] = {`1`, `1`, `1`, `1`, `1`, `1`};
56
57	conf.dispatch = compute_engine->create_dispatch(dst_mdw.md_);
58
59	blocks[`2`] = blocks[`3`] = blocks[`4`] = blocks[`5`] = `0`;
60
61	for (int i = `0`; i < MAX_NDIMS; ++i) {
62	auto dim_str = utils::format("D%d", i);
63	if (i < dst_mdw.ndims()) {
64	int dim = padded_dims[i];
65	// if needed to align vectorized dim with vector size, pad that dim again
66	conf.dispatch.define_dim(dim_str, i, dim, blocks[i]);
67	} else {
68	conf.dispatch.define_dim(dim_str, `1`);
69	}
70	}
71
72	conf.dispatch.generate();
73	return status;
74	}
75
76	status_t ref_reorder_t::pd_t::init_kernel_ctx(
77	compute::kernel_ctx_t &kernel_ctx) const {
78	using namespace format_tag;
79
80	const memory_desc_wrapper src_mdw(src_md());
81	const memory_desc_wrapper dst_mdw(dst_md());
82
83	if (conf.nelems == `0`) return status::success;
84
85	kernel_ctx.define_int("NDIMS", conf.ndims);
86	kernel_ctx.add_option("-cl-std=CL2.0");
87
88	conf.src_quant.define_macros(kernel_ctx, "SRC");
89	conf.dst_quant.define_macros(kernel_ctx, "DST");
90	conf.sum_quant.define_macros(kernel_ctx, "SUM");
91
92	def_dispatch(kernel_ctx, conf.dispatch);
93
94	kernel_ctx.define_int("REF_REORDER", `1`);
95
96	kernel_ctx.define_int("PAD_FILL_ZERO", conf.has_padding);
97
98	def_memory_desc_info(kernel_ctx, conf.src_md_info, "SRC");
99	def_memory_desc_info(kernel_ctx, conf.dst_md_info, "DST");
100
101	kernel_ctx.print_options();
102	return status::success;
103	}
104
105	void ref_reorder_t::pd_t::init_scratchpad() {
106	if (conf.src_quant.with_scale()) {
107	auto scratchpad = scratchpad_registry().registrar();
108	scratchpad.book(memory_tracking::names::key_reorder_src_scales,
109	conf.src_quant.num_scales(), sizeof(float),
110	OCL_BUFFER_ALIGNMENT);
111	}
112	if (conf.dst_quant.with_scale()) {
113	auto scratchpad = scratchpad_registry().registrar();
114	scratchpad.book(memory_tracking::names::key_reorder_dst_scales,
115	conf.dst_quant.num_scales(), sizeof(float),
116	OCL_BUFFER_ALIGNMENT);
117	}
118	}
119
120	status_t ref_reorder_t::execute(const exec_ctx_t &ctx) const {
121
122	status_t status = status::success;
123
124	auto &src = CTX_IN_STORAGE(DNNL_ARG_FROM);
125	auto &dst = CTX_OUT_STORAGE(DNNL_ARG_TO);
126	CHECK(status);
127
128	const auto &conf = pd()->conf;
129	if (conf.nelems == `0`) return status::success;
130
131	compute::kernel_arg_list_t arg_list;
132	arg_list.set(`0`, src);
133	arg_list.set(`1`, dst);
134
135	arg_list.set(`2`, conf.src_quant.scales(ctx));
136	arg_list.set(`3`, conf.src_quant.zero_points(ctx));
137	arg_list.set(`4`, conf.dst_quant.scales(ctx));
138	arg_list.set(`5`, conf.dst_quant.zero_points(ctx));
139
140	arg_list.set(`6`, conf.sum_quant.scales());
141	arg_list.set(`7`, conf.sum_quant.zero_points());
142
143	auto nd_range = conf.dispatch.nd_range();
144
145	status = parallel_for(ctx, nd_range, kernel_, arg_list);
146
147	return status;
148	}
149
150	} // namespace ocl
151	} // namespace gpu
152	} // namespace impl
153	} // namespace dnnl
154

Browse the source code of oneDNN/src/gpu/ocl/ref_reorder.cpp