compute_stream.cpp source code [oneDNN/src/gpu/compute/compute_stream.cpp]

1	/*******************************************************************************
2	* Copyright 2020-2021 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#include "gpu/compute/compute_stream.hpp"
18	#include "gpu/compute/compute_engine.hpp"
19	#include "gpu/gpu_primitive.hpp"
20
21	namespace dnnl {
22	namespace impl {
23	namespace gpu {
24	namespace compute {
25	status_t compute_stream_t::zero_pad(
26	const memory_t memory, const* exec_ctx_t &ctx) {
27	memory_desc_wrapper mdw(memory->md());
28
29	if (mdw.format_kind() != format_kind::blocked) return status::unimplemented;
30
31	if (mdw.nelems(false) == mdw.nelems(true)) return status::success;
32
33	if (!has_zero_pad_primitive()) return stream_t::zero_pad(memory, ctx);
34
35	// Kernel only compiled to support data types of length 1, 2, or 4 currently
36	if (!utils::one_of(mdw.data_type_size(), `1u`, `2u`, `4u`))
37	return status::unimplemented;
38
39	const blocking_desc_t blocking_desc = mdw.blocking_desc();
40
41	const int max_step_nelems = ZERO_PAD_MAX_STEP_SIZE;
42	size_t step_nelems = `1`;
43	for (int i = `0`; i < blocking_desc.inner_nblks; i++) {
44	step_nelems *= blocking_desc.inner_blks[i];
45	}
46
47	assert(step_nelems <= max_step_nelems);
48	if (step_nelems > max_step_nelems) return stream_t::zero_pad(memory, ctx);
49
50	engine_t engine = this*->engine();
51
52	primitive_t *zero_pad_primitive;
53	const resource_mapper_t *mapper;
54	CHECK(utils::downcast<compute_engine_t *>(engine)->get_zero_pad_primitive(
55	zero_pad_primitive, mapper));
56
57	exec_args_t zero_pad_args;
58	memory_arg_t arg = {const_cast<memory_t >(memory), true*};
59	zero_pad_args[DNNL_ARG_SRC] = arg;
60	exec_ctx_t zero_pad_ctx(this, std::move(zero_pad_args));
61	zero_pad_ctx.set_resource_mapper(mapper);
62
63	// Verbose is implemented separately here since fake primitive descriptor
64	// contains only primitive_kind in internal op_desc, but no md. Such design
65	// was chosen to avoid re-creation of zeropad primitive in case it lives as
66	// a regular one in cache and may be evicted from there. It means that md
67	// is available only with incoming memory at execution point here, that's
68	// why separate logic is written apart from a common place.
69	// XXX: re-consider, once zeropad appears in other places in the library.
70	if (get_verbose()) {
71	this->wait();
72	double start_ms = get_msec();
73	CHECK(zero_pad_primitive->execute(zero_pad_ctx));
74	status_t status = this->wait();
75	double duration_ms = get_msec() - start_ms;
76	std::string stamp;
77	if (get_verbose_timestamp()) stamp = "," + std::to_string(start_ms);
78	std::string md_fmt_str = md2fmt_str(memory->md());
79	std::string md_dim_str = md2dim_str(memory->md());
80
81	printf("onednn_verbose%s,exec,%s,%s,undef,%s,,,%s,%g\n", stamp.c_str(),
82	"gpu,zero_pad", zero_pad_primitive->pd()->name(),
83	md_fmt_str.c_str(), md_dim_str.c_str(), duration_ms);
84
85	return status;
86	} else {
87	return zero_pad_primitive->execute(zero_pad_ctx);
88	}
89	};
90	} // namespace compute
91	} // namespace gpu
92	} // namespace impl
93	} // namespace dnnl
94

Browse the source code of oneDNN/src/gpu/compute/compute_stream.cpp