1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef GPU_OCL_CUSTOM_REORDER_HPP
18#define GPU_OCL_CUSTOM_REORDER_HPP
19
20#include "common/c_types_map.hpp"
21#include "common/memory.hpp"
22#include "common/primitive.hpp"
23#include "common/utils.hpp"
24#include "gpu/gpu_primitive.hpp"
25#include "gpu/gpu_reorder_pd.hpp"
26#include "gpu/gpu_resource.hpp"
27#include "gpu/ocl/ocl_utils.hpp"
28#include "gpu/primitive_conf.hpp"
29
30namespace dnnl {
31namespace impl {
32namespace gpu {
33namespace ocl {
34
35// Collection of custom reorder implementations that are highly optimized
36// but only applicable to specific scenarios.
37struct custom_reorder_t : public gpu_primitive_t {
38 using gpu_primitive_t::gpu_primitive_t;
39 struct pd_t : public gpu_reorder_pd_t {
40 using gpu_reorder_pd_t::gpu_reorder_pd_t;
41
42 DECLARE_COMMON_PD_T("ocl:custom:any", custom_reorder_t);
43
44 status_t init(
45 engine_t *engine, engine_t *src_engine, engine_t *dst_engine) {
46 bool ok = src_engine == dst_engine
47 && src_engine->kind() == engine_kind::gpu && attr_ok()
48 && extra_ok();
49 if (!ok) return status::unimplemented;
50
51 if (memory_desc_wrapper(src_md()).has_runtime_dims_or_strides())
52 return status::unimplemented;
53
54 auto *compute_engine = utils::downcast<compute::compute_engine_t *>(
55 dst_engine->kind() == engine_kind::gpu ? dst_engine
56 : src_engine);
57
58 ok = ok && !memory_desc_ndims_ok(src_md(), dst_md())
59 && compute_engine->mayiuse(
60 compute::device_ext_t::intel_subgroups)
61 && IMPLICATION(
62 utils::one_of(data_type::f16, src_md()->data_type,
63 dst_md()->data_type),
64 compute_engine->mayiuse(
65 compute::device_ext_t::khr_fp16)
66 && compute_engine->mayiuse(
67 compute::device_ext_t::
68 intel_subgroups_short))
69 && (!utils::one_of(data_type::f64, src_md()->data_type,
70 dst_md()->data_type));
71
72 if (!ok) return status::unimplemented;
73
74 status_t status = init_conf(engine);
75 if (status != status::success) return status;
76 init_scratchpad();
77
78 return status::success;
79 }
80
81 status_t init_conf(engine_t *engine);
82 void alt_gen();
83 void alt_defines(compute::kernel_ctx_t &kernel_ctx) const;
84 void init_scratchpad();
85 status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const;
86
87 reorder_conf_t conf;
88
89 private:
90 DECLARE_GPU_REORDER_CREATE();
91 };
92
93 status_t init(engine_t *engine) override {
94 compute::kernel_ctx_t kernel_ctx;
95
96 auto status = pd()->init_kernel_ctx(kernel_ctx);
97 if (status != status::success) return status;
98
99 const auto &conf = pd()->conf;
100 if (conf.nelems == 0) return status::success;
101
102 create_kernel(engine, &kernel_, "custom_reorder", kernel_ctx);
103 if (!kernel_) return status::runtime_error;
104 return status::success;
105 }
106
107 status_t execute(const exec_ctx_t &ctx) const override;
108
109private:
110 const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
111 compute::kernel_t kernel_;
112};
113
114} // namespace ocl
115} // namespace gpu
116} // namespace impl
117} // namespace dnnl
118
119#endif
120