1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef GPU_OCL_CUSTOM_REORDER_HPP |
18 | #define GPU_OCL_CUSTOM_REORDER_HPP |
19 | |
20 | #include "common/c_types_map.hpp" |
21 | #include "common/memory.hpp" |
22 | #include "common/primitive.hpp" |
23 | #include "common/utils.hpp" |
24 | #include "gpu/gpu_primitive.hpp" |
25 | #include "gpu/gpu_reorder_pd.hpp" |
26 | #include "gpu/gpu_resource.hpp" |
27 | #include "gpu/ocl/ocl_utils.hpp" |
28 | #include "gpu/primitive_conf.hpp" |
29 | |
30 | namespace dnnl { |
31 | namespace impl { |
32 | namespace gpu { |
33 | namespace ocl { |
34 | |
35 | // Collection of custom reorder implementations that are highly optimized |
36 | // but only applicable to specific scenarios. |
37 | struct custom_reorder_t : public gpu_primitive_t { |
38 | using gpu_primitive_t::gpu_primitive_t; |
39 | struct pd_t : public gpu_reorder_pd_t { |
40 | using gpu_reorder_pd_t::gpu_reorder_pd_t; |
41 | |
42 | DECLARE_COMMON_PD_T("ocl:custom:any" , custom_reorder_t); |
43 | |
44 | status_t init( |
45 | engine_t *engine, engine_t *src_engine, engine_t *dst_engine) { |
46 | bool ok = src_engine == dst_engine |
47 | && src_engine->kind() == engine_kind::gpu && attr_ok() |
48 | && extra_ok(); |
49 | if (!ok) return status::unimplemented; |
50 | |
51 | if (memory_desc_wrapper(src_md()).has_runtime_dims_or_strides()) |
52 | return status::unimplemented; |
53 | |
54 | auto *compute_engine = utils::downcast<compute::compute_engine_t *>( |
55 | dst_engine->kind() == engine_kind::gpu ? dst_engine |
56 | : src_engine); |
57 | |
58 | ok = ok && !memory_desc_ndims_ok(src_md(), dst_md()) |
59 | && compute_engine->mayiuse( |
60 | compute::device_ext_t::intel_subgroups) |
61 | && IMPLICATION( |
62 | utils::one_of(data_type::f16, src_md()->data_type, |
63 | dst_md()->data_type), |
64 | compute_engine->mayiuse( |
65 | compute::device_ext_t::khr_fp16) |
66 | && compute_engine->mayiuse( |
67 | compute::device_ext_t:: |
68 | intel_subgroups_short)) |
69 | && (!utils::one_of(data_type::f64, src_md()->data_type, |
70 | dst_md()->data_type)); |
71 | |
72 | if (!ok) return status::unimplemented; |
73 | |
74 | status_t status = init_conf(engine); |
75 | if (status != status::success) return status; |
76 | init_scratchpad(); |
77 | |
78 | return status::success; |
79 | } |
80 | |
81 | status_t init_conf(engine_t *engine); |
82 | void alt_gen(); |
83 | void alt_defines(compute::kernel_ctx_t &kernel_ctx) const; |
84 | void init_scratchpad(); |
85 | status_t init_kernel_ctx(compute::kernel_ctx_t &kernel_ctx) const; |
86 | |
87 | reorder_conf_t conf; |
88 | |
89 | private: |
90 | DECLARE_GPU_REORDER_CREATE(); |
91 | }; |
92 | |
93 | status_t init(engine_t *engine) override { |
94 | compute::kernel_ctx_t kernel_ctx; |
95 | |
96 | auto status = pd()->init_kernel_ctx(kernel_ctx); |
97 | if (status != status::success) return status; |
98 | |
99 | const auto &conf = pd()->conf; |
100 | if (conf.nelems == 0) return status::success; |
101 | |
102 | create_kernel(engine, &kernel_, "custom_reorder" , kernel_ctx); |
103 | if (!kernel_) return status::runtime_error; |
104 | return status::success; |
105 | } |
106 | |
107 | status_t execute(const exec_ctx_t &ctx) const override; |
108 | |
109 | private: |
110 | const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } |
111 | compute::kernel_t kernel_; |
112 | }; |
113 | |
114 | } // namespace ocl |
115 | } // namespace gpu |
116 | } // namespace impl |
117 | } // namespace dnnl |
118 | |
119 | #endif |
120 | |