1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef CPU_X64_JIT_UNI_DW_CONVOLUTION_HPP |
18 | #define CPU_X64_JIT_UNI_DW_CONVOLUTION_HPP |
19 | |
20 | #include "common/c_types_map.hpp" |
21 | #include "common/memory_tracking.hpp" |
22 | #include "common/primitive.hpp" |
23 | |
24 | #include "cpu/cpu_convolution_pd.hpp" |
25 | #include "cpu/x64/cpu_barrier.hpp" |
26 | #include "cpu/x64/cpu_reducer.hpp" |
27 | |
28 | #include "cpu/x64/jit_uni_dw_conv_kernel_utils.hpp" |
29 | |
30 | namespace dnnl { |
31 | namespace impl { |
32 | namespace cpu { |
33 | namespace x64 { |
34 | |
35 | template <cpu_isa_t isa, data_type_t src_type, data_type_t dst_type = src_type> |
36 | struct jit_uni_dw_convolution_fwd_t : public primitive_t { |
37 | struct pd_t : public cpu_convolution_fwd_pd_t { |
38 | pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr, |
39 | const typename pd_t::base_class *hint_fwd_pd) |
40 | : cpu_convolution_fwd_pd_t(adesc, attr, hint_fwd_pd), jcp_() {} |
41 | |
42 | DECLARE_COMMON_PD_T(JIT_IMPL_NAME_HELPER("jit_dw:" , jcp_.isa, "" ), |
43 | jit_uni_dw_convolution_fwd_t); |
44 | |
45 | status_t init(engine_t *engine) { |
46 | bool ok = true && is_fwd() |
47 | && set_default_alg_kind(alg_kind::convolution_direct) |
48 | && expect_data_types(src_type, src_type, data_type::undef, |
49 | dst_type, data_type::f32) |
50 | && IMPLICATION(this->with_bias(), |
51 | utils::one_of(this->desc()->bias_desc.data_type, |
52 | data_type::f32, data_type::bf16)) |
53 | && attr()->has_default_values( |
54 | primitive_attr_t::skip_mask_t::post_ops, dst_type) |
55 | && !has_zero_dim_memory(); |
56 | if (!ok) return status::unimplemented; |
57 | |
58 | auto status = jit_uni_dw_conv_fwd_kernel<isa, src_type>::init_conf( |
59 | jcp_, *desc(), src_md_, weights_md_, bias_md_, dst_md_, |
60 | attr_); |
61 | if (status != status::success) return status::unimplemented; |
62 | |
63 | auto scratchpad = scratchpad_registry().registrar(); |
64 | jit_uni_dw_conv_fwd_kernel<isa, src_type>::init_scratchpad( |
65 | scratchpad, jcp_); |
66 | |
67 | return status::success; |
68 | } |
69 | |
70 | jit_conv_conf_t jcp_; |
71 | }; |
72 | |
73 | jit_uni_dw_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {} |
74 | |
75 | typedef typename prec_traits<data_type::f32>::type f32_data_t; |
76 | typedef typename prec_traits<data_type::bf16>::type bf16_data_t; |
77 | typedef typename prec_traits<src_type>::type data_t; |
78 | typedef typename prec_traits<dst_type>::type dst_data_t; |
79 | |
80 | status_t init(engine_t *engine) override { |
81 | CHECK(safe_ptr_assign(kernel_, |
82 | new jit_uni_dw_conv_fwd_kernel<isa, src_type>( |
83 | pd()->jcp_, *pd()->dst_md(0)))); |
84 | return kernel_->create_kernel(); |
85 | } |
86 | |
87 | status_t execute(const exec_ctx_t &ctx) const override { |
88 | execute_forward(ctx); |
89 | return status::success; |
90 | } |
91 | |
92 | private: |
93 | void execute_forward(const exec_ctx_t &ctx) const; |
94 | const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } |
95 | |
96 | std::unique_ptr<jit_uni_dw_conv_fwd_kernel<isa, src_type>> kernel_; |
97 | }; |
98 | |
99 | using jit_avx512_common_dw_convolution_fwd_t |
100 | = jit_uni_dw_convolution_fwd_t<avx512_core, data_type::f32>; |
101 | using jit_avx2_dw_convolution_fwd_t |
102 | = jit_uni_dw_convolution_fwd_t<avx2, data_type::f32>; |
103 | using jit_sse41_dw_convolution_fwd_t |
104 | = jit_uni_dw_convolution_fwd_t<sse41, data_type::f32>; |
105 | |
106 | template <cpu_isa_t isa, data_type_t diff_dst_type, |
107 | data_type_t diff_src_type = diff_dst_type> |
108 | struct jit_uni_dw_convolution_bwd_data_t : public primitive_t { |
109 | struct pd_t : public cpu_convolution_bwd_data_pd_t { |
110 | pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr, |
111 | const convolution_fwd_pd_t *hint_fwd_pd) |
112 | : cpu_convolution_bwd_data_pd_t(adesc, attr, hint_fwd_pd), jcp_() {} |
113 | |
114 | DECLARE_COMMON_PD_T(JIT_IMPL_NAME_HELPER("jit_dw:" , jcp_.isa, "" ), |
115 | jit_uni_dw_convolution_bwd_data_t); |
116 | |
117 | status_t init(engine_t *engine) { |
118 | bool ok = true && desc()->prop_kind == prop_kind::backward_data |
119 | && set_default_alg_kind(alg_kind::convolution_direct) |
120 | && expect_data_types(diff_src_type, diff_dst_type, |
121 | data_type::undef, diff_dst_type, data_type::f32) |
122 | && attr()->has_default_values() && !has_zero_dim_memory(); |
123 | |
124 | if (!ok) return status::unimplemented; |
125 | |
126 | status_t status = jit_uni_dw_conv_bwd_data_kernel<isa, |
127 | diff_dst_type>::init_conf(jcp_, *desc(), diff_src_md_, |
128 | weights_md_, diff_dst_md_); |
129 | if (status != status::success) return status; |
130 | |
131 | auto scratchpad = scratchpad_registry().registrar(); |
132 | jit_uni_dw_conv_bwd_data_kernel<isa, |
133 | diff_dst_type>::init_scratchpad(scratchpad, jcp_); |
134 | |
135 | return status::success; |
136 | } |
137 | |
138 | jit_conv_conf_t jcp_; |
139 | }; |
140 | |
141 | jit_uni_dw_convolution_bwd_data_t(const pd_t *apd) : primitive_t(apd) {} |
142 | |
143 | typedef typename prec_traits<diff_src_type>::type diff_src_data_t; |
144 | typedef typename prec_traits<diff_dst_type>::type diff_dst_data_t; |
145 | typedef typename prec_traits<diff_dst_type>::type wei_data_t; |
146 | |
147 | status_t init(engine_t *engine) override { |
148 | CHECK(safe_ptr_assign(kernel_, |
149 | new jit_uni_dw_conv_bwd_data_kernel<isa, diff_dst_type>( |
150 | pd()->jcp_))); |
151 | return kernel_->create_kernel(); |
152 | } |
153 | |
154 | status_t execute(const exec_ctx_t &ctx) const override { |
155 | execute_backward_data(ctx); |
156 | return status::success; |
157 | } |
158 | |
159 | private: |
160 | void execute_backward_data(const exec_ctx_t &ctx) const; |
161 | const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } |
162 | |
163 | std::unique_ptr<jit_uni_dw_conv_bwd_data_kernel<isa, diff_dst_type>> |
164 | kernel_; |
165 | }; |
166 | |
167 | using jit_avx512_common_dw_convolution_bwd_data_t |
168 | = jit_uni_dw_convolution_bwd_data_t<avx512_core, data_type::f32>; |
169 | using jit_avx2_dw_convolution_bwd_data_t |
170 | = jit_uni_dw_convolution_bwd_data_t<avx2, data_type::f32>; |
171 | using jit_sse41_dw_convolution_bwd_data_t |
172 | = jit_uni_dw_convolution_bwd_data_t<sse41, data_type::f32>; |
173 | |
174 | template <cpu_isa_t isa, data_type_t src_type, |
175 | data_type_t diff_weights_type = src_type> |
176 | struct jit_uni_dw_convolution_bwd_weights_t : public primitive_t { |
177 | struct pd_t : public cpu_convolution_bwd_weights_pd_t { |
178 | pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr, |
179 | const convolution_fwd_pd_t *hint_fwd_pd) |
180 | : cpu_convolution_bwd_weights_pd_t(adesc, attr, hint_fwd_pd) |
181 | , jcp_() {} |
182 | using jit_uni_dw_convolution_bwd_weights |
183 | = jit_uni_dw_convolution_bwd_weights_t<isa, src_type, |
184 | diff_weights_type>; |
185 | DECLARE_COMMON_PD_T(JIT_IMPL_NAME_HELPER("jit_dw:" , jcp_.isa, "" ), |
186 | jit_uni_dw_convolution_bwd_weights); |
187 | |
188 | status_t init(engine_t *engine) { |
189 | bool ok = true && desc()->prop_kind == prop_kind::backward_weights |
190 | && set_default_alg_kind(alg_kind::convolution_direct) |
191 | && expect_data_types(src_type, diff_weights_type, |
192 | data_type::undef, src_type, data_type::f32) |
193 | && IMPLICATION(this->with_bias(), |
194 | utils::one_of( |
195 | this->desc()->diff_bias_desc.data_type, |
196 | data_type::f32, data_type::bf16)) |
197 | && attr()->has_default_values() && !has_zero_dim_memory(); |
198 | if (!ok) return status::unimplemented; |
199 | |
200 | const int max_threads |
201 | = dnnl_in_parallel() ? 1 : dnnl_get_max_threads(); |
202 | |
203 | status_t status = jit_uni_dw_conv_bwd_weights_kernel<isa, |
204 | src_type>::init_conf(jcp_, *desc(), src_md_, |
205 | diff_weights_md_, diff_bias_md_, diff_dst_md_, max_threads); |
206 | if (status != status::success) return status; |
207 | |
208 | auto scratchpad = scratchpad_registry().registrar(); |
209 | jit_uni_dw_conv_bwd_weights_kernel<isa, src_type>::init_scratchpad( |
210 | scratchpad, jcp_); |
211 | |
212 | return status::success; |
213 | } |
214 | |
215 | jit_conv_conf_t jcp_; |
216 | }; |
217 | jit_uni_dw_convolution_bwd_weights_t(const pd_t *apd); |
218 | |
219 | typedef typename prec_traits<data_type::f32>::type f32_data_t; |
220 | typedef typename prec_traits<data_type::bf16>::type bf16_data_t; |
221 | typedef typename prec_traits<src_type>::type src_data_t; |
222 | typedef typename prec_traits<src_type>::type diff_dst_data_t; |
223 | typedef typename prec_traits<diff_weights_type>::type diff_weights_data_t; |
224 | |
225 | status_t init(engine_t *engine) override { |
226 | CHECK(safe_ptr_assign(kernel_, |
227 | new jit_uni_dw_conv_bwd_weights_kernel<isa, src_type>( |
228 | pd()->jcp_))); |
229 | CHECK(kernel_->create_kernel()); |
230 | |
231 | const auto jcp = &pd()->jcp_; |
232 | const int reduction = jcp->nthr_mb * jcp->nthr_oh; |
233 | if (reduction > 1 && isa != sse41) { |
234 | CHECK(safe_ptr_assign( |
235 | acc_ker_, new cpu_accumulator_1d_t<data_type::f32>())); |
236 | CHECK(acc_ker_->create_kernel()); |
237 | } |
238 | return status::success; |
239 | } |
240 | |
241 | status_t execute(const exec_ctx_t &ctx) const override { |
242 | switch (pd()->jcp_.harness) { |
243 | case harness_nxc: |
244 | execute_backward_weights_nxc(ctx); |
245 | execute_reduction_nxc(ctx); |
246 | break; |
247 | case harness_mb_reduction: |
248 | execute_backward_weights(ctx); |
249 | execute_reduction(ctx); |
250 | break; |
251 | default: assert(!"Invalid harness type" ); |
252 | } |
253 | return status::success; |
254 | } |
255 | |
256 | private: |
257 | void execute_backward_weights(const exec_ctx_t &ctx) const; |
258 | void execute_reduction(const exec_ctx_t &ctx) const; |
259 | void execute_backward_weights_nxc(const exec_ctx_t &ctx) const; |
260 | void execute_reduction_nxc(const exec_ctx_t &ctx) const; |
261 | const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } |
262 | |
263 | std::unique_ptr<cpu_accumulator_1d_t<data_type::f32>> acc_ker_; |
264 | std::unique_ptr<jit_uni_dw_conv_bwd_weights_kernel<isa, src_type>> kernel_; |
265 | }; |
266 | |
267 | using jit_avx512_common_dw_convolution_bwd_weights_t |
268 | = jit_uni_dw_convolution_bwd_weights_t<avx512_core, data_type::f32>; |
269 | using jit_avx2_dw_convolution_bwd_weights_t |
270 | = jit_uni_dw_convolution_bwd_weights_t<avx2, data_type::f32>; |
271 | using jit_sse41_dw_convolution_bwd_weights_t |
272 | = jit_uni_dw_convolution_bwd_weights_t<sse41, data_type::f32>; |
273 | |
274 | } // namespace x64 |
275 | } // namespace cpu |
276 | } // namespace impl |
277 | } // namespace dnnl |
278 | |
279 | #endif |
280 | |