1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef CPU_X64_JIT_UNI_DW_CONVOLUTION_HPP
18#define CPU_X64_JIT_UNI_DW_CONVOLUTION_HPP
19
20#include "common/c_types_map.hpp"
21#include "common/memory_tracking.hpp"
22#include "common/primitive.hpp"
23
24#include "cpu/cpu_convolution_pd.hpp"
25#include "cpu/x64/cpu_barrier.hpp"
26#include "cpu/x64/cpu_reducer.hpp"
27
28#include "cpu/x64/jit_uni_dw_conv_kernel_utils.hpp"
29
30namespace dnnl {
31namespace impl {
32namespace cpu {
33namespace x64 {
34
35template <cpu_isa_t isa, data_type_t src_type, data_type_t dst_type = src_type>
36struct jit_uni_dw_convolution_fwd_t : public primitive_t {
37 struct pd_t : public cpu_convolution_fwd_pd_t {
38 pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr,
39 const typename pd_t::base_class *hint_fwd_pd)
40 : cpu_convolution_fwd_pd_t(adesc, attr, hint_fwd_pd), jcp_() {}
41
42 DECLARE_COMMON_PD_T(JIT_IMPL_NAME_HELPER("jit_dw:", jcp_.isa, ""),
43 jit_uni_dw_convolution_fwd_t);
44
45 status_t init(engine_t *engine) {
46 bool ok = true && is_fwd()
47 && set_default_alg_kind(alg_kind::convolution_direct)
48 && expect_data_types(src_type, src_type, data_type::undef,
49 dst_type, data_type::f32)
50 && IMPLICATION(this->with_bias(),
51 utils::one_of(this->desc()->bias_desc.data_type,
52 data_type::f32, data_type::bf16))
53 && attr()->has_default_values(
54 primitive_attr_t::skip_mask_t::post_ops, dst_type)
55 && !has_zero_dim_memory();
56 if (!ok) return status::unimplemented;
57
58 auto status = jit_uni_dw_conv_fwd_kernel<isa, src_type>::init_conf(
59 jcp_, *desc(), src_md_, weights_md_, bias_md_, dst_md_,
60 attr_);
61 if (status != status::success) return status::unimplemented;
62
63 auto scratchpad = scratchpad_registry().registrar();
64 jit_uni_dw_conv_fwd_kernel<isa, src_type>::init_scratchpad(
65 scratchpad, jcp_);
66
67 return status::success;
68 }
69
70 jit_conv_conf_t jcp_;
71 };
72
73 jit_uni_dw_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {}
74
75 typedef typename prec_traits<data_type::f32>::type f32_data_t;
76 typedef typename prec_traits<data_type::bf16>::type bf16_data_t;
77 typedef typename prec_traits<src_type>::type data_t;
78 typedef typename prec_traits<dst_type>::type dst_data_t;
79
80 status_t init(engine_t *engine) override {
81 CHECK(safe_ptr_assign(kernel_,
82 new jit_uni_dw_conv_fwd_kernel<isa, src_type>(
83 pd()->jcp_, *pd()->dst_md(0))));
84 return kernel_->create_kernel();
85 }
86
87 status_t execute(const exec_ctx_t &ctx) const override {
88 execute_forward(ctx);
89 return status::success;
90 }
91
92private:
93 void execute_forward(const exec_ctx_t &ctx) const;
94 const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
95
96 std::unique_ptr<jit_uni_dw_conv_fwd_kernel<isa, src_type>> kernel_;
97};
98
// f32 instantiations of the forward depthwise convolution primitive, one per
// ISA (avx512_core, avx2, sse41); dst_type is left at its default (src_type).
using jit_avx512_common_dw_convolution_fwd_t
        = jit_uni_dw_convolution_fwd_t<avx512_core, data_type::f32>;
using jit_avx2_dw_convolution_fwd_t
        = jit_uni_dw_convolution_fwd_t<avx2, data_type::f32>;
using jit_sse41_dw_convolution_fwd_t
        = jit_uni_dw_convolution_fwd_t<sse41, data_type::f32>;
105
106template <cpu_isa_t isa, data_type_t diff_dst_type,
107 data_type_t diff_src_type = diff_dst_type>
108struct jit_uni_dw_convolution_bwd_data_t : public primitive_t {
109 struct pd_t : public cpu_convolution_bwd_data_pd_t {
110 pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr,
111 const convolution_fwd_pd_t *hint_fwd_pd)
112 : cpu_convolution_bwd_data_pd_t(adesc, attr, hint_fwd_pd), jcp_() {}
113
114 DECLARE_COMMON_PD_T(JIT_IMPL_NAME_HELPER("jit_dw:", jcp_.isa, ""),
115 jit_uni_dw_convolution_bwd_data_t);
116
117 status_t init(engine_t *engine) {
118 bool ok = true && desc()->prop_kind == prop_kind::backward_data
119 && set_default_alg_kind(alg_kind::convolution_direct)
120 && expect_data_types(diff_src_type, diff_dst_type,
121 data_type::undef, diff_dst_type, data_type::f32)
122 && attr()->has_default_values() && !has_zero_dim_memory();
123
124 if (!ok) return status::unimplemented;
125
126 status_t status = jit_uni_dw_conv_bwd_data_kernel<isa,
127 diff_dst_type>::init_conf(jcp_, *desc(), diff_src_md_,
128 weights_md_, diff_dst_md_);
129 if (status != status::success) return status;
130
131 auto scratchpad = scratchpad_registry().registrar();
132 jit_uni_dw_conv_bwd_data_kernel<isa,
133 diff_dst_type>::init_scratchpad(scratchpad, jcp_);
134
135 return status::success;
136 }
137
138 jit_conv_conf_t jcp_;
139 };
140
141 jit_uni_dw_convolution_bwd_data_t(const pd_t *apd) : primitive_t(apd) {}
142
143 typedef typename prec_traits<diff_src_type>::type diff_src_data_t;
144 typedef typename prec_traits<diff_dst_type>::type diff_dst_data_t;
145 typedef typename prec_traits<diff_dst_type>::type wei_data_t;
146
147 status_t init(engine_t *engine) override {
148 CHECK(safe_ptr_assign(kernel_,
149 new jit_uni_dw_conv_bwd_data_kernel<isa, diff_dst_type>(
150 pd()->jcp_)));
151 return kernel_->create_kernel();
152 }
153
154 status_t execute(const exec_ctx_t &ctx) const override {
155 execute_backward_data(ctx);
156 return status::success;
157 }
158
159private:
160 void execute_backward_data(const exec_ctx_t &ctx) const;
161 const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
162
163 std::unique_ptr<jit_uni_dw_conv_bwd_data_kernel<isa, diff_dst_type>>
164 kernel_;
165};
166
// f32 instantiations of the backward-by-data depthwise convolution primitive,
// one per ISA (avx512_core, avx2, sse41); diff_src_type is left at its
// default (diff_dst_type).
using jit_avx512_common_dw_convolution_bwd_data_t
        = jit_uni_dw_convolution_bwd_data_t<avx512_core, data_type::f32>;
using jit_avx2_dw_convolution_bwd_data_t
        = jit_uni_dw_convolution_bwd_data_t<avx2, data_type::f32>;
using jit_sse41_dw_convolution_bwd_data_t
        = jit_uni_dw_convolution_bwd_data_t<sse41, data_type::f32>;
173
174template <cpu_isa_t isa, data_type_t src_type,
175 data_type_t diff_weights_type = src_type>
176struct jit_uni_dw_convolution_bwd_weights_t : public primitive_t {
177 struct pd_t : public cpu_convolution_bwd_weights_pd_t {
178 pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr,
179 const convolution_fwd_pd_t *hint_fwd_pd)
180 : cpu_convolution_bwd_weights_pd_t(adesc, attr, hint_fwd_pd)
181 , jcp_() {}
182 using jit_uni_dw_convolution_bwd_weights
183 = jit_uni_dw_convolution_bwd_weights_t<isa, src_type,
184 diff_weights_type>;
185 DECLARE_COMMON_PD_T(JIT_IMPL_NAME_HELPER("jit_dw:", jcp_.isa, ""),
186 jit_uni_dw_convolution_bwd_weights);
187
188 status_t init(engine_t *engine) {
189 bool ok = true && desc()->prop_kind == prop_kind::backward_weights
190 && set_default_alg_kind(alg_kind::convolution_direct)
191 && expect_data_types(src_type, diff_weights_type,
192 data_type::undef, src_type, data_type::f32)
193 && IMPLICATION(this->with_bias(),
194 utils::one_of(
195 this->desc()->diff_bias_desc.data_type,
196 data_type::f32, data_type::bf16))
197 && attr()->has_default_values() && !has_zero_dim_memory();
198 if (!ok) return status::unimplemented;
199
200 const int max_threads
201 = dnnl_in_parallel() ? 1 : dnnl_get_max_threads();
202
203 status_t status = jit_uni_dw_conv_bwd_weights_kernel<isa,
204 src_type>::init_conf(jcp_, *desc(), src_md_,
205 diff_weights_md_, diff_bias_md_, diff_dst_md_, max_threads);
206 if (status != status::success) return status;
207
208 auto scratchpad = scratchpad_registry().registrar();
209 jit_uni_dw_conv_bwd_weights_kernel<isa, src_type>::init_scratchpad(
210 scratchpad, jcp_);
211
212 return status::success;
213 }
214
215 jit_conv_conf_t jcp_;
216 };
217 jit_uni_dw_convolution_bwd_weights_t(const pd_t *apd);
218
219 typedef typename prec_traits<data_type::f32>::type f32_data_t;
220 typedef typename prec_traits<data_type::bf16>::type bf16_data_t;
221 typedef typename prec_traits<src_type>::type src_data_t;
222 typedef typename prec_traits<src_type>::type diff_dst_data_t;
223 typedef typename prec_traits<diff_weights_type>::type diff_weights_data_t;
224
225 status_t init(engine_t *engine) override {
226 CHECK(safe_ptr_assign(kernel_,
227 new jit_uni_dw_conv_bwd_weights_kernel<isa, src_type>(
228 pd()->jcp_)));
229 CHECK(kernel_->create_kernel());
230
231 const auto jcp = &pd()->jcp_;
232 const int reduction = jcp->nthr_mb * jcp->nthr_oh;
233 if (reduction > 1 && isa != sse41) {
234 CHECK(safe_ptr_assign(
235 acc_ker_, new cpu_accumulator_1d_t<data_type::f32>()));
236 CHECK(acc_ker_->create_kernel());
237 }
238 return status::success;
239 }
240
241 status_t execute(const exec_ctx_t &ctx) const override {
242 switch (pd()->jcp_.harness) {
243 case harness_nxc:
244 execute_backward_weights_nxc(ctx);
245 execute_reduction_nxc(ctx);
246 break;
247 case harness_mb_reduction:
248 execute_backward_weights(ctx);
249 execute_reduction(ctx);
250 break;
251 default: assert(!"Invalid harness type");
252 }
253 return status::success;
254 }
255
256private:
257 void execute_backward_weights(const exec_ctx_t &ctx) const;
258 void execute_reduction(const exec_ctx_t &ctx) const;
259 void execute_backward_weights_nxc(const exec_ctx_t &ctx) const;
260 void execute_reduction_nxc(const exec_ctx_t &ctx) const;
261 const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
262
263 std::unique_ptr<cpu_accumulator_1d_t<data_type::f32>> acc_ker_;
264 std::unique_ptr<jit_uni_dw_conv_bwd_weights_kernel<isa, src_type>> kernel_;
265};
266
// f32 instantiations of the backward-by-weights depthwise convolution
// primitive, one per ISA (avx512_core, avx2, sse41); diff_weights_type is
// left at its default (src_type).
using jit_avx512_common_dw_convolution_bwd_weights_t
        = jit_uni_dw_convolution_bwd_weights_t<avx512_core, data_type::f32>;
using jit_avx2_dw_convolution_bwd_weights_t
        = jit_uni_dw_convolution_bwd_weights_t<avx2, data_type::f32>;
using jit_sse41_dw_convolution_bwd_weights_t
        = jit_uni_dw_convolution_bwd_weights_t<sse41, data_type::f32>;
273
274} // namespace x64
275} // namespace cpu
276} // namespace impl
277} // namespace dnnl
278
279#endif
280