/*******************************************************************************
* Copyright 2017-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#ifndef CPU_GEMM_X8S8S32X_CONVOLUTION_HPP
#define CPU_GEMM_X8S8S32X_CONVOLUTION_HPP

#include <memory>

#include "common/c_types_map.hpp"
#include "common/memory_tracking.hpp"
#include "common/primitive.hpp"

#include "cpu/platform.hpp"

#include "cpu/cpu_convolution_pd.hpp"

#include "cpu/gemm_convolution_utils.hpp"
#include "cpu/gemm_x8s8s32x_convolution_utils.hpp"

#include "cpu/gemm/gemm.hpp"
#include "cpu/zero_point_utils.hpp"

namespace dnnl {
namespace impl {
namespace cpu {

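// GEMM-based int8 forward convolution: src is u8 or s8, weights are s8, and
// accumulation happens in s32 ("x8s8s32x"). The convolution is lowered onto a
// matrix multiplication (im2col + GEMM), and a post-processing kernel turns
// the s32 accumulator into the requested dst data type while applying bias,
// scales, zero points and post-ops.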
struct gemm_x8s8s32x_convolution_fwd_t : public primitive_t {
    struct pd_t : public cpu_convolution_fwd_pd_t {
        pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr,
                const typename pd_t::base_class *hint_fwd_pd)
            : cpu_convolution_fwd_pd_t(adesc, attr, hint_fwd_pd), jcp_() {}

        DECLARE_COMMON_PD_T(src_md()->data_type == data_type::u8
                        ? IGEMM_S8U8S32_IMPL_STR
                        : IGEMM_S8S8S32_IMPL_STR,
                gemm_x8s8s32x_convolution_fwd_t, USE_GLOBAL_SCRATCHPAD);

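        // Verifies that the descriptor and attributes are supported (data
        // types, scales, zero points, post-ops) and fills the GEMM
        // convolution configuration and scratchpad bookkeeping.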
        status_t init(engine_t *engine) {
            using namespace data_type;
            using skip_mask_t = primitive_attr_t::skip_mask_t;
            const auto dst_type = dst_md(0)->data_type;

            bool ok = is_fwd()
                    && set_default_alg_kind(alg_kind::convolution_direct)
                    && utils::one_of(src_md()->data_type, s8, u8)
                    && weights_md()->data_type == s8
                    && utils::one_of(
                            dst_md()->data_type, f32, bf16, s32, s8, u8)
                    && IMPLICATION(with_bias(),
                            utils::one_of(weights_md(1)->data_type, f32, bf16,
                                    s32, s8, u8))
                    && !has_zero_dim_memory()
                    && attr()->has_default_values(skip_mask_t::scales_runtime
                                    | skip_mask_t::zero_points_runtime
                                    | skip_mask_t::post_ops
                                    | skip_mask_t::sum_dt,
                            dst_type)
                    && attr()->post_ops_.check_sum_consistent_dt(dst_type)
                    && scales_mask_ok() && zero_points_valid(attr());
            if (!ok) return status::unimplemented;

            auto scratchpad = scratchpad_registry().registrar();
            CHECK(jit_gemm_convolution_utils::init_conf(jcp_, scratchpad,
                    *desc(), src_md_, weights_md_, dst_md_, bias_md_, attr_,
                    dnnl_get_max_threads()));
            if (!gemm_x8s8s32x_convolution_utils::post_ops_ok(
                        attr()->post_ops_, &dst_md_))
                return status::unimplemented;
            return status::success;
        }

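        // GEMM convolution configuration, filled by
        // jit_gemm_convolution_utils::init_conf() above.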
        conv_gemm_conf_t jcp_;

    protected:
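        // src and dst accept only a common (per-tensor) scale; weights may
        // additionally use a per-output-channel scale.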
        bool scales_mask_ok() const {
            using namespace data_type;
            const std::vector<int> supported_args
                    = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST};
            bool ok = attr()->scales_.has_default_values(supported_args);
            for (int arg : supported_args) {
                const auto &mask = attr()->scales_.get(arg).mask_;
                if (arg == DNNL_ARG_WEIGHTS)
                    ok = ok && (mask == 0 || mask == (1 << (int)with_groups()));
                else
                    ok = ok && (mask == 0);
            }
            return ok;
        }
    };

    gemm_x8s8s32x_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {}

    status_t init(engine_t *engine) override {
        CHECK(safe_ptr_assign(pp_ker_, pp_ker_t::create(pd(), pd()->jcp_)));
        return (pp_ker_) ? pp_ker_->create_kernel() : status::success;
    }

    status_t execute(const exec_ctx_t &ctx) const override {
        return execute_forward(ctx);
    }

private:
    const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
    status_t execute_forward(const exec_ctx_t &ctx) const;
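    // Per-thread body of the forward pass; the work split across nthr
    // threads is driven by the configuration stored in pd()->jcp_.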
    status_t execute_forward_thr(const int ithr, const int nthr,
            const char *src_base, const int8_t *wei_base, const char *bia_base,
            void *dst_base, const float *scales, const float *dst_scales,
            const zero_point_call_params_t &zp,
            const memory_tracking::grantor_t &scratchpad,
            const void *post_ops_binary_rhs_arg_vec,
            const exec_ctx_t &ctx) const;

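    // Post-processing kernel: converts the s32 GEMM output to the dst data
    // type, applying bias, scales, zero points and post-ops.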
    using pp_ker_t = gemm_x8s8s32x_convolution_utils::pp_ker_t;
    std::unique_ptr<pp_ker_t> pp_ker_;
};

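// GEMM-based backward-data counterpart: consumes an int8 diff_dst (u8 or s8)
// and s8 weights and produces diff_src in f32, bf16, s32, s8 or u8.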
struct gemm_x8s8s32x_convolution_bwd_data_t : public primitive_t {
    struct pd_t : public cpu_convolution_bwd_data_pd_t {
        pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr,
                const convolution_fwd_pd_t *hint_fwd_pd)
            : cpu_convolution_bwd_data_pd_t(adesc, attr, hint_fwd_pd), jcp_() {}

        DECLARE_COMMON_PD_T(diff_dst_md()->data_type == data_type::u8
                        ? IGEMM_S8U8S32_IMPL_STR
                        : IGEMM_S8S8S32_IMPL_STR,
                gemm_x8s8s32x_convolution_bwd_data_t, USE_GLOBAL_SCRATCHPAD);

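        // Mirrors the forward checks, but without zero-point or post-op
        // support.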
        status_t init(engine_t *engine) {
            using namespace data_type;

            bool ok = desc()->prop_kind == prop_kind::backward_data
                    && set_default_alg_kind(alg_kind::convolution_direct)
                    && utils::one_of(diff_dst_md()->data_type, s8, u8)
                    && weights_md()->data_type == s8
                    && utils::one_of(
                            diff_src_md()->data_type, f32, bf16, s32, s8, u8)
                    && IMPLICATION(with_bias(),
                            utils::one_of(weights_md(1)->data_type, f32, bf16,
                                    s32, s8, u8))
                    && !has_zero_dim_memory()
                    && attr()->has_default_values(
                            primitive_attr_t::skip_mask_t::scales_runtime)
                    && scales_mask_ok();
            if (!ok) return status::unimplemented;

            auto scratchpad = scratchpad_registry().registrar();
            return jit_gemm_convolution_utils::init_conf(jcp_, scratchpad,
                    *desc(), diff_src_md_, weights_md_, diff_dst_md_, bias_md_,
                    attr_, dnnl_get_max_threads());
        }

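        // Bias is supported so that int8 deconvolution, whose forward pass is
        // mapped onto convolution backward data, can reuse this
        // implementation.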
        bool support_bias() const override { return true; }

        conv_gemm_conf_t jcp_;

    protected:
        bool scales_mask_ok() const {
            const std::vector<int> supported_args
                    = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST};
            bool ok = attr()->scales_.has_default_values(supported_args);
            for (int arg : supported_args) {
                const auto &mask = attr()->scales_.get(arg).mask_;
                if (arg == DNNL_ARG_WEIGHTS)
                    ok = ok && (mask == 0 || mask == (1 << (int)with_groups()));
                else
                    ok = ok && (mask == 0);
            }
            return ok;
        }
    };

    gemm_x8s8s32x_convolution_bwd_data_t(const pd_t *apd) : primitive_t(apd) {}

    status_t execute(const exec_ctx_t &ctx) const override {
        return execute_backward_data(ctx);
    }

private:
    status_t execute_backward_data(const exec_ctx_t &ctx) const;
    status_t execute_backward_data_thr(const int ithr, const int nthr,
            const char *diff_dst_base, const int8_t *wei_base,
            const char *bia_base, char *diff_src_base,
            const memory_tracking::grantor_t &scratchpad,
            const exec_ctx_t &ctx) const;
    const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
};

} // namespace cpu
} // namespace impl
} // namespace dnnl

#endif