/*******************************************************************************
* Copyright 2017-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
16
17#ifndef CPU_GEMM_X8S8S32X_CONVOLUTION_HPP
18#define CPU_GEMM_X8S8S32X_CONVOLUTION_HPP
19
20#include <memory>
21
22#include "common/c_types_map.hpp"
23#include "common/memory_tracking.hpp"
24#include "common/primitive.hpp"
25
26#include "cpu/platform.hpp"
27
28#include "cpu/cpu_convolution_pd.hpp"
29
30#include "cpu/gemm_convolution_utils.hpp"
31#include "cpu/gemm_x8s8s32x_convolution_utils.hpp"
32
33#include "cpu/gemm/gemm.hpp"
34#include "cpu/zero_point_utils.hpp"
35
36namespace dnnl {
37namespace impl {
38namespace cpu {
39
// Forward int8 convolution implemented on top of an integer GEMM
// (s8s8s32 or u8s8s32), with a separate post-processing kernel (pp_ker_)
// that handles bias, scales, zero points and attribute post-ops.
struct gemm_x8s8s32x_convolution_fwd_t : public primitive_t {
    struct pd_t : public cpu_convolution_fwd_pd_t {
        pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr,
                const typename pd_t::base_class *hint_fwd_pd)
            : cpu_convolution_fwd_pd_t(adesc, attr, hint_fwd_pd), jcp_() {}

        // The reported implementation name depends on the source data type:
        // u8 sources dispatch to the u8*s8 GEMM, s8 sources to the s8*s8 one.
        DECLARE_COMMON_PD_T(src_md()->data_type == data_type::u8
                        ? IGEMM_S8U8S32_IMPL_STR
                        : IGEMM_S8S8S32_IMPL_STR,
                gemm_x8s8s32x_convolution_fwd_t, USE_GLOBAL_SCRATCHPAD);

        // Checks that this implementation can handle the requested problem
        // (data types, attributes, post-ops) and initializes jcp_ plus the
        // scratchpad layout. Returns status::unimplemented on any mismatch.
        status_t init(engine_t *engine) {
            using namespace data_type;
            using skip_mask_t = primitive_attr_t::skip_mask_t;
            const auto dst_type = dst_md(0)->data_type;

            // NOTE: the && chain below is order-sensitive —
            // set_default_alg_kind() mutates the descriptor as a side effect.
            bool ok = is_fwd()
                    && set_default_alg_kind(alg_kind::convolution_direct)
                    && utils::one_of(src_md()->data_type, s8, u8)
                    && weights_md()->data_type == s8
                    && utils::one_of(
                            dst_md()->data_type, f32, bf16, s32, s8, u8)
                    && IMPLICATION(with_bias(),
                            utils::one_of(weights_md(1)->data_type, f32, bf16,
                                    s32, s8, u8))
                    && !has_zero_dim_memory()
                    // Only scales, zero points, post-ops and sum data type may
                    // deviate from the default attribute values.
                    && attr()->has_default_values(skip_mask_t::scales_runtime
                                    | skip_mask_t::zero_points_runtime
                                    | skip_mask_t::post_ops
                                    | skip_mask_t::sum_dt,
                            dst_type)
                    && attr()->post_ops_.check_sum_consistent_dt(dst_type)
                    && scales_mask_ok() && zero_points_valid(attr());
            if (!ok) return status::unimplemented;

            auto scratchpad = scratchpad_registry().registrar();
            CHECK(jit_gemm_convolution_utils::init_conf(jcp_, scratchpad,
                    *desc(), src_md_, weights_md_, dst_md_, bias_md_, attr_,
                    dnnl_get_max_threads()));
            // The post-processing kernel supports only a subset of post-ops;
            // reject the rest here rather than failing at execution time.
            if (!gemm_x8s8s32x_convolution_utils::post_ops_ok(
                        attr()->post_ops_, &dst_md_))
                return status::unimplemented;
            return status::success;
        }

        conv_gemm_conf_t jcp_; // GEMM convolution configuration filled by init()

    protected:
        // Validates the scale masks: src/dst scales must be common (mask 0);
        // weights scales may be common or per-output-channel, where the oc bit
        // shifts by one position when the weights carry a groups dimension.
        bool scales_mask_ok() const {
            using namespace data_type;
            const std::vector<int> supported_args
                    = {DNNL_ARG_SRC, DNNL_ARG_WEIGHTS, DNNL_ARG_DST};
            bool ok = attr()->scales_.has_default_values(supported_args);
            for (int arg : supported_args) {
                const auto &mask = attr()->scales_.get(arg).mask_;
                if (arg == DNNL_ARG_WEIGHTS)
                    ok = ok && (mask == 0 || mask == (1 << (int)with_groups()));
                else
                    ok = ok && (mask == 0);
            }
            return ok;
        }
    };

    gemm_x8s8s32x_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {}

    // Builds the post-processing kernel. pp_ker_t::create() may legitimately
    // return null (no post-processing needed), in which case there is no
    // kernel to compile.
    status_t init(engine_t *engine) override {
        CHECK(safe_ptr_assign(pp_ker_, pp_ker_t::create(pd(), pd()->jcp_)));
        return (pp_ker_) ? pp_ker_->create_kernel() : status::success;
    }

    status_t execute(const exec_ctx_t &ctx) const override {
        return execute_forward(ctx);
    }

private:
    const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
    // Whole-problem driver; partitions work and calls execute_forward_thr.
    status_t execute_forward(const exec_ctx_t &ctx) const;
    // Per-thread body: ithr/nthr identify this thread's share of the work;
    // src/wei/bia/dst are base pointers of the corresponding memories;
    // scales/dst_scales/zp carry the runtime quantization parameters.
    status_t execute_forward_thr(const int ithr, const int nthr,
            const char *src_base, const int8_t *wei_base, const char *bia_base,
            void *dst_base, const float *scales, const float *dst_scales,
            const zero_point_call_params_t &zp,
            const memory_tracking::grantor_t &scratchpad,
            const void *post_ops_binary_rhs_arg_vec,
            const exec_ctx_t &ctx) const;

    using pp_ker_t = gemm_x8s8s32x_convolution_utils::pp_ker_t;
    std::unique_ptr<pp_ker_t> pp_ker_; // may stay null if no post-processing
};
129
// Backward-data int8 convolution implemented via an integer GEMM
// (s8s8s32 or u8s8s32), computing diff_src from diff_dst and weights.
struct gemm_x8s8s32x_convolution_bwd_data_t : public primitive_t {
    struct pd_t : public cpu_convolution_bwd_data_pd_t {
        pd_t(const convolution_desc_t *adesc, const primitive_attr_t *attr,
                const convolution_fwd_pd_t *hint_fwd_pd)
            : cpu_convolution_bwd_data_pd_t(adesc, attr, hint_fwd_pd), jcp_() {}

        // Implementation name follows the diff_dst data type: u8 dispatches
        // to the u8*s8 GEMM, s8 to the s8*s8 one.
        DECLARE_COMMON_PD_T(diff_dst_md()->data_type == data_type::u8
                        ? IGEMM_S8U8S32_IMPL_STR
                        : IGEMM_S8S8S32_IMPL_STR,
                gemm_x8s8s32x_convolution_bwd_data_t, USE_GLOBAL_SCRATCHPAD);

        // Validates data types and attributes for the backward-data pass and
        // initializes jcp_ and the scratchpad layout.
        status_t init(engine_t *engine) {
            using namespace data_type;

            // NOTE: order-sensitive && chain — set_default_alg_kind()
            // mutates the descriptor as a side effect.
            bool ok = desc()->prop_kind == prop_kind::backward_data
                    && set_default_alg_kind(alg_kind::convolution_direct)
                    && utils::one_of(diff_dst_md()->data_type, s8, u8)
                    && weights_md()->data_type == s8
                    && utils::one_of(
                            diff_src_md()->data_type, f32, bf16, s32, s8, u8)
                    && IMPLICATION(with_bias(),
                            utils::one_of(weights_md(1)->data_type, f32, bf16,
                                    s32, s8, u8))
                    && !has_zero_dim_memory()
                    // Unlike the forward pass, only runtime output scales are
                    // allowed here — no zero points or post-ops.
                    && attr()->has_default_values(
                            primitive_attr_t::skip_mask_t::scales_runtime)
                    && output_scales_mask_ok();
            if (!ok) return status::unimplemented;

            auto scratchpad = scratchpad_registry().registrar();
            return jit_gemm_convolution_utils::init_conf(jcp_, scratchpad,
                    *desc(), diff_src_md_, weights_md_, diff_dst_md_, bias_md_,
                    attr_, dnnl_get_max_threads());
        }

        // Bias is applied inside this implementation (see execute_backward_*).
        bool support_bias() const override { return true; }

        conv_gemm_conf_t jcp_; // GEMM convolution configuration filled by init()

    protected:
        // Output scales must be common (mask 0) or per-channel (bit 1 set,
        // i.e. along the channel dimension of the memory descriptor).
        bool output_scales_mask_ok() const {
            const auto &mask = attr()->output_scales_.mask_;
            return mask == 0 || mask == 1 << 1;
        }
    };

    gemm_x8s8s32x_convolution_bwd_data_t(const pd_t *apd) : primitive_t(apd) {}

    status_t execute(const exec_ctx_t &ctx) const override {
        return execute_backward_data(ctx);
    }

private:
    // Whole-problem driver; partitions work across threads.
    status_t execute_backward_data(const exec_ctx_t &ctx) const;
    // Per-thread body: ithr/nthr identify this thread's share of the work;
    // diff_dst/wei/bia/diff_src are base pointers of the respective memories.
    status_t execute_backward_data_thr(const int ithr, const int nthr,
            const char *diff_dst_base, const int8_t *wei_base,
            const char *bia_base, char *diff_src_base,
            const memory_tracking::grantor_t &scratchpad,
            const exec_ctx_t &ctx) const;
    const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
};
191
192} // namespace cpu
193} // namespace impl
194} // namespace dnnl
195
196#endif
197