1 | /******************************************************************************* |
2 | * Copyright 2021 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef CPU_X64_JIT_GEMM_X8S8S32X_CONV_ZP_SRC_PAD_COMP_HPP |
18 | #define CPU_X64_JIT_GEMM_X8S8S32X_CONV_ZP_SRC_PAD_COMP_HPP |
19 | |
20 | #include <functional> |
21 | |
22 | #include "common/c_types_map.hpp" |
23 | #include "cpu/x64/cpu_isa_traits.hpp" |
24 | |
25 | namespace dnnl { |
26 | namespace impl { |
27 | namespace cpu { |
28 | |
29 | struct conv_gemm_conf_t; |
30 | |
31 | namespace x64 { |
32 | |
33 | class jit_generator; |
34 | |
35 | namespace gemm_x8s8s32x_convolution_utils { |
36 | |
37 | struct jit_gemm_x8s8s32x_zp_pad_comp_helper { |
38 | jit_gemm_x8s8s32x_zp_pad_comp_helper(jit_generator *host, |
39 | const conv_gemm_conf_t &jcp, const Xbyak::Reg64 ®_zp_pad_comp, |
40 | const Xbyak::Reg64 ®_zp_pad_comp_temp, |
41 | const Xbyak::Reg8 &should_apply_zp_src_pad, const dim_t ndims); |
42 | |
43 | public: |
44 | void init(const dim_t off_w, const dim_t off_h, const dim_t off_w_size, |
45 | const dim_t off_w_off, const dim_t off_zp_pad_com_base_off, |
46 | const dim_t off_g_oc_offset_prologue, const dim_t off_g_oc_offset, |
47 | const dim_t off_zp_src_pad_com_d_offset, |
48 | const dim_t off_should_apply_zp_src_pad_comp_d); |
49 | void fin(); |
50 | void load_next_point_zp_src_comp_pad_addr(); |
51 | void zp_src_comp_pad_operation( |
52 | const std::function<void(const Xbyak::Reg64 &)> &op); |
53 | struct zp_src_pad_com_d { |
54 | bool should_apply_pad_comp_d; |
55 | dim_t offset; |
56 | }; |
57 | |
58 | zp_src_pad_com_d calculate_zp_src_pad_com_d(const dim_t d_off) const; |
59 | |
60 | private: |
61 | enum bound { upper, lower }; |
62 | |
63 | dim_t calculate_lower_bound_dim(const dim_t begin_comp_pad) const noexcept; |
64 | dim_t calculate_upper_bound_dim( |
65 | const dim_t output_size, const dim_t end_comp_pad) const noexcept; |
66 | |
67 | void set_up_initial_args(const dim_t off_w, const dim_t off_h, |
68 | const dim_t off_w_size, const dim_t off_w_off, |
69 | const dim_t off_zp_pad_com_base_off, |
70 | const dim_t off_g_oc_offset_prologue, const dim_t off_g_oc_offset, |
71 | const dim_t off_zp_src_pad_com_d_offset, |
72 | const dim_t off_should_apply_zp_src_pad_comp_d); |
73 | void check_bound(const Xbyak::Reg64 ®_dim, |
74 | const Xbyak::Address &result_addr, const dim_t bound_value, |
75 | const bound bound_kind); |
76 | void load_zp_src_comp_pad_addr_if_needed(const Xbyak::Address &g_oc_offset); |
77 | void calculate_zp_src_comp_pad_effective_addr( |
78 | const Xbyak::Address &g_oc_offset); |
79 | void get_zp_pad_com_dim(const Xbyak::Address &dim_under_lower_bound, |
80 | const Xbyak::Address &dim_over_eq_upper_bound, |
81 | const dim_t begin_pad, dim_t mid_pad, const dim_t end_pad, |
82 | const dim_t out_dim_size, const Xbyak::Address &out_point_dim, |
83 | const Xbyak::Address &result); |
84 | void should_apply_zp_src_pad(); |
85 | void next_point(); |
86 | |
87 | jit_generator *const host_; |
88 | const conv_gemm_conf_t &jcp_; |
89 | const Xbyak::Address w_addr_; |
90 | const Xbyak::Address h_addr_; |
91 | const Xbyak::Address w_size_addr_; |
92 | const Xbyak::Address w_off_addr_; |
93 | const Xbyak::Address zp_pad_com_h_; |
94 | const Xbyak::Address zp_pad_com_w_; |
95 | const Xbyak::Address zp_pad_com_base_; |
96 | const Xbyak::Address g_oc_offset_prologue_; |
97 | const Xbyak::Address g_oc_offset_; |
98 | const Xbyak::Address zp_pad_com_d_offset_; |
99 | |
100 | const Xbyak::Address h_under_lower_bound_; |
101 | const Xbyak::Address h_over_eq_upper_bound_; |
102 | const Xbyak::Address w_under_lower_bound_; |
103 | const Xbyak::Address w_over_eq_upper_bound_; |
104 | const Xbyak::Address should_apply_zp_src_pad_comp_d_; |
105 | const Xbyak::Reg8 &should_apply_zp_src_pad_; |
106 | |
107 | const dim_t lower_h_bound_; |
108 | const dim_t upper_h_bound_; |
109 | const dim_t lower_w_bound_; |
110 | const dim_t upper_w_bound_; |
111 | const dim_t lower_d_bound_; |
112 | const dim_t upper_d_bound_; |
113 | |
114 | const bool with_zp_pad_com_d_; |
115 | const bool with_zp_pad_com_h_; |
116 | |
117 | const Xbyak::Reg64 ®_zp_pad_comp_; |
118 | const Xbyak::Reg64 ®_zp_pad_comp_tmp_; |
119 | // 10 * 4 (qword_size) + 5 * 1 (byte size) = 85 |
120 | // 85 aligned to 4 = 88 |
121 | static constexpr dim_t reserved_stack_size_ = 88; |
122 | }; |
123 | |
124 | } // namespace gemm_x8s8s32x_convolution_utils |
125 | } // namespace x64 |
126 | } // namespace cpu |
127 | } // namespace impl |
128 | } // namespace dnnl |
129 | |
130 | #endif |
131 | |