1 | /******************************************************************************* |
2 | * Copyright 2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef COMMON_MEMORY_DESC_HPP |
18 | #define COMMON_MEMORY_DESC_HPP |
19 | |
20 | #include "common/c_types_map.hpp" |
21 | #include "common/nstl.hpp" |
22 | |
23 | namespace dnnl { |
24 | namespace impl { |
25 | |
26 | // Winograd-specific formats |
// Winograd-specific formats
// Opaque layouts for pre-transformed convolution weights; the exact data
// arrangement is an implementation detail of the Winograd kernels that
// produce/consume them (selected via wino_desc_t::wino_format).
enum class wino_memory_format_t {
    // Undefined memory format, used for empty memory descriptors.
    wino_undef,
    // Tensors of weights for 2x3 winograd convolutions.
    //
    // Internal weights format for 2x3 Winograd.
    wino_wei_aaOio,
    // Internal weights format for 2x3 Winograd.
    wino_wei_aaOBiOo,
    // Tensor of weights for 4x3 convolution.
    //
    // Internal weights format for 4x3 Winograd.
    wino_wei_OBaaIBOIio
};
41 | |
// Memory formats for packed RNN weights; `undef` is used for empty/non-packed
// descriptors. The letters presumably follow the RNN weights dim order used
// elsewhere in this file (l = layers, d = directions, i = input channels,
// g = gates, o = output channels) with the `_p` suffix marking "packed" --
// confirm against the RNN implementations.
enum class rnn_packed_memory_format_t { undef, ldigo_p, ldgoi_p, ldio_p };
43 | |
// Create aliases for extra flags to preserve the old behavior.
// This should be removed and all places that are affected should use
// rnn_packed_memory_format_t::<flag name> syntax.
// NOTE: header-scope `const` variables have internal linkage in C++, so each
// translation unit gets its own copy -- harmless for small scalars like these.
namespace rnn_packed_format {
const rnn_packed_memory_format_t undef = rnn_packed_memory_format_t::undef;
const rnn_packed_memory_format_t ldigo_p = rnn_packed_memory_format_t::ldigo_p;
const rnn_packed_memory_format_t ldgoi_p = rnn_packed_memory_format_t::ldgoi_p;
const rnn_packed_memory_format_t ldio_p = rnn_packed_memory_format_t::ldio_p;
} // namespace rnn_packed_format
53 | |
// TODO: convert to 'enum class'.
// Flags for memory special features
// The enumerator names are reconstructed from the `memory_extra_flags`
// aliases below, which reference each of these `dnnl_memory_extra_flag_*`
// names one-to-one.
enum {
    dnnl_memory_extra_flag_none = 0x0U,
    // Indicates the weights have an additional buffer, that depends on the
    // @p compensation_mask.
    //
    // For instance, in 4D case with the compensation mask equals (1 << 0)
    // the additional buffer would consist of OC values:
    // O[oc : 0,OC] =
    //  -128 * SUM(ic : 0,IC; kh : 0,KH; kw : 0,KW){ weights(oc, ic, kh, kw) }
    dnnl_memory_extra_flag_compensation_conv_s8s8 = 0x1U,
    dnnl_memory_extra_flag_scale_adjust = 0x2U,
    dnnl_memory_extra_flag_rnn_u8s8_compensation = 0x4U,

    // Alias: GPU RNN u8s8 compensation shares the same bit as the CPU one.
    dnnl_memory_extra_flag_gpu_rnn_u8s8_compensation
            = dnnl_memory_extra_flag_rnn_u8s8_compensation,
    dnnl_memory_extra_flag_rnn_s8s8_compensation = 0x8U,
    // NOTE(review): 0x16U is not a single bit (it overlaps 0x2U and 0x4U when
    // treated as a bitmask). The value is kept as-is because changing it
    // would break compatibility with existing descriptors -- verify how
    // callers test this flag before "fixing" it.
    dnnl_memory_extra_flag_compensation_conv_asymmetric_src = 0x16U,
};
73 | |
74 | // Create aliases for extra flags to preserve the old behavior. |
75 | // This should be removed and all places that are affected should use |
76 | // memory_extra_flags_t::<flag name> syntax. |
77 | namespace memory_extra_flags { |
78 | const memory_extra_flags_t = dnnl_memory_extra_flag_none; |
79 | const memory_extra_flags_t |
80 | = dnnl_memory_extra_flag_compensation_conv_s8s8; |
81 | const memory_extra_flags_t = dnnl_memory_extra_flag_scale_adjust; |
82 | const memory_extra_flags_t |
83 | = dnnl_memory_extra_flag_rnn_u8s8_compensation; |
84 | const memory_extra_flags_t |
85 | = dnnl_memory_extra_flag_rnn_s8s8_compensation; |
86 | const memory_extra_flags_t |
87 | = dnnl_memory_extra_flag_compensation_conv_asymmetric_src; |
88 | } // namespace memory_extra_flags |
89 | |
// Generic description of blocked data layout for most memory formats.
struct blocking_desc_t {
    // The strides between the outermost blocks.
    // In case of plain (non-blocked) formats the strides between dimensions.
    dims_t strides;
    // Innermost section
    // ASSUMPTION: the innermost blocks are always dense
    // The number of innermost blocks, e.g. 3 in case of `OIhw_4i16o4i_`
    int inner_nblks;
    // The size of the blocks, e.g. `{4, 16, 4}` in case of `OIhw_4i16o4i`;
    // only the first `inner_nblks` entries are meaningful.
    dims_t inner_blks;
    // The logical indices of the blocks, e.g. `{1, 0, 1}` in case of
    // `4i16o4i`, because `i` is the 1st dim and `o` is the 0th dim
    dims_t inner_idxs;
};
105 | |
// Description of tensor of weights for winograd 2x3 convolution.
struct wino_desc_t {
    // Winograd-specific layout of the weights (see wino_memory_format_t).
    wino_memory_format_t wino_format;
    // NOTE(review): presumably `r` is the filter size and `alpha` the
    // transform tile size of the F(m, r) Winograd algorithm -- confirm
    // against the kernels that fill this descriptor.
    int r;
    int alpha;
    // Number of input / output channels.
    int ic;
    int oc;
    // Inner channel block sizes of the layout.
    int ic_block;
    int oc_block;
    // Outer (second-level) channel block sizes.
    int ic2_block;
    int oc2_block;
    // Scale adjustment applied to the weights; semantics depend on the
    // consuming kernel -- TODO confirm.
    float adj_scale;
    // Total size of the weights buffer (presumably in bytes -- confirm).
    size_t size;
};
120 | |
121 | #define DNNL_RNN_MAX_N_PARTS 4 |
122 | // Description of tensor of packed weights for rnn. |
123 | struct rnn_packed_desc_t { |
124 | // Maximum number of parts of RNN weights tensor that require separate |
125 | // computation. |
126 | const static int max_n_parts = 4; |
127 | rnn_packed_memory_format_t format; |
128 | int n_parts; |
129 | int n; |
130 | int ldb; |
131 | int parts[max_n_parts]; |
132 | size_t part_pack_size[max_n_parts]; |
133 | unsigned pack_part[max_n_parts]; |
134 | size_t offset_compensation; |
135 | size_t size; |
136 | }; |
137 | |
// Description of extra information stored in memory
// (referenced from dnnl_memory_desc below as `memory_extra_desc_t`).
// All members are zero-initialized by default so a freshly created
// descriptor reads as "no extra features" instead of garbage.
struct memory_extra_desc_t {
    // The flags contain arbitrary extra information, such as compensation.
    // @sa dnnl_memory_extra_flags_t
    uint64_t flags = 0;
    // Compensation mask
    int compensation_mask = 0;
    // Scale applied to the data
    float scale_adjust = 0.0f;
    // Compensation mask for asymmetric quantization
    int asymm_compensation_mask = 0;
};
150 | |
// Initializes `memory_desc` for a tensor with the given dimensions, data
// type, and a canonical format tag. Returns a status code.
status_t DNNL_API memory_desc_init_by_tag(memory_desc_t &memory_desc, int ndims,
        const dims_t dims, data_type_t data_type, format_tag_t tag);

// Initializes `memory_desc` for a tensor with the given dimensions, data
// type, and explicit per-dimension strides.
status_t memory_desc_init_by_strides(memory_desc_t &memory_desc, int ndims,
        const dims_t dims, data_type_t data_type, const dims_t strides);

// Initializes `memory_desc` as a sub-memory of `parent_memory_desc` with the
// given dimensions and per-dimension offsets into the parent.
status_t memory_desc_init_submemory(memory_desc_t &memory_desc,
        const memory_desc_t &parent_memory_desc, const dims_t dims,
        const dims_t offsets);

// Initializes `out_memory_desc` as `in_memory_desc` reshaped to `ndims`
// dimensions `dims`.
status_t memory_desc_reshape(memory_desc_t &out_memory_desc,
        const memory_desc_t &in_memory_desc, int ndims, const dims_t dims);

// Initializes `out_memory_desc` as `in_memory_desc` with its axes permuted
// according to `perm`.
status_t memory_desc_permute_axes(memory_desc_t &out_memory_desc,
        const memory_desc_t &in_memory_desc, const int *perm);
166 | |
167 | } // namespace impl |
168 | } // namespace dnnl |
169 | |
170 | // Memory descriptor. The description is based on a number of dimensions, |
171 | // dimensions themselves, plus information about elements type and memory |
172 | // format. Additionally, contains format-specific descriptions of the data |
173 | // layout. |
174 | struct dnnl_memory_desc : public dnnl::impl::c_compatible { |
175 | dnnl_memory_desc() = default; |
176 | dnnl_memory_desc(const dnnl_memory_desc &other) = default; |
177 | // Number of dimensions |
178 | int ndims; |
179 | // Dimensions in the following order: |
180 | // - CNN data tensors: mini-batch, channel, spatial |
181 | // (<code>{N, C, [[D,] H,] W}</code>) |
182 | // - CNN weight tensors: group (optional), output channel, input channel, |
183 | // spatial (<code>{[G,] O, I, [[D,] H,] W}</code>) |
184 | // - RNN data tensors: time, mini-batch, channels (<code>{T, N, C}</code>) |
185 | // or layers, directions, states, mini-batch, channels (<code>{L, D, S, N, C}</code>) |
186 | // - RNN weight tensor: layers, directions, input channel, gates, output channels |
187 | // (<code>{L, D, I, G, O}</code>). |
188 | // |
189 | // @note |
190 | // The order of dimensions does not depend on the memory format, so |
191 | // whether the data is laid out in #dnnl_nchw or #dnnl_nhwc |
192 | // the dims for 4D CN data tensor would be <code>{N, C, H, W}</code>. |
193 | dnnl::impl::dims_t dims; |
194 | |
195 | // Data type of the tensor elements. |
196 | dnnl::impl::data_type_t data_type; |
197 | |
198 | // Size of the data including padding in each dimension. |
199 | dnnl::impl::dims_t padded_dims; |
200 | |
201 | // Per-dimension offset from the padding to actual data, the top-level |
202 | // tensor with offsets applied must lie within the padding area. |
203 | dnnl::impl::dims_t padded_offsets; |
204 | |
205 | // Offset from memory origin to the current block, non-zero only in |
206 | // a description of a memory sub-block. |
207 | dnnl::impl::dim_t offset0; |
208 | |
209 | // Memory format kind. |
210 | dnnl::impl::format_kind_t format_kind; |
211 | union { |
212 | // Description of the data layout for memory formats that use |
213 | // blocking. |
214 | dnnl::impl::blocking_desc_t blocking; |
215 | // Tensor of weights for winograd convolution. |
216 | dnnl::impl::wino_desc_t wino_desc; |
217 | // Tensor of packed weights for RNN. |
218 | dnnl::impl::rnn_packed_desc_t rnn_packed_desc; |
219 | // ... other descriptions possible |
220 | } format_desc; |
221 | |
222 | dnnl::impl::memory_extra_desc_t ; |
223 | }; |
224 | |
225 | #endif |
226 | |