1/*******************************************************************************
2* Copyright 2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef COMMON_MEMORY_DESC_HPP
18#define COMMON_MEMORY_DESC_HPP
19
20#include "common/c_types_map.hpp"
21#include "common/nstl.hpp"
22
23namespace dnnl {
24namespace impl {
25
// Winograd-specific memory formats for pre-transformed convolution weights.
enum class wino_memory_format_t {
    // Undefined memory format, used for empty memory descriptors.
    wino_undef,
    // Tensors of weights for 2x3 winograd convolutions.
    //
    // Internal weights format for 2x3 Winograd.
    wino_wei_aaOio,
    // Internal weights format for 2x3 Winograd.
    wino_wei_aaOBiOo,
    // Tensor of weights for 4x3 convolution.
    //
    // Internal weights format for 4x3 Winograd.
    wino_wei_OBaaIBOIio
};
41
// Layouts of packed RNN weight tensors; `undef` marks non-packed weights.
// NOTE(review): the `_p` suffix presumably denotes the packed counterpart of
// the plain ldigo/ldgoi/ldio layouts — confirm against the RNN implementation.
enum class rnn_packed_memory_format_t { undef, ldigo_p, ldgoi_p, ldio_p };
43
44// Create aliases for extra flags to preserve the old behavior.
45// This should be removed and all places that are affected should use
46// rnn_packed_memory_format_t::<flag name> syntax.
47namespace rnn_packed_format {
48const rnn_packed_memory_format_t undef = rnn_packed_memory_format_t::undef;
49const rnn_packed_memory_format_t ldigo_p = rnn_packed_memory_format_t::ldigo_p;
50const rnn_packed_memory_format_t ldgoi_p = rnn_packed_memory_format_t::ldgoi_p;
51const rnn_packed_memory_format_t ldio_p = rnn_packed_memory_format_t::ldio_p;
52} // namespace rnn_packed_format
53
// TODO: convert to 'enum class'.
// Flags for memory special features
enum memory_extra_flags_t {
    dnnl_memory_extra_flag_none = 0x0U,
    // Indicates the weights have an additional buffer, that depends on the
    // @p compensation_mask.
    //
    // For instance, in 4D case with the compensation mask equals (1 << 0)
    // the additional buffer would consist of OC values:
    // O[oc : 0,OC] =
    //  -128 * SUM(ic : 0,IC; kh : 0,KH; kw : 0,KW){ weights(oc, ic, kh, kw) }
    dnnl_memory_extra_flag_compensation_conv_s8s8 = 0x1U,
    // The weights carry an extra scale factor (see
    // memory_extra_desc_t::scale_adjust).
    dnnl_memory_extra_flag_scale_adjust = 0x2U,
    dnnl_memory_extra_flag_rnn_u8s8_compensation = 0x4U,
    // GPU alias for the u8s8 RNN compensation flag (same bit value).
    dnnl_memory_extra_flag_gpu_rnn_u8s8_compensation
            = dnnl_memory_extra_flag_rnn_u8s8_compensation,
    dnnl_memory_extra_flag_compensation_conv_asymmetric_src = 0x8U,
    // NOTE(review): 0x16U is not a single bit (0x16 == 0x10 | 0x4 | 0x2), so
    // this value overlaps the scale_adjust (0x2) and rnn_u8s8_compensation
    // (0x4) bits. If a distinct single-bit flag was intended, the value should
    // presumably be 0x10U; if 0x16U is deliberate (e.g. kept for binary
    // compatibility), any bitwise test against it needs special handling —
    // confirm with all users of this flag before changing the value.
    dnnl_memory_extra_flag_rnn_s8s8_compensation = 0x16U,
};
73
74// Create aliases for extra flags to preserve the old behavior.
75// This should be removed and all places that are affected should use
76// memory_extra_flags_t::<flag name> syntax.
77namespace memory_extra_flags {
78const memory_extra_flags_t none = dnnl_memory_extra_flag_none;
79const memory_extra_flags_t compensation_conv_s8s8
80 = dnnl_memory_extra_flag_compensation_conv_s8s8;
81const memory_extra_flags_t scale_adjust = dnnl_memory_extra_flag_scale_adjust;
82const memory_extra_flags_t rnn_u8s8_compensation
83 = dnnl_memory_extra_flag_rnn_u8s8_compensation;
84const memory_extra_flags_t rnn_s8s8_compensation
85 = dnnl_memory_extra_flag_rnn_s8s8_compensation;
86const memory_extra_flags_t compensation_conv_asymmetric_src
87 = dnnl_memory_extra_flag_compensation_conv_asymmetric_src;
88} // namespace memory_extra_flags
89
// Generic description of blocked data layout for most memory formats.
struct blocking_desc_t {
    // The strides between the outermost blocks.
    // In case of plain (non-blocked) formats the strides between dimensions.
    dims_t strides;
    // Innermost section
    // ASSUMPTION: the innermost blocks are always dense
    // The number of innermost blocks, e.g. 3 in case of `OIhw_4i16o4i`
    int inner_nblks;
    // The size of the blocks, e.g. `{4, 16, 4}` in case of `OIhw_4i16o4i`
    dims_t inner_blks;
    // The logical indices of the blocks, e.g. `{1, 0, 1}` in case of
    // `4i16o4i`, because `i` is the 1st dim and `o` is the 0th dim
    dims_t inner_idxs;
};
105
// Description of tensor of weights for winograd 2x3 convolution.
struct wino_desc_t {
    // Layout of the transformed weights (one of wino_memory_format_t).
    wino_memory_format_t wino_format;
    // Winograd filter tile size (presumably `r` in F(m x m, r x r) — confirm).
    int r;
    // Winograd transform size (presumably alpha = m + r - 1 — confirm).
    int alpha;
    // Number of input channels.
    int ic;
    // Number of output channels.
    int oc;
    // First-level blocking along input channels.
    int ic_block;
    // First-level blocking along output channels.
    int oc_block;
    // Second-level blocking along input channels.
    int ic2_block;
    // Second-level blocking along output channels.
    int oc2_block;
    // Extra scale applied to the weights during transformation
    // (see dnnl_memory_extra_flag_scale_adjust).
    float adj_scale;
    // Total size of the transformed weights (presumably in bytes — confirm).
    size_t size;
};
120
121#define DNNL_RNN_MAX_N_PARTS 4
122// Description of tensor of packed weights for rnn.
123struct rnn_packed_desc_t {
124 // Maximum number of parts of RNN weights tensor that require separate
125 // computation.
126 const static int max_n_parts = 4;
127 rnn_packed_memory_format_t format;
128 int n_parts;
129 int n;
130 int ldb;
131 int parts[max_n_parts];
132 size_t part_pack_size[max_n_parts];
133 unsigned pack_part[max_n_parts];
134 size_t offset_compensation;
135 size_t size;
136};
137
// Description of extra information stored in memory
struct memory_extra_desc_t {
    // The flags contain arbitrary extra information, such as compensation.
    // @sa memory_extra_flags_t
    uint64_t flags;
    // Compensation mask (dimension bitmask; see
    // dnnl_memory_extra_flag_compensation_conv_s8s8 for an example).
    int compensation_mask;
    // Scale applied to the data
    float scale_adjust;
    // Compensation mask for asymmetric quantization
    int asymm_compensation_mask;
    // NOTE(review): members are deliberately left uninitialized (the struct
    // stays trivial for C interop / the containing descriptor) — every code
    // path that creates a descriptor must zero-fill `extra` explicitly;
    // confirm callers do so.
};
150
// Initializes `memory_desc` from dimensions, data type, and a format tag.
// NOTE(review): this is the only declaration here marked DNNL_API (exported) —
// presumably it is needed outside the library core; confirm before changing.
status_t DNNL_API memory_desc_init_by_tag(memory_desc_t &memory_desc, int ndims,
        const dims_t dims, data_type_t data_type, format_tag_t tag);

// Initializes `memory_desc` from dimensions, data type, and explicit strides.
status_t memory_desc_init_by_strides(memory_desc_t &memory_desc, int ndims,
        const dims_t dims, data_type_t data_type, const dims_t strides);

// Initializes `memory_desc` as a sub-memory (a view with the given `dims`
// located at `offsets`) of `parent_memory_desc`.
status_t memory_desc_init_submemory(memory_desc_t &memory_desc,
        const memory_desc_t &parent_memory_desc, const dims_t dims,
        const dims_t offsets);

// Initializes `out_memory_desc` as `in_memory_desc` reshaped to the new
// `ndims`/`dims`.
status_t memory_desc_reshape(memory_desc_t &out_memory_desc,
        const memory_desc_t &in_memory_desc, int ndims, const dims_t dims);

// Initializes `out_memory_desc` as `in_memory_desc` with axes permuted
// according to `perm`.
status_t memory_desc_permute_axes(memory_desc_t &out_memory_desc,
        const memory_desc_t &in_memory_desc, const int *perm);
166
167} // namespace impl
168} // namespace dnnl
169
// Memory descriptor. The description is based on a number of dimensions,
// dimensions themselves, plus information about elements type and memory
// format. Additionally, contains format-specific descriptions of the data
// layout.
struct dnnl_memory_desc : public dnnl::impl::c_compatible {
    dnnl_memory_desc() = default;
    dnnl_memory_desc(const dnnl_memory_desc &other) = default;
    // Number of dimensions
    int ndims;
    // Dimensions in the following order:
    // - CNN data tensors: mini-batch, channel, spatial
    //   (<code>{N, C, [[D,] H,] W}</code>)
    // - CNN weight tensors: group (optional), output channel, input channel,
    //   spatial (<code>{[G,] O, I, [[D,] H,] W}</code>)
    // - RNN data tensors: time, mini-batch, channels (<code>{T, N, C}</code>)
    //   or layers, directions, states, mini-batch, channels (<code>{L, D, S, N, C}</code>)
    // - RNN weight tensor: layers, directions, input channel, gates, output channels
    //   (<code>{L, D, I, G, O}</code>).
    //
    // @note
    //    The order of dimensions does not depend on the memory format, so
    //    whether the data is laid out in #dnnl_nchw or #dnnl_nhwc
    //    the dims for 4D CN data tensor would be <code>{N, C, H, W}</code>.
    dnnl::impl::dims_t dims;

    // Data type of the tensor elements.
    dnnl::impl::data_type_t data_type;

    // Size of the data including padding in each dimension.
    dnnl::impl::dims_t padded_dims;

    // Per-dimension offset from the padding to actual data, the top-level
    // tensor with offsets applied must lie within the padding area.
    dnnl::impl::dims_t padded_offsets;

    // Offset from memory origin to the current block, non-zero only in
    // a description of a memory sub-block.
    dnnl::impl::dim_t offset0;

    // Memory format kind; selects the active member of `format_desc` below.
    dnnl::impl::format_kind_t format_kind;
    // NOTE(review): union members must remain trivially constructible and
    // destructible — do not add constructors/destructors to the descriptor
    // structs below.
    union {
        // Description of the data layout for memory formats that use
        // blocking.
        dnnl::impl::blocking_desc_t blocking;
        // Tensor of weights for winograd convolution.
        dnnl::impl::wino_desc_t wino_desc;
        // Tensor of packed weights for RNN.
        dnnl::impl::rnn_packed_desc_t rnn_packed_desc;
        // ... other descriptions possible
    } format_desc;

    // Extra information (compensation buffers, scale adjustment, etc.).
    dnnl::impl::memory_extra_desc_t extra;
};
224
225#endif
226