1 | /******************************************************************************* |
2 | * Copyright 2016-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | /// @file |
18 | /// C API types definitions |
19 | |
20 | #ifndef ONEAPI_DNNL_DNNL_TYPES_H |
21 | #define ONEAPI_DNNL_DNNL_TYPES_H |
22 | |
23 | #ifdef __cplusplus |
24 | extern "C" { |
25 | #endif |
26 | |
27 | /// @cond DO_NOT_DOCUMENT_THIS |
28 | #include <stddef.h> |
29 | #include <stdint.h> |
30 | /// @endcond |
31 | |
32 | /// @addtogroup dnnl_api |
33 | /// @{ |
34 | |
35 | /// @addtogroup dnnl_api_utils |
36 | /// @{ |
37 | |
38 | /// Status values returned by the library functions. |
/// Status values returned by the library functions.
typedef enum {
    dnnl_success = 0, ///< The operation was successful
    dnnl_out_of_memory = 1, ///< The operation failed due to an out-of-memory condition
    dnnl_invalid_arguments = 2, ///< The operation failed because of incorrect function arguments
    dnnl_unimplemented = 3, ///< The operation failed because requested functionality is not implemented
    dnnl_iterator_ends = 4, ///< Primitive iterator passed over last primitive descriptor
    dnnl_runtime_error = 5, ///< Primitive or engine failed on execution
    dnnl_not_required = 6, ///< Queried element is not required for given primitive
} dnnl_status_t;
55 | |
56 | /// @} dnnl_api_utils |
57 | |
58 | /// @addtogroup dnnl_api_memory |
59 | /// @{ |
60 | |
61 | /// Data type specification |
/// Data type specification
typedef enum {
    dnnl_data_type_undef = 0, ///< Undefined data type, used for empty memory descriptors
    dnnl_f16 = 1, ///< 16-bit/half-precision floating point
    dnnl_bf16 = 2, ///< non-standard 16-bit floating point (bfloat16 w/ 7 bit mantissa)
    dnnl_f32 = 3, ///< 32-bit/single-precision floating point
    dnnl_s32 = 4, ///< 32-bit signed integer
    dnnl_s8 = 5, ///< 8-bit signed integer
    dnnl_u8 = 6, ///< 8-bit unsigned integer
    dnnl_f64 = 7, ///< 64-bit/double-precision floating point

    /// Parameter to allow internal only data_types without undefined behavior.
    /// This parameter is chosen to be valid for so long as sizeof(int) >= 2.
    dnnl_data_type_max = 0x7fff,
} dnnl_data_type_t;
84 | |
85 | /// Memory format kind |
/// Memory format kind
typedef enum {
    /// Undefined memory format kind, used for empty memory descriptors.
    dnnl_format_kind_undef = 0,
    /// Unspecified format kind; the primitive selects a format
    /// automatically.
    dnnl_format_kind_any = 1,
    /// A tensor in a generic format described by the stride and blocking
    /// values in each dimension. See @ref dnnl_blocking_desc_t for more
    /// information.
    dnnl_blocked = 2,
    /// Weights format used in 8bit Winograd convolution
    dnnl_format_kind_wino = 3,
    /// Packed weights format used in RNN
    dnnl_format_kind_rnn_packed = 4,
} dnnl_format_kind_t;
101 | |
102 | /// Memory format tag specification. |
103 | /// |
104 | /// oneDNN formats describe physical data layout. The physical layout |
105 | /// is described as a sequence of the dimensions as they are laid out in the |
106 | /// memory (from the outer-most to the inner-most). Note that this order |
107 | /// doesn't affect the logical order of the dimensions that is kept in the |
108 | /// `dims` field of the dnnl_memory_desc_t structure. The logical order of the |
109 | /// dimensions is specified by the primitive that uses the tensor. |
110 | /// |
111 | /// For example, CNN 5D tensor always has its logical dimensions in the order |
112 | /// `(batch, channels, depth, height, width)`, while the physical layout might be |
113 | /// `NCDHW` (corresponds to #dnnl_ncdhw format tag) or |
114 | /// `NDHWC` (corresponds to #dnnl_ndhwc format tag). |
115 | /// |
116 | /// ~~~cpp |
117 | /// int batch = 2, channels = 16, depth = 13, height = 13, width = 13; |
118 | /// |
119 | /// int ndims = 5; // 5D tensor |
120 | /// dnnl_dims_t dims = {batch, channels, depth, height, width}; |
121 | /// dnnl_memory_desc_t data_in_ncdhw; |
122 | /// dnnl_memory_desc_init_by_tag( |
123 | /// &data_in_ncdhw, 5, dims, dnnl_f32, dnnl_ncdhw); |
124 | /// |
125 | /// // note that in both cases dims passed are the same |
126 | /// dnnl_memory_desc_t data_in_ndhwc; |
127 | /// dnnl_memory_desc_init_by_tag( |
128 | /// &data_in_ndhwc, 5, dims, dnnl_f32, dnnl_ndhwc); |
129 | /// ~~~ |
130 | /// |
131 | /// Memory format tags can be further divided into two categories: |
/// - Domain-agnostic names, i.e. names that do not depend on the tensor usage
133 | /// in the specific primitive. These names use letters from `a` to `l` to |
134 | /// denote logical dimension from 1 to 12, and form the order in which the |
135 | /// dimensions are laid in memory. For instance, #dnnl_ab is used to denote |
136 | /// 2D tensor where the second logical dimension (aka `b`) is the innermost, |
137 | /// i.e. has stride = 1, and the first logical dimension (`a`) laid out in |
138 | /// memory with stride equal to the size of second dimension. On the other |
139 | /// hand, #dnnl_ba is just transposed version of the same tensor: the |
140 | /// first dimension (`a`) becomes the innermost one. |
/// - Domain-specific names, i.e. names that make sense only in the context of
///   a certain domain, such as CNN. These names are just aliases to the
///   corresponding domain-agnostic tags and are used mostly for convenience.
144 | /// For example, #dnnl_nc is used to denote 2D CNN activations tensor |
145 | /// memory format, where channels are the innermost dimension and batch is an |
146 | /// outermost one. Moreover, #dnnl_nc is just an alias to #dnnl_ab, |
147 | /// since for oneDNN CNN primitives the logical dimensions of |
148 | /// activations tensors come in order: batch, channels, spatial. |
149 | /// In other words, batch corresponds to the first logical dimension (`a`), |
150 | /// channels correspond to the second one (`b`). |
151 | /// |
152 | /// The following domain-specific notation applies to memory format tags: |
153 | /// - @c 'n' denotes the mini-batch dimension |
154 | /// - @c 'c' denotes a channels dimension |
155 | /// - When there are multiple channel dimensions (for example, in convolution |
156 | /// weights tensor), @c 'i' and @c 'o' denote dimensions of input and output |
157 | /// channels |
158 | /// - @c 'd', @c 'h', and @c 'w' denote spatial depth, height, and width |
159 | /// respectively |
160 | /// |
161 | /// Upper-case letters indicate that the data is laid out in blocks for a |
162 | /// particular dimension. In such cases, the format name contains both upper- |
163 | /// and lower-case letters for that dimension with a lower-case letter preceded |
164 | /// by the block size. For example: #dnnl_nChw8c describes a format where the |
165 | /// outermost dimension is mini-batch, followed by the channel block number, |
166 | /// followed by the spatial height and width, and finally followed by 8-element |
167 | /// channel blocks. |
168 | /// |
169 | /// @sa @ref dev_guide_understanding_memory_formats |
170 | typedef enum { |
171 | /// Undefined memory format tag |
172 | dnnl_format_tag_undef = 0, |
173 | /// Undefined memory format tag. |
174 | /// The primitive selects a format automatically. |
175 | dnnl_format_tag_any, |
176 | |
177 | // Semantic agnostic section |
178 | // The physical order of dimensions is defined by the permutation of the |
179 | // characters, assuming that ab..z defines the natural order. |
180 | |
181 | // Plain formats |
182 | |
183 | dnnl_a, ///< plain 1D tensor |
184 | dnnl_ab, ///< plain 2D tensor |
185 | dnnl_abc, ///< plain 3D tensor |
186 | dnnl_abcd, ///< plain 4D tensor |
187 | dnnl_acbd, ///< plain 4D tensor |
188 | dnnl_abcde, ///< plain 5D tensor |
189 | dnnl_abcdef, ///< plain 6D tensor |
190 | dnnl_abcdefg, ///< plain 7D tensor |
191 | dnnl_abcdefgh, ///< plain 8D tensor |
192 | dnnl_abcdefghi, ///< plain 9D tensor |
193 | dnnl_abcdefghij, ///< plain 10D tensor |
194 | dnnl_abcdefghijk, ///< plain 11D tensor |
195 | dnnl_abcdefghijkl, ///< plain 12D tensor |
196 | |
197 | // Permuted plain formats |
198 | |
199 | dnnl_abdc, ///< permuted 4D tensor |
200 | dnnl_abdec, ///< permuted 5D tensor |
201 | dnnl_acb, ///< permuted 3D tensor |
202 | dnnl_acbde, ///< permuted 5D tensor |
203 | dnnl_acbdef, ///< permuted 6D tensor |
204 | dnnl_acdb, ///< permuted 4D tensor |
205 | dnnl_acdeb, ///< permuted 5D tensor |
206 | dnnl_ba, ///< permuted 2D tensor |
207 | dnnl_bac, ///< permuted 3D tensor |
208 | dnnl_bacd, ///< permuted 4D tensor |
209 | dnnl_bacde, ///< permuted 5D tensor |
210 | dnnl_bca, ///< permuted 3D tensor |
211 | dnnl_bcda, ///< permuted 4D tensor |
212 | dnnl_bcdea, ///< permuted 5D tensor |
213 | dnnl_cba, ///< permuted 3D tensor |
214 | dnnl_cdba, ///< permuted 4D tensor |
215 | dnnl_dcab, ///< permuted 4D tensor |
216 | dnnl_cdeba, ///< permuted 5D tensor |
217 | dnnl_decab, ///< permuted 5D tensor |
218 | dnnl_defcab, ///< permuted 6D tensor |
219 | dnnl_abced, ///< permuted 5D tensor |
220 | dnnl_abcdfe, ///< permuted 6D tensor |
221 | dnnl_abcdegf, ///< permuted 7D tensor |
222 | dnnl_abcdefhg, ///< permuted 8D tensor |
223 | dnnl_abcdefgih, ///< permuted 9D tensor |
224 | dnnl_abcdefghji, ///< permuted 10D tensor |
225 | dnnl_abcdefghikj, ///< permuted 11D tensor |
226 | dnnl_abcdefghijlk, ///< permuted 12D tensor |
227 | |
228 | // Opaque blocked formats |
229 | |
230 | dnnl_Abc16a, |
231 | dnnl_ABc16a16b, |
232 | dnnl_ABc32a32b, |
233 | dnnl_ABc4a4b, |
234 | /// 3D tensor blocked by 2nd dimension with block size 16 |
235 | dnnl_aBc16b, |
236 | dnnl_ABc16b16a, |
237 | dnnl_Abc4a, |
238 | /// 3D tensor blocked by 2nd dimension with block size 32 |
239 | dnnl_aBc32b, |
240 | /// 3D tensor blocked by 2nd dimension with block size 4 |
241 | dnnl_aBc4b, |
242 | dnnl_ABc4b16a4b, |
243 | dnnl_ABc2b8a4b, |
244 | dnnl_ABc16b16a4b, |
245 | dnnl_ABc16b16a2b, |
246 | dnnl_ABc4b4a, |
247 | dnnl_ABc8a16b2a, |
248 | dnnl_ABc8a8b, |
249 | dnnl_ABc8a4b, |
250 | /// 3D tensor blocked by 2nd dimension with block size 8 |
251 | dnnl_aBc8b, |
252 | dnnl_ABc8b16a2b, |
253 | dnnl_BAc8a16b2a, |
254 | dnnl_ABc8b8a, |
255 | dnnl_Abcd16a, |
256 | dnnl_Abcd8a, |
257 | dnnl_ABcd16a16b, |
258 | dnnl_Abcd32a, |
259 | dnnl_ABcd32a32b, |
260 | /// 4D tensor blocked by 2nd dimension with block size 16 |
261 | dnnl_aBcd16b, |
262 | dnnl_ABcd16b16a, |
263 | dnnl_aBCd16b16c, |
264 | dnnl_aBCd16c16b, |
265 | dnnl_Abcd4a, |
266 | /// 4D tensor blocked by 2nd dimension with block size 32 |
267 | dnnl_aBcd32b, |
268 | /// 4D tensor blocked by 2nd dimension with block size 4 |
269 | dnnl_aBcd4b, |
270 | dnnl_ABcd4b16a4b, |
271 | dnnl_ABcd16b16a4b, |
272 | dnnl_ABcd16b16a2b, |
273 | dnnl_ABcd4b4a, |
274 | dnnl_ABcd4a4b, |
275 | dnnl_aBCd2c4b2c, |
276 | dnnl_aBCd4b8c2b, |
277 | dnnl_aBCd4c16b4c, |
278 | dnnl_aBCd2c8b4c, |
279 | dnnl_aBCd16c16b4c, |
280 | dnnl_aBCd16c16b2c, |
281 | dnnl_aBCd4c4b, |
282 | dnnl_aBCd4b4c, |
283 | dnnl_ABcd8a16b2a, |
284 | dnnl_ABcd2b8a4b, |
285 | dnnl_ABcd8a8b, |
286 | dnnl_ABcd8a4b, |
287 | /// 4D tensor blocked by 2nd dimension with block size 8 |
288 | dnnl_aBcd8b, |
289 | dnnl_aBCd4c8b2c, |
290 | dnnl_ABcd8b16a2b, |
291 | dnnl_aBCd8b16c2b, |
292 | dnnl_BAcd8a16b2a, |
293 | /// 4D tensor blocked by 1st and 2nd dimension with block size 8 |
294 | dnnl_ABcd8b8a, |
295 | dnnl_aBCd8b8c, |
296 | dnnl_aBCd8b4c, |
297 | dnnl_aBCd8c16b2c, |
298 | dnnl_ABcde8a16b2a, |
299 | dnnl_aCBd8b16c2b, |
300 | dnnl_aBCd8c8b, |
301 | dnnl_Abcde16a, |
302 | dnnl_Abcde32a, |
303 | dnnl_ABcde16a16b, |
304 | dnnl_BAcde8a16b2a, |
305 | /// 4D tensor blocked by 3rd dimension with block size 4 |
306 | dnnl_aBCd2b4c2b, |
307 | /// 5D tensor blocked by 1st dimension with block size 16 |
308 | dnnl_ABcde4b16a4b, |
309 | /// 5D tensor blocked by 1st dimension with block size 8 |
310 | dnnl_ABcde2b8a4b, |
311 | /// 5D tensor blocked by 2nd dimension with block size 16 |
312 | dnnl_aBcde16b, |
313 | dnnl_ABcde16b16a, |
314 | dnnl_aBCde16b16c, |
315 | dnnl_aBCde16c16b, |
316 | dnnl_aBCde2c8b4c, |
317 | dnnl_Abcde4a, |
318 | /// 5D tensor blocked by 2nd dimension with block size 32 |
319 | dnnl_aBcde32b, |
320 | /// 5D tensor blocked by 2nd dimension with block size 4 |
321 | dnnl_aBcde4b, |
322 | dnnl_ABcde4b4a, |
323 | dnnl_ABcde4a4b, |
324 | dnnl_aBCde4b4c, |
325 | dnnl_aBCde2c4b2c, |
326 | dnnl_aBCde4b8c2b, |
327 | dnnl_aBCde4c16b4c, |
328 | dnnl_aBCde16c16b4c, |
329 | dnnl_aBCde16c16b2c, |
330 | dnnl_aBCde4c4b, |
331 | dnnl_Abcde8a, |
332 | dnnl_ABcde8a8b, |
333 | dnnl_ABcde8a4b, |
334 | dnnl_BAcde16b16a, |
335 | /// 5D tensor blocked by 2nd dimension with block size 8 |
336 | dnnl_aBcde8b, |
337 | dnnl_ABcde8b16a2b, |
338 | dnnl_aBCde8b16c2b, |
339 | dnnl_aBCde4c8b2c, |
340 | dnnl_aCBde8b16c2b, |
341 | dnnl_ABcde8b8a, |
342 | dnnl_ABcde32a32b, |
343 | dnnl_aBCde8b8c, |
344 | dnnl_aBCde8b4c, |
345 | dnnl_ABc4a8b8a4b, |
346 | dnnl_ABcd4a8b8a4b, |
347 | dnnl_ABcde4a8b8a4b, |
348 | dnnl_BAc4b8a8b4a, |
349 | dnnl_BAcd4b8a8b4a, |
350 | dnnl_BAcde4b8a8b4a, |
351 | dnnl_ABcd2a8b8a2b, |
352 | dnnl_aBCd4b8c8b4c, |
353 | dnnl_aBCde4b8c8b4c, |
354 | dnnl_aBCde2b8c8b2c, |
355 | dnnl_aBCde8c16b2c, |
356 | dnnl_aBCde8c8b, |
357 | /// 5D tensor blocked by 3rd dimension with block size 4 |
358 | dnnl_aBCde2b4c2b, |
359 | /// 6D tensor blocked by 2nd dimension with block size 16 |
360 | dnnl_aBcdef16b, |
361 | dnnl_aBCdef16b16c, |
362 | dnnl_aBCdef16c16b, |
363 | dnnl_aBCdef4c16b4c, |
364 | /// 6D tensor blocked by 2nd dimension with block size 8 |
365 | dnnl_aBCdef2c8b4c, |
366 | dnnl_aBCdef4c8b2c, |
367 | /// 6D tensor blocked by 3rd dimension with block size 4 |
368 | dnnl_aBCdef2b4c2b, |
369 | /// 6D tensor blocked by 2nd dimension with block size 4 |
370 | dnnl_aBcdef4b, |
371 | dnnl_aBCdef4c4b, |
372 | dnnl_aBCdef4b4c, |
373 | dnnl_aBCdef2c4b2c, |
374 | dnnl_aBCdef4b8c2b, |
375 | dnnl_aBCdef8b8c, |
376 | dnnl_aBCdef8b4c, |
377 | dnnl_aBCdef8c16b2c, |
378 | dnnl_aBCdef4b8c8b4c, |
379 | dnnl_aBCdef8b16c2b, |
380 | dnnl_aCBdef8b16c2b, |
381 | dnnl_aBCdef8c8b, |
382 | dnnl_aBdc16b, |
383 | dnnl_aBdC16b2c, |
384 | dnnl_aBdC16b4c, |
385 | dnnl_aBdc4b, |
386 | dnnl_aBdc8b, |
387 | dnnl_aBdec16b, |
388 | dnnl_aBdeC16b2c, |
389 | dnnl_aBdeC16b4c, |
390 | dnnl_aBdec32b, |
391 | dnnl_aBdec4b, |
392 | dnnl_aBdec8b, |
393 | dnnl_aBdefc16b, |
394 | dnnl_aBdefC16b2c, |
395 | dnnl_aCBdef16c16b, |
396 | dnnl_aBdefc4b, |
397 | dnnl_aBdefc8b, |
398 | dnnl_Abcdef16a, |
399 | dnnl_Abcdef32a, |
400 | dnnl_aBedc16b, |
401 | dnnl_Acb16a, |
402 | dnnl_AcB16a2b, |
403 | dnnl_AcB16a4b, |
404 | dnnl_Acb4a, |
405 | dnnl_Acb8a, |
406 | dnnl_aCBd16b16c, |
407 | dnnl_aCBd16c16b, |
408 | dnnl_aCBde16b16c, |
409 | dnnl_aCBde16c16b, |
410 | dnnl_Acdb16a, |
411 | dnnl_AcdB16a2b, |
412 | dnnl_AcdB16a4b, |
413 | dnnl_Acdb32a, |
414 | dnnl_Acdb4a, |
415 | dnnl_Acdb8a, |
416 | dnnl_Acdeb16a, |
417 | dnnl_AcdeB16a2b, |
418 | dnnl_Acdeb4a, |
419 | dnnl_Acdeb8a, |
420 | dnnl_Adcb16a, |
421 | dnnl_BAc16a16b, |
422 | dnnl_BAc16b16a, |
423 | dnnl_BAcd16a16b, |
424 | dnnl_BAcd16b16a, |
425 | dnnl_aCBd4c8b8c4b, |
426 | dnnl_aCBde4c8b8c4b, |
427 | dnnl_aCBdef4c8b8c4b, |
428 | dnnl_BAcde16a16b, |
429 | dnnl_aCBdef16b16c, |
430 | dnnl_abdfce, ///< permuted 6D tensor |
431 | dnnl_abdefc, ///< permuted 6D tensor |
432 | dnnl_ABc16b32a, |
433 | dnnl_ABc16b64a, |
434 | dnnl_ABc4b32a4b, |
435 | dnnl_ABc4b64a4b, |
436 | dnnl_ABc8b32a2b, |
437 | dnnl_ABc8b64a2b, |
438 | dnnl_AB16b16a, |
439 | dnnl_AB16b32a, |
440 | dnnl_AB16b64a, |
441 | dnnl_AB8b16a2b, |
442 | dnnl_AB8b32a2b, |
443 | dnnl_AB8b64a2b, |
444 | dnnl_AB4b16a4b, |
445 | dnnl_AB4b32a4b, |
446 | dnnl_AB4b64a4b, |
447 | dnnl_AB16b16a4b, |
448 | dnnl_ABcd16b32a, |
449 | dnnl_ABcd16b64a, |
450 | dnnl_ABcd4b32a4b, |
451 | dnnl_ABcd4b64a4b, |
452 | dnnl_ABcd8b32a2b, |
453 | dnnl_ABcd8b64a2b, |
454 | dnnl_ABcde4b32a4b, |
455 | dnnl_ABcde4b64a4b, |
456 | dnnl_ABcde16b16a4b, |
457 | dnnl_ABcde16b16a2b, |
458 | dnnl_ABcde16b32a, |
459 | dnnl_ABcde16b64a, |
460 | dnnl_ABcde8b32a2b, |
461 | dnnl_ABcde8b64a2b, |
462 | dnnl_aBCdef16c16b4c, |
463 | dnnl_aBCdef16c16b2c, |
464 | dnnl_AB32a32b8a4b, |
465 | dnnl_AB8a4b, |
466 | dnnl_AB32a32b8a2b, |
467 | dnnl_AB8a2b, |
468 | dnnl_abDc32d, |
469 | dnnl_abDC32d4c, |
470 | dnnl_abdEc32e, |
471 | dnnl_abdEC32e2c, |
472 | dnnl_abdEC32e4c, |
473 | dnnl_aBdefC16b4c, |
474 | dnnl_AcdeB16a4b, |
475 | dnnl_ABcd16a16b2a, |
476 | dnnl_ABc16a16b2a, |
477 | dnnl_aBCd16b16c2b, |
478 | dnnl_aBCde16b16c2b, |
479 | dnnl_Acb32a, |
480 | dnnl_AcB32a2b, |
481 | dnnl_AcB32a4b, |
482 | dnnl_Acb48a, |
483 | dnnl_AcB48a2b, |
484 | dnnl_AcB48a4b, |
485 | dnnl_Acb64a, |
486 | dnnl_AcB64a2b, |
487 | dnnl_AcB64a4b, |
488 | dnnl_cBa2b, |
489 | dnnl_cBa4b, |
490 | dnnl_aBdc32b, |
491 | dnnl_aBdC32b2c, |
492 | dnnl_aBdC32b4c, |
493 | dnnl_aBdc48b, |
494 | dnnl_aBdC48b2c, |
495 | dnnl_aBdC48b4c, |
496 | dnnl_aBdc64b, |
497 | dnnl_aBdC64b2c, |
498 | dnnl_aBdC64b4c, |
499 | dnnl_adcb, |
500 | dnnl_adCb2c, |
501 | dnnl_adCb4c, |
502 | dnnl_AcdB32a2b, |
503 | dnnl_AcdB32a4b, |
504 | dnnl_Acdb48a, |
505 | dnnl_AcdB48a2b, |
506 | dnnl_AcdB48a4b, |
507 | dnnl_Acdb64a, |
508 | dnnl_AcdB64a2b, |
509 | dnnl_AcdB64a4b, |
510 | dnnl_cdBa2b, |
511 | dnnl_cdBa4b, |
512 | dnnl_aBdeC32b2c, |
513 | dnnl_aBdeC32b4c, |
514 | dnnl_aBdec48b, |
515 | dnnl_aBdeC48b2c, |
516 | dnnl_aBdeC48b4c, |
517 | dnnl_aBdec64b, |
518 | dnnl_aBdeC64b2c, |
519 | dnnl_aBdeC64b4c, |
520 | dnnl_adecb, |
521 | dnnl_adeCb2c, |
522 | dnnl_adeCb4c, |
523 | dnnl_Acdeb32a, |
524 | dnnl_AcdeB32a2b, |
525 | dnnl_AcdeB32a4b, |
526 | dnnl_Acdeb48a, |
527 | dnnl_AcdeB48a2b, |
528 | dnnl_AcdeB48a4b, |
529 | dnnl_Acdeb64a, |
530 | dnnl_AcdeB64a2b, |
531 | dnnl_AcdeB64a4b, |
532 | dnnl_cdeBa2b, |
533 | dnnl_cdeBa4b, |
534 | dnnl_aBdefc32b, |
535 | dnnl_aBdefC32b2c, |
536 | dnnl_aBdefC32b4c, |
537 | dnnl_aBdefc48b, |
538 | dnnl_aBdefC48b2c, |
539 | dnnl_aBdefC48b4c, |
540 | dnnl_aBdefc64b, |
541 | dnnl_aBdefC64b2c, |
542 | dnnl_aBdefC64b4c, |
543 | dnnl_adefcb, |
544 | dnnl_adefCb2c, |
545 | dnnl_adefCb4c, |
546 | dnnl_AB16b32a4b, |
547 | dnnl_AB16b48a4b, |
548 | dnnl_AB16b64a4b, |
549 | dnnl_AB16b16a2b, |
550 | dnnl_AB16b32a2b, |
551 | dnnl_AB16b48a2b, |
552 | dnnl_AB16b64a2b, |
553 | dnnl_ABc16b32a4b, |
554 | dnnl_ABc16b48a4b, |
555 | dnnl_ABc16b64a4b, |
556 | dnnl_ABc16b32a2b, |
557 | dnnl_ABc16b48a2b, |
558 | dnnl_ABc16b64a2b, |
559 | dnnl_ABcd16b32a4b, |
560 | dnnl_ABcd16b48a4b, |
561 | dnnl_ABcd16b64a4b, |
562 | dnnl_ABcd16b32a2b, |
563 | dnnl_ABcd16b48a2b, |
564 | dnnl_ABcd16b64a2b, |
565 | dnnl_ABcde16b32a4b, |
566 | dnnl_ABcde16b48a4b, |
567 | dnnl_ABcde16b64a4b, |
568 | dnnl_ABcde16b32a2b, |
569 | dnnl_ABcde16b48a2b, |
570 | dnnl_ABcde16b64a2b, |
571 | dnnl_ABc32a16b, |
572 | dnnl_ABcd32a16b, |
573 | dnnl_ABcde32a16b, |
574 | dnnl_AB48a16b, |
575 | dnnl_AB48a32b, |
576 | dnnl_ABc40a16b, |
577 | dnnl_ABc40a32b, |
578 | dnnl_aBC48b16c, |
579 | dnnl_aBC48b32c, |
580 | dnnl_ABcd40a16b, |
581 | dnnl_ABcd40a32b, |
582 | dnnl_abCd32c, |
583 | dnnl_abdCe32c, |
584 | dnnl_abdCE32c2e, |
585 | dnnl_BA16a16b2a, |
586 | dnnl_BA16a32b2a, |
587 | dnnl_BA16a48b2a, |
588 | dnnl_BA16a64b2a, |
589 | dnnl_BA16a16b4a, |
590 | dnnl_BA16a32b4a, |
591 | dnnl_BA16a48b4a, |
592 | dnnl_BA16a64b4a, |
593 | dnnl_ABcd8a2b, |
594 | dnnl_aBdeC16c16b2c, |
595 | dnnl_aBdeC16c16b4c, |
596 | dnnl_aBdefC16c16b2c, |
597 | dnnl_AcB16b16a2b, |
598 | dnnl_AcB16b16a4b, |
599 | dnnl_AcdB16b16a2b, |
600 | dnnl_AcdB16b16a4b, |
601 | dnnl_AcdeB16b16a2b, |
602 | dnnl_aBdefC16c16b4c, |
603 | dnnl_AcdeB16b16a4b, |
604 | dnnl_AcB16b32a2b, |
605 | dnnl_AcB16b32a4b, |
606 | dnnl_AcB16b48a2b, |
607 | dnnl_AcB16b48a4b, |
608 | dnnl_AcB16b64a2b, |
609 | dnnl_AcB16b64a4b, |
610 | dnnl_aBdC16c16b2c, |
611 | dnnl_aBdC16c16b4c, |
612 | dnnl_aBdC16c32b2c, |
613 | dnnl_aBdC16c32b4c, |
614 | dnnl_aBdC16c48b2c, |
615 | dnnl_aBdC16c48b4c, |
616 | dnnl_aBdC16c64b2c, |
617 | dnnl_aBdC16c64b4c, |
618 | dnnl_AcdB16b32a2b, |
619 | dnnl_AcdB16b32a4b, |
620 | dnnl_AcdB16b48a2b, |
621 | dnnl_AcdB16b48a4b, |
622 | dnnl_AcdB16b64a2b, |
623 | dnnl_AcdB16b64a4b, |
624 | dnnl_aBdeC16c32b2c, |
625 | dnnl_aBdeC16c32b4c, |
626 | dnnl_aBdeC16c48b2c, |
627 | dnnl_aBdeC16c48b4c, |
628 | dnnl_aBdeC16c64b2c, |
629 | dnnl_aBdeC16c64b4c, |
630 | dnnl_AcdeB16b32a2b, |
631 | dnnl_AcdeB16b32a4b, |
632 | dnnl_AcdeB16b48a2b, |
633 | dnnl_AcdeB16b48a4b, |
634 | dnnl_AcdeB16b64a2b, |
635 | dnnl_AcdeB16b64a4b, |
636 | dnnl_aBdefC16c32b2c, |
637 | dnnl_aBdefC16c32b4c, |
638 | dnnl_aBdefC16c48b2c, |
639 | dnnl_aBdefC16c48b4c, |
640 | dnnl_aBdefC16c64b2c, |
641 | dnnl_aBdefC16c64b4c, |
642 | dnnl_decbA16a, |
643 | dnnl_ABc4a2b, |
644 | dnnl_ABc8a2b, |
645 | dnnl_aBCd8b2c, |
646 | dnnl_ABcde4a2b, |
647 | dnnl_ABcde8a2b, |
648 | dnnl_ABcde40a16b, |
649 | dnnl_ABcde40a32b, |
650 | dnnl_aBCde8b2c, |
651 | dnnl_ABcde4a8b8a2b, |
652 | dnnl_ABcd4a8b8a2b, |
653 | dnnl_ABc4a8b8a2b, |
654 | dnnl_aBCdef4b8c8b2c, |
655 | dnnl_aBCde4b8c8b2c, |
656 | dnnl_aBCd4b8c8b2c, |
657 | dnnl_BAcde4b8a8b2a, |
658 | dnnl_BAcd4b8a8b2a, |
659 | dnnl_BAc4b8a8b2a, |
660 | dnnl_aCBdef4c8b8c2b, |
661 | dnnl_aCBde4c8b8c2b, |
662 | dnnl_aCBd4c8b8c2b, |
663 | dnnl_aBCdef8b2c, |
664 | dnnl_AB32a16b, |
665 | dnnl_AB32a32b, |
666 | dnnl_BA4b8a8b2a, |
667 | dnnl_BA4b8a8b4a, |
668 | dnnl_aBC32b16c, |
669 | dnnl_aBC32b32c, |
670 | dnnl_aCB4c8b8c2b, |
671 | dnnl_aCB4c8b8c4b, |
672 | dnnl_ABcd4a2b, |
673 | dnnl_ABc2b8a16b4a, |
674 | dnnl_ABcd2b8a16b4a, |
675 | dnnl_ABcde2b8a16b4a, |
676 | dnnl_ABc2a8b16a4b, |
677 | dnnl_ABc2a8b16a2b, |
678 | dnnl_ABc2b32a8b, |
679 | dnnl_ABcd2a8b16a4b, |
680 | dnnl_ABcd2a8b16a2b, |
681 | dnnl_aCBd2c8b16c2b, |
682 | dnnl_ABcd2b32a8b, |
683 | dnnl_aBCd2c8b16c2b, |
684 | dnnl_ABcde2a8b16a4b, |
685 | dnnl_ABcde2a8b16a2b, |
686 | dnnl_aCBde2c8b16c2b, |
687 | dnnl_ABcde2b32a8b, |
688 | dnnl_aBC2b8c16b2c, |
689 | dnnl_aBCd2b8c16b2c, |
690 | dnnl_aBCde2b8c16b2c, |
691 | dnnl_aBCdef2b8c16b2c, |
692 | dnnl_BAcde2b8a16b4a, |
693 | dnnl_BAcd2b8a16b4a, |
694 | dnnl_BAc2b8a16b4a, |
695 | dnnl_BAcde2b8a16b2a, |
696 | dnnl_BAcd2b8a16b2a, |
697 | dnnl_BAc2b8a16b2a, |
698 | dnnl_aBCde2c8b16c2b, |
699 | dnnl_aBCdef2c8b16c2b, |
700 | dnnl_aCBdef2c8b16c2b, |
701 | dnnl_aBCd2b8c16b4c, |
702 | dnnl_aBCde2b8c16b4c, |
703 | dnnl_BA4b8a16b2a, |
704 | dnnl_BA4b8a16b4a, |
705 | dnnl_aCB4c8b16c2b, |
706 | dnnl_aCB4c8b16c4b, |
707 | dnnl_BA16a16b, |
708 | dnnl_BA16a32b, |
709 | dnnl_BA16a48b, |
710 | dnnl_BA16a64b, |
711 | dnnl_aCB16c2b, |
712 | dnnl_aCB16c4b, |
713 | dnnl_BA16b2a, |
714 | dnnl_BA16b4a, |
715 | dnnl_aBC16b16c, |
716 | dnnl_aBC16b32c, |
717 | dnnl_AB16a16b, |
718 | dnnl_AB16a32b, |
719 | dnnl_adbc, |
720 | dnnl_ABcde16a16b2a, |
721 | dnnl_aBCdef16b16c2b, |
722 | dnnl_Acedb16a, |
723 | dnnl_aBdfec16b, |
724 | dnnl_abdEC64e2c, |
725 | dnnl_abdEC64e4c, |
726 | |
727 | /// Just a sentinel, not real memory format tag. Must be changed after new |
728 | /// format tag is added. |
729 | dnnl_format_tag_last, |
730 | |
731 | // Aliases |
732 | |
733 | /// 1D tensor, an alias to #dnnl_a |
734 | dnnl_x = dnnl_a, |
735 | /// 2D CNN activations tensor, an alias to #dnnl_ab |
736 | dnnl_nc = dnnl_ab, |
737 | /// 2D CNN activations tensor, an alias to #dnnl_ba |
738 | dnnl_cn = dnnl_ba, |
739 | /// 2D RNN statistics tensor, an alias to #dnnl_ab |
740 | dnnl_tn = dnnl_ab, |
741 | /// 2D RNN statistics tensor, an alias to #dnnl_ba |
742 | dnnl_nt = dnnl_ba, |
743 | /// 3D CNN activations tensor, an alias to #dnnl_abc |
744 | dnnl_ncw = dnnl_abc, |
745 | /// 3D CNN activations tensor, an alias to #dnnl_acb |
746 | dnnl_nwc = dnnl_acb, |
747 | /// 4D CNN activations tensor, an alias to #dnnl_abcd |
748 | dnnl_nchw = dnnl_abcd, |
749 | /// 4D CNN activations tensor, an alias to #dnnl_acdb |
750 | dnnl_nhwc = dnnl_acdb, |
751 | /// 4D CNN activations tensor, an alias to #dnnl_bcda |
752 | dnnl_chwn = dnnl_bcda, |
753 | /// 5D CNN activations tensor, an alias to #dnnl_abcde |
754 | dnnl_ncdhw = dnnl_abcde, |
755 | /// 5D CNN activations tensor, an alias to #dnnl_acdeb |
756 | dnnl_ndhwc = dnnl_acdeb, |
757 | |
758 | /// 2D CNN weights tensor, an alias to #dnnl_ab |
759 | dnnl_oi = dnnl_ab, |
760 | /// 2D CNN weights tensor, an alias to #dnnl_ba |
761 | dnnl_io = dnnl_ba, |
762 | /// 3D CNN weights tensor, an alias to #dnnl_abc |
763 | dnnl_oiw = dnnl_abc, |
764 | /// 3D CNN weights tensor, an alias to #dnnl_acb |
765 | dnnl_owi = dnnl_acb, |
766 | /// 3D CNN weights tensor, an alias to #dnnl_cba |
767 | dnnl_wio = dnnl_cba, |
768 | /// 3D CNN weights tensor, an alias to #dnnl_bca |
769 | dnnl_iwo = dnnl_bca, |
770 | /// 4D CNN weights tensor, an alias to #dnnl_abcd |
771 | dnnl_oihw = dnnl_abcd, |
772 | /// 4D CNN weights tensor, an alias to #dnnl_cdba |
773 | dnnl_hwio = dnnl_cdba, |
774 | /// 4D CNN weights tensor, an alias to #dnnl_acdb |
775 | dnnl_ohwi = dnnl_acdb, |
776 | /// 4D CNN weights tensor, an alias to #dnnl_bcda |
777 | dnnl_ihwo = dnnl_bcda, |
778 | /// 4D CNN weights tensor, an alias to #dnnl_bacd |
779 | dnnl_iohw = dnnl_bacd, |
780 | /// 5D CNN weights tensor, an alias to #dnnl_abcde |
781 | dnnl_oidhw = dnnl_abcde, |
782 | /// 5D CNN weights tensor, an alias to #dnnl_bacde |
783 | dnnl_iodhw = dnnl_bacde, |
784 | /// 5D CNN weights tensor, an alias to #dnnl_cdeba |
785 | dnnl_dhwio = dnnl_cdeba, |
786 | /// 5D CNN weights tensor, an alias to #dnnl_acdeb |
787 | dnnl_odhwi = dnnl_acdeb, |
788 | /// 5D CNN weights tensor, an alias to #dnnl_bcdea |
789 | dnnl_idhwo = dnnl_bcdea, |
790 | |
791 | /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_abcd |
792 | dnnl_goiw = dnnl_abcd, |
793 | /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_abdc |
794 | dnnl_gowi = dnnl_abdc, |
795 | /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_dcab |
796 | dnnl_wigo = dnnl_dcab, |
797 | /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_abcde |
798 | dnnl_goihw = dnnl_abcde, |
799 | /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_abdec |
800 | dnnl_gohwi = dnnl_abdec, |
801 | /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_decab |
802 | dnnl_hwigo = dnnl_decab, |
803 | /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_acbde |
804 | dnnl_giohw = dnnl_acbde, |
805 | /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_abcdef |
806 | dnnl_goidhw = dnnl_abcdef, |
807 | /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_abdefc |
808 | dnnl_godhwi = dnnl_abdefc, |
809 | /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_acbdef |
810 | dnnl_giodhw = dnnl_acbdef, |
811 | /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_defcab |
812 | dnnl_dhwigo = dnnl_defcab, |
813 | |
814 | /// 3D RNN data tensor in the format (seq_length, batch, input channels), |
815 | /// an alias to #dnnl_abc. |
816 | dnnl_tnc = dnnl_abc, |
817 | /// 3D RNN data tensor in the format (batch, seq_length, input channels), |
818 | /// an alias to #dnnl_bac. |
819 | dnnl_ntc = dnnl_bac, |
820 | /// 4D RNN states tensor in the format (num_layers, num_directions, |
821 | /// batch, state channels), an alias to #dnnl_abcd. |
822 | dnnl_ldnc = dnnl_abcd, |
823 | /// 5D RNN weights tensor in the format (num_layers, num_directions, |
824 | /// input_channels, num_gates, output_channels), an alias to #dnnl_abcde. |
825 | /// |
826 | /// - For LSTM cells, the gates order is input, forget, candidate |
827 | /// and output gate. |
828 | /// - For GRU cells, the gates order is update, reset and output gate. |
829 | dnnl_ldigo = dnnl_abcde, |
830 | /// 5D RNN weights tensor in the format (num_layers, num_directions, |
831 | /// num_gates, output_channels, input_channels), an alias to #dnnl_abdec. |
832 | /// |
833 | /// - For LSTM cells, the gates order is input, forget, candidate |
834 | /// and output gate. |
835 | /// - For GRU cells, the gates order is update, reset and output gate. |
836 | dnnl_ldgoi = dnnl_abdec, |
837 | /// 4D LSTM projection tensor in the format (num_layers, num_directions, |
838 | /// num_channels_in_hidden_state, num_channels_in_recurrent_projection), |
839 | /// an alias to #dnnl_abcd. |
840 | dnnl_ldio = dnnl_abcd, |
841 | /// 4D LSTM projection tensor in the format (num_layers, num_directions, |
842 | /// num_channels_in_recurrent_projection, num_channels_in_hidden_state), |
843 | /// an alias to #dnnl_abdc. |
844 | dnnl_ldoi = dnnl_abdc, |
845 | /// 4D RNN bias tensor in the format (num_layers, num_directions, |
846 | /// num_gates, output_channels), an alias to #dnnl_abcd. |
847 | /// |
848 | /// - For LSTM cells, the gates order is input, forget, candidate |
849 | /// and output gate. |
850 | /// - For GRU cells, the gates order is update, reset and output gate. |
851 | dnnl_ldgo = dnnl_abcd, |
852 | /// 5D LSTM projection tensor |
853 | dnnl_ldOi32o = dnnl_abDc32d, |
854 | dnnl_ldOI32o4i = dnnl_abDC32d4c, |
855 | dnnl_ldIo32i = dnnl_abCd32c, |
856 | /// 6D RNN weights tensor |
857 | dnnl_ldgOi32o = dnnl_abdEc32e, |
858 | dnnl_ldgOI32o2i = dnnl_abdEC32e2c, |
859 | dnnl_ldgOI32o4i = dnnl_abdEC32e4c, |
860 | dnnl_ldgOI64o2i = dnnl_abdEC64e2c, |
861 | dnnl_ldgOI64o4i = dnnl_abdEC64e4c, |
862 | dnnl_ldgIo32i = dnnl_abdCe32c, |
863 | dnnl_ldgIO32i2o = dnnl_abdCE32c2e, |
864 | |
865 | // Opaque data types, are not to be used explicitly |
866 | |
867 | // data |
868 | /// 5D CNN activations tensor blocked by channels with block size 32, |
869 | /// an alias to #dnnl_aBcde32b |
870 | dnnl_nCdhw32c = dnnl_aBcde32b, |
871 | /// 5D CNN activations tensor blocked by channels with block size 16, |
872 | /// an alias to #dnnl_aBcde16b |
873 | dnnl_nCdhw16c = dnnl_aBcde16b, |
874 | /// 5D CNN activations tensor blocked by channels with block size 4, |
875 | /// an alias to #dnnl_aBcde4b |
876 | dnnl_nCdhw4c = dnnl_aBcde4b, |
877 | /// 5D CNN activations tensor blocked by channels with block size 8, |
878 | /// an alias to #dnnl_aBcde8b |
879 | dnnl_nCdhw8c = dnnl_aBcde8b, |
880 | /// 4D CNN activations tensor blocked by channels with block size 32, |
881 | /// an alias to #dnnl_aBcd32b |
882 | dnnl_nChw32c = dnnl_aBcd32b, |
883 | /// 4D CNN activations tensor blocked by channels with block size 16, |
884 | /// an alias to #dnnl_aBcd16b |
885 | dnnl_nChw16c = dnnl_aBcd16b, |
886 | /// 4D CNN activations tensor blocked by channels with block size 4, |
887 | /// an alias to #dnnl_aBcd4b |
888 | dnnl_nChw4c = dnnl_aBcd4b, |
889 | /// 4D CNN activations tensor blocked by channels with block size 8, |
890 | /// an alias to #dnnl_aBcd8b |
891 | dnnl_nChw8c = dnnl_aBcd8b, |
892 | /// 3D CNN activations tensor blocked by channels with block size 32, |
893 | /// an alias to #dnnl_aBc32b |
894 | dnnl_nCw32c = dnnl_aBc32b, |
895 | /// 3D CNN activations tensor blocked by channels with block size 16, |
896 | /// an alias to #dnnl_aBc16b |
897 | dnnl_nCw16c = dnnl_aBc16b, |
898 | /// 3D CNN activations tensor blocked by channels with block size 4, |
899 | /// an alias to #dnnl_aBc4b |
900 | dnnl_nCw4c = dnnl_aBc4b, |
901 | /// 3D CNN activations tensor blocked by channels with block size 8, |
902 | /// an alias to #dnnl_aBc8b |
903 | dnnl_nCw8c = dnnl_aBc8b, |
904 | dnnl_NCw16n16c = dnnl_ABc16a16b, |
905 | dnnl_NCdhw16n16c = dnnl_ABcde16a16b, |
906 | dnnl_NChw16n16c = dnnl_ABcd16a16b, |
907 | dnnl_NCw32n16c = dnnl_ABc32a16b, |
908 | dnnl_NChw32n16c = dnnl_ABcd32a16b, |
909 | dnnl_NCdhw32n16c = dnnl_ABcde32a16b, |
910 | dnnl_NCw32n32c = dnnl_ABc32a32b, |
911 | dnnl_NChw32n32c = dnnl_ABcd32a32b, |
912 | dnnl_NCdhw32n32c = dnnl_ABcde32a32b, |
913 | |
914 | // weights, 2D |
915 | dnnl_OI16i16o = dnnl_AB16b16a, |
916 | dnnl_OI16i32o = dnnl_AB16b32a, |
917 | dnnl_OI16i64o = dnnl_AB16b64a, |
918 | dnnl_OI8i16o2i = dnnl_AB8b16a2b, |
919 | dnnl_OI8i32o2i = dnnl_AB8b32a2b, |
920 | dnnl_OI8i64o2i = dnnl_AB8b64a2b, |
921 | dnnl_OI4i16o4i = dnnl_AB4b16a4b, |
922 | dnnl_OI4i32o4i = dnnl_AB4b32a4b, |
923 | dnnl_OI4i64o4i = dnnl_AB4b64a4b, |
924 | dnnl_OI16i16o4i = dnnl_AB16b16a4b, |
925 | // weights, 3D |
926 | dnnl_IOw16o16i = dnnl_BAc16a16b, |
927 | dnnl_IOw16i16o = dnnl_BAc16b16a, |
928 | dnnl_OIw16i16o = dnnl_ABc16b16a, |
929 | dnnl_OIw16i32o = dnnl_ABc16b32a, |
930 | dnnl_OIw16i64o = dnnl_ABc16b64a, |
931 | dnnl_OIw16o16i = dnnl_ABc16a16b, |
932 | dnnl_Oiw16o = dnnl_Abc16a, |
933 | dnnl_OIw4i16o4i = dnnl_ABc4b16a4b, |
934 | dnnl_OIw4i32o4i = dnnl_ABc4b32a4b, |
935 | dnnl_OIw4i64o4i = dnnl_ABc4b64a4b, |
936 | dnnl_OIw2i8o4i = dnnl_ABc2b8a4b, |
937 | dnnl_OIw16i16o4i = dnnl_ABc16b16a4b, |
938 | dnnl_OIw16i16o2i = dnnl_ABc16b16a2b, |
939 | dnnl_OIw16o16i2o = dnnl_ABc16a16b2a, |
940 | dnnl_OIw4i4o = dnnl_ABc4b4a, |
941 | dnnl_OIw4o4i = dnnl_ABc4a4b, |
942 | dnnl_Oiw4o = dnnl_Abc4a, |
943 | dnnl_OIw8i16o2i = dnnl_ABc8b16a2b, |
944 | dnnl_OIw8i32o2i = dnnl_ABc8b32a2b, |
945 | dnnl_OIw8i64o2i = dnnl_ABc8b64a2b, |
946 | dnnl_OIw8i8o = dnnl_ABc8b8a, |
947 | dnnl_OIw8o16i2o = dnnl_ABc8a16b2a, |
948 | dnnl_IOw8o16i2o = dnnl_BAc8a16b2a, |
949 | dnnl_OIw8o8i = dnnl_ABc8a8b, |
950 | dnnl_OIw8o4i = dnnl_ABc8a4b, |
951 | dnnl_Owi16o = dnnl_Acb16a, |
952 | dnnl_OwI16o2i = dnnl_AcB16a2b, |
953 | dnnl_OwI16o4i = dnnl_AcB16a4b, |
954 | dnnl_Owi4o = dnnl_Acb4a, |
955 | dnnl_Owi8o = dnnl_Acb8a, |
956 | |
957 | // weights, 4D |
958 | dnnl_IOhw16i16o = dnnl_BAcd16b16a, |
959 | dnnl_IOhw16o16i = dnnl_BAcd16a16b, |
960 | dnnl_Ohwi16o = dnnl_Acdb16a, |
961 | dnnl_OhwI16o2i = dnnl_AcdB16a2b, |
962 | dnnl_OhwI16o4i = dnnl_AcdB16a4b, |
963 | dnnl_Ohwi32o = dnnl_Acdb32a, |
964 | dnnl_Ohwi4o = dnnl_Acdb4a, |
965 | dnnl_Ohwi8o = dnnl_Acdb8a, |
966 | dnnl_OIhw16i16o = dnnl_ABcd16b16a, |
967 | dnnl_OIhw16i32o = dnnl_ABcd16b32a, |
968 | dnnl_OIhw16i64o = dnnl_ABcd16b64a, |
969 | dnnl_OIhw16o16i = dnnl_ABcd16a16b, |
970 | dnnl_Oihw16o = dnnl_Abcd16a, |
971 | dnnl_OIhw4i16o4i = dnnl_ABcd4b16a4b, |
972 | dnnl_OIhw4i32o4i = dnnl_ABcd4b32a4b, |
973 | dnnl_OIhw4i64o4i = dnnl_ABcd4b64a4b, |
974 | dnnl_OIhw16i16o4i = dnnl_ABcd16b16a4b, |
975 | dnnl_OIhw16i16o2i = dnnl_ABcd16b16a2b, |
976 | dnnl_OIhw16o16i2o = dnnl_ABcd16a16b2a, |
977 | dnnl_OIhw4i4o = dnnl_ABcd4b4a, |
978 | dnnl_OIhw4o4i = dnnl_ABcd4a4b, |
979 | dnnl_Oihw4o = dnnl_Abcd4a, |
980 | dnnl_OIhw8i16o2i = dnnl_ABcd8b16a2b, |
981 | dnnl_OIhw8i32o2i = dnnl_ABcd8b32a2b, |
982 | dnnl_OIhw8i64o2i = dnnl_ABcd8b64a2b, |
983 | dnnl_OIhw8i8o = dnnl_ABcd8b8a, |
984 | dnnl_OIhw8o16i2o = dnnl_ABcd8a16b2a, |
985 | dnnl_OIhw2i8o4i = dnnl_ABcd2b8a4b, |
986 | dnnl_IOhw8o16i2o = dnnl_BAcd8a16b2a, |
987 | dnnl_OIhw8o8i = dnnl_ABcd8a8b, |
988 | dnnl_OIhw8o4i = dnnl_ABcd8a4b, |
989 | dnnl_Owhi16o = dnnl_Adcb16a, |
990 | |
991 | // weights, 5D |
992 | dnnl_Odhwi16o = dnnl_Acdeb16a, |
993 | dnnl_OdhwI16o2i = dnnl_AcdeB16a2b, |
994 | dnnl_OdhwI16o4i = dnnl_AcdeB16a4b, |
995 | dnnl_Odhwi4o = dnnl_Acdeb4a, |
996 | dnnl_Odhwi8o = dnnl_Acdeb8a, |
997 | dnnl_Odwhi16o = dnnl_Acedb16a, |
998 | dnnl_OIdhw16i16o = dnnl_ABcde16b16a, |
999 | dnnl_OIdhw16i32o = dnnl_ABcde16b32a, |
1000 | dnnl_OIdhw16i64o = dnnl_ABcde16b64a, |
1001 | dnnl_OIdhw16o16i = dnnl_ABcde16a16b, |
1002 | dnnl_Oidhw16o = dnnl_Abcde16a, |
1003 | dnnl_OIdhw4i4o = dnnl_ABcde4b4a, |
1004 | dnnl_OIdhw4o4i = dnnl_ABcde4a4b, |
1005 | dnnl_Oidhw4o = dnnl_Abcde4a, |
1006 | dnnl_OIdhw8i16o2i = dnnl_ABcde8b16a2b, |
1007 | dnnl_OIdhw8i32o2i = dnnl_ABcde8b32a2b, |
1008 | dnnl_OIdhw8i64o2i = dnnl_ABcde8b64a2b, |
1009 | dnnl_OIdhw8i8o = dnnl_ABcde8b8a, |
1010 | dnnl_OIdhw8o16i2o = dnnl_ABcde8a16b2a, |
1011 | dnnl_IOdhw8o16i2o = dnnl_BAcde8a16b2a, |
1012 | dnnl_OIdhw4i16o4i = dnnl_ABcde4b16a4b, |
1013 | dnnl_OIdhw4i32o4i = dnnl_ABcde4b32a4b, |
1014 | dnnl_OIdhw4i64o4i = dnnl_ABcde4b64a4b, |
1015 | dnnl_OIdhw16i16o4i = dnnl_ABcde16b16a4b, |
1016 | dnnl_OIdhw16i16o2i = dnnl_ABcde16b16a2b, |
1017 | dnnl_OIdhw2i8o4i = dnnl_ABcde2b8a4b, |
1018 | dnnl_OIdhw8o8i = dnnl_ABcde8a8b, |
1019 | dnnl_OIdhw8o4i = dnnl_ABcde8a4b, |
1020 | dnnl_IOdhw16i16o = dnnl_BAcde16b16a, |
1021 | dnnl_OIdhw4o8i8o4i = dnnl_ABcde4a8b8a4b, |
1022 | dnnl_IOdhw16o16i = dnnl_BAcde16a16b, |
1023 | dnnl_OIdhw16o16i2o = dnnl_ABcde16a16b2a, |
1024 | |
1025 | // weights w/ groups, 3D |
1026 | dnnl_Goiw16g = dnnl_Abcd16a, |
1027 | dnnl_Goiw8g = dnnl_Abcd8a, |
1028 | dnnl_Goiw4g = dnnl_Abcd4a, |
1029 | dnnl_gIOw16o16i = dnnl_aCBd16b16c, |
1030 | dnnl_gIOw16i16o = dnnl_aCBd16c16b, |
1031 | dnnl_gOIw16i16o = dnnl_aBCd16c16b, |
1032 | dnnl_gOIw16o16i = dnnl_aBCd16b16c, |
1033 | dnnl_gOiw16o = dnnl_aBcd16b, |
1034 | dnnl_gOIw4i16o4i = dnnl_aBCd4c16b4c, |
1035 | dnnl_gOIw2i8o4i = dnnl_aBCd2c8b4c, |
1036 | dnnl_gOIw16i16o4i = dnnl_aBCd16c16b4c, |
1037 | dnnl_gOIw16i16o2i = dnnl_aBCd16c16b2c, |
1038 | dnnl_gOIw16o16i2o = dnnl_aBCd16b16c2b, |
1039 | dnnl_gOIw4i4o = dnnl_aBCd4c4b, |
1040 | dnnl_gOIw4o4i = dnnl_aBCd4b4c, |
1041 | dnnl_gOiw4o = dnnl_aBcd4b, |
1042 | dnnl_gOIw8i16o2i = dnnl_aBCd8c16b2c, |
1043 | dnnl_gOIw8i8o = dnnl_aBCd8c8b, |
1044 | dnnl_gOIw8o16i2o = dnnl_aBCd8b16c2b, |
1045 | dnnl_gIOw8o16i2o = dnnl_aCBd8b16c2b, |
1046 | dnnl_gOIw8o8i = dnnl_aBCd8b8c, |
1047 | dnnl_gOIw8o4i = dnnl_aBCd8b4c, |
1048 | dnnl_gOwi16o = dnnl_aBdc16b, |
1049 | dnnl_gOwI16o2i = dnnl_aBdC16b2c, |
1050 | dnnl_gOwI16o4i = dnnl_aBdC16b4c, |
1051 | dnnl_gOwi4o = dnnl_aBdc4b, |
1052 | dnnl_gOwi8o = dnnl_aBdc8b, |
1053 | dnnl_Goiw32g = dnnl_Abcd32a, |
1054 | dnnl_gOIw2i4o2i = dnnl_aBCd2c4b2c, |
1055 | dnnl_gOIw2o4i2o = dnnl_aBCd2b4c2b, |
1056 | dnnl_gOIw4i8o2i = dnnl_aBCd4c8b2c, |
1057 | dnnl_gOIw4o8i2o = dnnl_aBCd4b8c2b, |
1058 | |
1059 | // weights w/ groups, 4D |
1060 | dnnl_gIOhw16i16o = dnnl_aCBde16c16b, |
1061 | dnnl_gIOhw16o16i = dnnl_aCBde16b16c, |
1062 | dnnl_gOhwi16o = dnnl_aBdec16b, |
1063 | dnnl_gOhwI16o2i = dnnl_aBdeC16b2c, |
1064 | dnnl_gOhwI16o4i = dnnl_aBdeC16b4c, |
1065 | dnnl_gOhwi32o = dnnl_aBdec32b, |
1066 | dnnl_gOhwi4o = dnnl_aBdec4b, |
1067 | dnnl_gOhwi8o = dnnl_aBdec8b, |
1068 | dnnl_Goihw16g = dnnl_Abcde16a, |
1069 | dnnl_gOIhw16i16o = dnnl_aBCde16c16b, |
1070 | dnnl_gOIhw16o16i = dnnl_aBCde16b16c, |
1071 | dnnl_gOihw16o = dnnl_aBcde16b, |
1072 | dnnl_gOIhw2i8o4i = dnnl_aBCde2c8b4c, |
1073 | dnnl_gOIhw4i16o4i = dnnl_aBCde4c16b4c, |
1074 | dnnl_gOIhw16i16o4i = dnnl_aBCde16c16b4c, |
1075 | dnnl_gOIhw16i16o2i = dnnl_aBCde16c16b2c, |
1076 | dnnl_gOIhw16o16i2o = dnnl_aBCde16b16c2b, |
1077 | dnnl_gOIhw4i4o = dnnl_aBCde4c4b, |
1078 | dnnl_gOIhw4o4i = dnnl_aBCde4b4c, |
1079 | dnnl_gOihw4o = dnnl_aBcde4b, |
1080 | dnnl_Goihw8g = dnnl_Abcde8a, |
1081 | dnnl_Goihw4g = dnnl_Abcde4a, |
1082 | dnnl_gOIhw8i16o2i = dnnl_aBCde8c16b2c, |
1083 | dnnl_gOIhw8i8o = dnnl_aBCde8c8b, |
1084 | dnnl_gOIhw8o16i2o = dnnl_aBCde8b16c2b, |
1085 | dnnl_gIOhw8o16i2o = dnnl_aCBde8b16c2b, |
1086 | dnnl_gOIhw8o8i = dnnl_aBCde8b8c, |
1087 | dnnl_gOIhw8o4i = dnnl_aBCde8b4c, |
1088 | dnnl_Goihw32g = dnnl_Abcde32a, |
1089 | dnnl_gOwhi16o = dnnl_aBedc16b, |
1090 | |
1091 | dnnl_OIw4o8i8o4i = dnnl_ABc4a8b8a4b, |
1092 | dnnl_OIhw4o8i8o4i = dnnl_ABcd4a8b8a4b, |
1093 | dnnl_IOw4i8o8i4o = dnnl_BAc4b8a8b4a, |
1094 | dnnl_IOhw4i8o8i4o = dnnl_BAcd4b8a8b4a, |
1095 | dnnl_IOdhw4i8o8i4o = dnnl_BAcde4b8a8b4a, |
1096 | |
1097 | dnnl_OIhw2o8i8o2i = dnnl_ABcd2a8b8a2b, |
1098 | dnnl_gOIw4o8i8o4i = dnnl_aBCd4b8c8b4c, |
1099 | dnnl_gOIhw4o8i8o4i = dnnl_aBCde4b8c8b4c, |
1100 | dnnl_gOIdhw4o8i8o4i = dnnl_aBCdef4b8c8b4c, |
1101 | dnnl_gIOw4i8o8i4o = dnnl_aCBd4c8b8c4b, |
1102 | dnnl_gIOhw4i8o8i4o = dnnl_aCBde4c8b8c4b, |
1103 | dnnl_gIOdhw4i8o8i4o = dnnl_aCBdef4c8b8c4b, |
1104 | dnnl_gOIhw2o8i8o2i = dnnl_aBCde2b8c8b2c, |
1105 | dnnl_gOIhw2i4o2i = dnnl_aBCde2c4b2c, |
1106 | dnnl_gOIhw2o4i2o = dnnl_aBCde2b4c2b, |
1107 | dnnl_gOIhw4i8o2i = dnnl_aBCde4c8b2c, |
1108 | dnnl_gOIhw4o8i2o = dnnl_aBCde4b8c2b, |
1109 | |
1110 | // weights w/ groups, 6D |
1111 | dnnl_gIOdhw16i16o = dnnl_aCBdef16c16b, |
1112 | dnnl_gIOdhw16o16i = dnnl_aCBdef16b16c, |
1113 | dnnl_gOdhwi16o = dnnl_aBdefc16b, |
1114 | dnnl_gOdhwI16o2i = dnnl_aBdefC16b2c, |
1115 | dnnl_gOdhwI16o4i = dnnl_aBdefC16b4c, |
1116 | dnnl_gOdhwi4o = dnnl_aBdefc4b, |
1117 | dnnl_gOdhwi8o = dnnl_aBdefc8b, |
1118 | dnnl_gOdwhi16o = dnnl_aBdfec16b, |
1119 | dnnl_gOIdhw16i16o = dnnl_aBCdef16c16b, |
1120 | dnnl_gOIdhw4i16o4i = dnnl_aBCdef4c16b4c, |
1121 | dnnl_gOIdhw16i16o4i = dnnl_aBCdef16c16b4c, |
1122 | dnnl_gOIdhw2i8o4i = dnnl_aBCdef2c8b4c, |
1123 | dnnl_gOIdhw16i16o2i = dnnl_aBCdef16c16b2c, |
1124 | dnnl_gOIdhw16o16i = dnnl_aBCdef16b16c, |
1125 | dnnl_gOIdhw16o16i2o = dnnl_aBCdef16b16c2b, |
1126 | dnnl_gOidhw16o = dnnl_aBcdef16b, |
1127 | dnnl_gOIdhw4i4o = dnnl_aBCdef4c4b, |
1128 | dnnl_gOIdhw4o4i = dnnl_aBCdef4b4c, |
1129 | dnnl_gOidhw4o = dnnl_aBcdef4b, |
1130 | dnnl_gOIdhw8i16o2i = dnnl_aBCdef8c16b2c, |
1131 | dnnl_gOIdhw8i8o = dnnl_aBCdef8c8b, |
1132 | dnnl_gOIdhw8o16i2o = dnnl_aBCdef8b16c2b, |
1133 | dnnl_gIOdhw8o16i2o = dnnl_aCBdef8b16c2b, |
1134 | dnnl_gOIdhw8o8i = dnnl_aBCdef8b8c, |
1135 | dnnl_gOIdhw8o4i = dnnl_aBCdef8b4c, |
1136 | dnnl_Goidhw16g = dnnl_Abcdef16a, |
1137 | dnnl_Goidhw32g = dnnl_Abcdef32a, |
1138 | dnnl_gOIdhw2i4o2i = dnnl_aBCdef2c4b2c, |
1139 | dnnl_gOIdhw4i8o2i = dnnl_aBCdef4c8b2c, |
1140 | dnnl_gOIdhw2o4i2o = dnnl_aBCdef2b4c2b, |
1141 | dnnl_gOIdhw4o8i2o = dnnl_aBCdef4b8c2b, |
1142 | // weights, 3D |
1143 | dnnl_Owi32o = dnnl_Acb32a, |
1144 | dnnl_OwI32o2i = dnnl_AcB32a2b, |
1145 | dnnl_OwI32o4i = dnnl_AcB32a4b, |
1146 | dnnl_Owi48o = dnnl_Acb48a, |
1147 | dnnl_OwI48o2i = dnnl_AcB48a2b, |
1148 | dnnl_OwI48o4i = dnnl_AcB48a4b, |
1149 | dnnl_Owi64o = dnnl_Acb64a, |
1150 | dnnl_OwI64o2i = dnnl_AcB64a2b, |
1151 | dnnl_OwI64o4i = dnnl_AcB64a4b, |
1152 | dnnl_wIo2i = dnnl_cBa2b, |
1153 | dnnl_wIo4i = dnnl_cBa4b, |
1154 | dnnl_gOwi32o = dnnl_aBdc32b, |
1155 | dnnl_gOwI32o2i = dnnl_aBdC32b2c, |
1156 | dnnl_gOwI32o4i = dnnl_aBdC32b4c, |
1157 | dnnl_gOwi48o = dnnl_aBdc48b, |
1158 | dnnl_gOwI48o2i = dnnl_aBdC48b2c, |
1159 | dnnl_gOwI48o4i = dnnl_aBdC48b4c, |
1160 | dnnl_gOwi64o = dnnl_aBdc64b, |
1161 | dnnl_gOwI64o2i = dnnl_aBdC64b2c, |
1162 | dnnl_gOwI64o4i = dnnl_aBdC64b4c, |
1163 | dnnl_gwio = dnnl_adcb, |
1164 | dnnl_gwIo2i = dnnl_adCb2c, |
1165 | dnnl_gwIo4i = dnnl_adCb4c, |
1166 | // weights, 4D |
1167 | dnnl_OhwI32o = dnnl_Acdb32a, |
1168 | dnnl_OhwI32o2i = dnnl_AcdB32a2b, |
1169 | dnnl_OhwI32o4i = dnnl_AcdB32a4b, |
1170 | dnnl_Ohwi48o = dnnl_Acdb48a, |
1171 | dnnl_OhwI48o2i = dnnl_AcdB48a2b, |
1172 | dnnl_OhwI48o4i = dnnl_AcdB48a4b, |
1173 | dnnl_Ohwi64o = dnnl_Acdb64a, |
1174 | dnnl_OhwI64o2i = dnnl_AcdB64a2b, |
1175 | dnnl_OhwI64o4i = dnnl_AcdB64a4b, |
1176 | dnnl_hwIo2i = dnnl_cdBa2b, |
1177 | dnnl_hwIo4i = dnnl_cdBa4b, |
1178 | dnnl_gOhwI32o = dnnl_aBdec32b, |
1179 | dnnl_gOhwI32o2i = dnnl_aBdeC32b2c, |
1180 | dnnl_gOhwI32o4i = dnnl_aBdeC32b4c, |
1181 | dnnl_gOhwi48o = dnnl_aBdec48b, |
1182 | dnnl_gOhwI48o2i = dnnl_aBdeC48b2c, |
1183 | dnnl_gOhwI48o4i = dnnl_aBdeC48b4c, |
1184 | dnnl_gOhwi64o = dnnl_aBdec64b, |
1185 | dnnl_gOhwI64o2i = dnnl_aBdeC64b2c, |
1186 | dnnl_gOhwI64o4i = dnnl_aBdeC64b4c, |
1187 | dnnl_ghwio = dnnl_adecb, |
1188 | dnnl_ghwIo2i = dnnl_adeCb2c, |
1189 | dnnl_ghwIo4i = dnnl_adeCb4c, |
1190 | // weights, 5D |
1191 | dnnl_Odhwi32o = dnnl_Acdeb32a, |
1192 | dnnl_OdhwI32o2i = dnnl_AcdeB32a2b, |
1193 | dnnl_OdhwI32o4i = dnnl_AcdeB32a4b, |
1194 | dnnl_Odhwi48o = dnnl_Acdeb48a, |
1195 | dnnl_OdhwI48o2i = dnnl_AcdeB48a2b, |
1196 | dnnl_OdhwI48o4i = dnnl_AcdeB48a4b, |
1197 | dnnl_Odhwi64o = dnnl_Acdeb64a, |
1198 | dnnl_OdhwI64o2i = dnnl_AcdeB64a2b, |
1199 | dnnl_OdhwI64o4i = dnnl_AcdeB64a4b, |
1200 | dnnl_dhwIo2i = dnnl_cdeBa2b, |
1201 | dnnl_dhwIo4i = dnnl_cdeBa4b, |
1202 | dnnl_gOdhwi32o = dnnl_aBdefc32b, |
1203 | dnnl_gOdhwI32o2i = dnnl_aBdefC32b2c, |
1204 | dnnl_gOdhwI32o4i = dnnl_aBdefC32b4c, |
1205 | dnnl_gOdhwi48o = dnnl_aBdefc48b, |
1206 | dnnl_gOdhwI48o2i = dnnl_aBdefC48b2c, |
1207 | dnnl_gOdhwI48o4i = dnnl_aBdefC48b4c, |
1208 | dnnl_gOdhwi64o = dnnl_aBdefc64b, |
1209 | dnnl_gOdhwI64o2i = dnnl_aBdefC64b2c, |
1210 | dnnl_gOdhwI64o4i = dnnl_aBdefC64b4c, |
1211 | dnnl_gdhwio = dnnl_adefcb, |
1212 | dnnl_gdhwIo2i = dnnl_adefCb2c, |
1213 | dnnl_gdhwIo4i = dnnl_adefCb4c, |
1214 | dnnl_OI16i32o4i = dnnl_AB16b32a4b, |
1215 | dnnl_OI16i48o4i = dnnl_AB16b48a4b, |
1216 | dnnl_OI16i64o4i = dnnl_AB16b64a4b, |
1217 | dnnl_OI16i16o2i = dnnl_AB16b16a2b, |
1218 | dnnl_OI16i32o2i = dnnl_AB16b32a2b, |
1219 | dnnl_OI16i48o2i = dnnl_AB16b48a2b, |
1220 | dnnl_OI16i64o2i = dnnl_AB16b64a2b, |
1221 | dnnl_OIw16i32o4i = dnnl_ABc16b32a4b, |
1222 | dnnl_OIw16i48o4i = dnnl_ABc16b48a4b, |
1223 | dnnl_OIw16i64o4i = dnnl_ABc16b64a4b, |
1224 | dnnl_OIw16i32o2i = dnnl_ABc16b32a2b, |
1225 | dnnl_OIw16i48o2i = dnnl_ABc16b48a2b, |
1226 | dnnl_OIw16i64o2i = dnnl_ABc16b64a2b, |
1227 | dnnl_OIhw16i32o4i = dnnl_ABcd16b32a4b, |
1228 | dnnl_OIhw16i48o4i = dnnl_ABcd16b48a4b, |
1229 | dnnl_OIhw16i64o4i = dnnl_ABcd16b64a4b, |
1230 | dnnl_OIhw16i32o2i = dnnl_ABcd16b32a2b, |
1231 | dnnl_OIhw16i48o2i = dnnl_ABcd16b48a2b, |
1232 | dnnl_OIhw16i64o2i = dnnl_ABcd16b64a2b, |
1233 | dnnl_OIdhw16i32o4i = dnnl_ABcde16b32a4b, |
1234 | dnnl_OIdhw16i48o4i = dnnl_ABcde16b48a4b, |
1235 | dnnl_OIdhw16i64o4i = dnnl_ABcde16b64a4b, |
1236 | dnnl_OIdhw16i32o2i = dnnl_ABcde16b32a2b, |
1237 | dnnl_OIdhw16i48o2i = dnnl_ABcde16b48a2b, |
1238 | dnnl_OIdhw16i64o2i = dnnl_ABcde16b64a2b, |
1239 | dnnl_OwI16i16o2i = dnnl_AcB16b16a2b, |
1240 | dnnl_OwI16i16o4i = dnnl_AcB16b16a4b, |
1241 | dnnl_OhwI16i16o2i = dnnl_AcdB16b16a2b, |
1242 | dnnl_OhwI16i16o4i = dnnl_AcdB16b16a4b, |
1243 | dnnl_OdhwI16i16o2i = dnnl_AcdeB16b16a2b, |
1244 | dnnl_OdhwI16i16o4i = dnnl_AcdeB16b16a4b, |
1245 | dnnl_gOwI16i16o2i = dnnl_aBdC16c16b2c, |
1246 | dnnl_gOwI16i16o4i = dnnl_aBdC16c16b4c, |
1247 | dnnl_gOhwI16i16o2i = dnnl_aBdeC16c16b2c, |
1248 | dnnl_gOhwI16i16o4i = dnnl_aBdeC16c16b4c, |
1249 | dnnl_gOdhwI16i16o2i = dnnl_aBdefC16c16b2c, |
1250 | dnnl_gOdhwI16i16o4i = dnnl_aBdefC16c16b4c, |
1251 | dnnl_OwI16i32o2i = dnnl_AcB16b32a2b, |
1252 | dnnl_OwI16i32o4i = dnnl_AcB16b32a4b, |
1253 | dnnl_OwI16i48o2i = dnnl_AcB16b48a2b, |
1254 | dnnl_OwI16i48o4i = dnnl_AcB16b48a4b, |
1255 | dnnl_OwI16i64o2i = dnnl_AcB16b64a2b, |
1256 | dnnl_OwI16i64o4i = dnnl_AcB16b64a4b, |
1257 | dnnl_gOwI16i32o2i = dnnl_aBdC16c32b2c, |
1258 | dnnl_gOwI16i32o4i = dnnl_aBdC16c32b4c, |
1259 | dnnl_gOwI16i48o2i = dnnl_aBdC16c48b2c, |
1260 | dnnl_gOwI16i48o4i = dnnl_aBdC16c48b4c, |
1261 | dnnl_gOwI16i64o2i = dnnl_aBdC16c64b2c, |
1262 | dnnl_gOwI16i64o4i = dnnl_aBdC16c64b4c, |
1263 | dnnl_OhwI16i32o2i = dnnl_AcdB16b32a2b, |
1264 | dnnl_OhwI16i32o4i = dnnl_AcdB16b32a4b, |
1265 | dnnl_OhwI16i48o2i = dnnl_AcdB16b48a2b, |
1266 | dnnl_OhwI16i48o4i = dnnl_AcdB16b48a4b, |
1267 | dnnl_OhwI16i64o2i = dnnl_AcdB16b64a2b, |
1268 | dnnl_OhwI16i64o4i = dnnl_AcdB16b64a4b, |
1269 | dnnl_gOhwI16i32o2i = dnnl_aBdeC16c32b2c, |
1270 | dnnl_gOhwI16i32o4i = dnnl_aBdeC16c32b4c, |
1271 | dnnl_gOhwI16i48o2i = dnnl_aBdeC16c48b2c, |
1272 | dnnl_gOhwI16i48o4i = dnnl_aBdeC16c48b4c, |
1273 | dnnl_gOhwI16i64o2i = dnnl_aBdeC16c64b2c, |
1274 | dnnl_gOhwI16i64o4i = dnnl_aBdeC16c64b4c, |
1275 | dnnl_OdhwI16i32o2i = dnnl_AcdeB16b32a2b, |
1276 | dnnl_OdhwI16i32o4i = dnnl_AcdeB16b32a4b, |
1277 | dnnl_OdhwI16i48o2i = dnnl_AcdeB16b48a2b, |
1278 | dnnl_OdhwI16i48o4i = dnnl_AcdeB16b48a4b, |
1279 | dnnl_OdhwI16i64o2i = dnnl_AcdeB16b64a2b, |
1280 | dnnl_OdhwI16i64o4i = dnnl_AcdeB16b64a4b, |
1281 | dnnl_gOdhwI16i32o2i = dnnl_aBdefC16c32b2c, |
1282 | dnnl_gOdhwI16i32o4i = dnnl_aBdefC16c32b4c, |
1283 | dnnl_gOdhwI16i48o2i = dnnl_aBdefC16c48b2c, |
1284 | dnnl_gOdhwI16i48o4i = dnnl_aBdefC16c48b4c, |
1285 | dnnl_gOdhwI16i64o2i = dnnl_aBdefC16c64b2c, |
1286 | dnnl_gOdhwI16i64o4i = dnnl_aBdefC16c64b4c, |
1287 | dnnl_hwioG16g = dnnl_decbA16a, |
1288 | dnnl_NCdhw40n16c = dnnl_ABcde40a16b, |
1289 | dnnl_NCw40n16c = dnnl_ABc40a16b, |
1290 | dnnl_NChw40n16c = dnnl_ABcd40a16b, |
1291 | dnnl_NCw40n32c = dnnl_ABc40a32b, |
1292 | dnnl_NChw40n32c = dnnl_ABcd40a32b, |
1293 | dnnl_NCdhw40n32c = dnnl_ABcde40a32b, |
1294 | dnnl_OIdhw4o8i8o2i = dnnl_ABcde4a8b8a2b, |
1295 | dnnl_OIhw4o8i8o2i = dnnl_ABcd4a8b8a2b, |
1296 | dnnl_OIw4o8i8o2i = dnnl_ABc4a8b8a2b, |
1297 | dnnl_gOIdhw4o8i8o2i = dnnl_aBCdef4b8c8b2c, |
1298 | dnnl_gOIhw4o8i8o2i = dnnl_aBCde4b8c8b2c, |
1299 | dnnl_gOIw4o8i8o2i = dnnl_aBCd4b8c8b2c, |
1300 | dnnl_IOdhw4i8o8i2o = dnnl_BAcde4b8a8b2a, |
1301 | dnnl_IOhw4i8o8i2o = dnnl_BAcd4b8a8b2a, |
1302 | dnnl_IOw4i8o8i2o = dnnl_BAc4b8a8b2a, |
1303 | dnnl_gIOdhw4i8o8i2o = dnnl_aCBdef4c8b8c2b, |
1304 | dnnl_gIOhw4i8o8i2o = dnnl_aCBde4c8b8c2b, |
1305 | dnnl_gIOw4i8o8i2o = dnnl_aCBd4c8b8c2b, |
1306 | dnnl_NCw2c32n8c = dnnl_ABc2b32a8b, |
1307 | dnnl_NChw2c32n8c = dnnl_ABcd2b32a8b, |
1308 | dnnl_NCdhw2c32n8c = dnnl_ABcde2b32a8b, |
1309 | dnnl_OIw2i8o16i4o = dnnl_ABc2b8a16b4a, |
1310 | dnnl_OIhw2i8o16i4o = dnnl_ABcd2b8a16b4a, |
1311 | dnnl_OIdhw2i8o16i4o = dnnl_ABcde2b8a16b4a, |
1312 | dnnl_OIw2o8i16o4i = dnnl_ABc2a8b16a4b, |
1313 | dnnl_OIw2o8i16o2i = dnnl_ABc2a8b16a2b, |
1314 | dnnl_IOw2i8o16i4o = dnnl_BAc2b8a16b4a, |
1315 | dnnl_IOw2i8o16i2o = dnnl_BAc2b8a16b2a, |
1316 | dnnl_OIhw2o8i16o4i = dnnl_ABcd2a8b16a4b, |
1317 | dnnl_OIhw2o8i16o2i = dnnl_ABcd2a8b16a2b, |
1318 | dnnl_IOhw2i8o16i4o = dnnl_BAcd2b8a16b4a, |
1319 | dnnl_IOhw2i8o16i2o = dnnl_BAcd2b8a16b2a, |
1320 | dnnl_OIdhw2o8i16o4i = dnnl_ABcde2a8b16a4b, |
1321 | dnnl_OIdhw2o8i16o2i = dnnl_ABcde2a8b16a2b, |
1322 | dnnl_IOdhw2i8o16i4o = dnnl_BAcde2b8a16b4a, |
1323 | dnnl_IOdhw2i8o16i2o = dnnl_BAcde2b8a16b2a, |
1324 | dnnl_gOIw2o8i16o2i = dnnl_aBCd2b8c16b2c, |
1325 | dnnl_gIOw2i8o16i2o = dnnl_aCBd2c8b16c2b, |
1326 | dnnl_gIOhw2i8o16i2o = dnnl_aBCde2c8b16c2b, |
1327 | dnnl_gIOdhw2i8o16i2o = dnnl_aBCdef2c8b16c2b, |
1328 | dnnl_gOIhw2o8i16o2i = dnnl_aBCde2b8c16b2c, |
1329 | dnnl_gOIdhw2o8i16o2i = dnnl_aBCdef2b8c16b2c, |
1330 | dnnl_gOIw2o8i16o4i = dnnl_aBCd2b8c16b4c, |
1331 | dnnl_gOIhw2o8i16o4i = dnnl_aBCde2b8c16b4c, |
1332 | } dnnl_format_tag_t; |
1333 | |
1334 | /// @} dnnl_api_memory |
1335 | |
1336 | /// @addtogroup dnnl_api_primitives |
1337 | /// @{ |
1338 | /// @addtogroup dnnl_api_primitives_common |
1339 | /// @{ |
1340 | |
/// Kinds of propagation.
///
/// NOTE(review): the explicit, widely-spaced numeric values (64/96/128/...)
/// look intentional — presumably reserved for ABI stability and future
/// entries; confirm before renumbering or inserting values.
typedef enum {
    // TODO: suggest renames
    /// Undefined propagation type.
    dnnl_prop_kind_undef = 0,
    /// Forward data propagation (training mode). In this mode primitives
    /// perform computations necessary for subsequent backward propagation.
    dnnl_forward_training = 64,
    /// Forward data propagation (inference mode). In this mode primitives
    /// perform only computations that are necessary for inference and omit
    /// computations that are necessary only for backward propagation.
    dnnl_forward_inference = 96,
    /// Forward data propagation (alias for @c dnnl_forward_inference).
    dnnl_forward_scoring = dnnl_forward_inference,
    /// Forward data propagation (alias for @c dnnl_forward_training).
    dnnl_forward = dnnl_forward_training,
    /// Backward propagation (with respect to all parameters).
    dnnl_backward = 128,
    /// Backward data propagation.
    dnnl_backward_data = 160,
    /// Backward weights propagation.
    dnnl_backward_weights = 192,
    /// Backward bias propagation.
    dnnl_backward_bias = 193,
} dnnl_prop_kind_t;
1366 | |
/// Kinds of primitives. Used to implement a way to extend the library with new
/// primitives without changing the ABI.
///
/// Values before #dnnl_primitive_kind_max are assigned implicitly (0, 1, ...),
/// so new public kinds must only be appended at the end of the list.
typedef enum {
    /// Undefined primitive
    dnnl_undefined_primitive,
    /// A reorder primitive.
    dnnl_reorder,
    /// A shuffle primitive.
    dnnl_shuffle,
    /// A (out-of-place) concat primitive.
    dnnl_concat,
    /// A sum primitive.
    dnnl_sum,
    /// A convolution primitive.
    dnnl_convolution,
    /// A deconvolution primitive.
    dnnl_deconvolution,
    /// An element-wise primitive.
    dnnl_eltwise,
    /// A softmax primitive.
    dnnl_softmax,
    /// A pooling primitive.
    dnnl_pooling,
    /// An LRN primitive.
    dnnl_lrn,
    /// A batch normalization primitive.
    dnnl_batch_normalization,
    /// A layer normalization primitive.
    dnnl_layer_normalization,
    /// An inner product primitive.
    dnnl_inner_product,
    /// An RNN primitive.
    dnnl_rnn,
    /// A matrix multiplication primitive (internal).
    dnnl_gemm,
    /// A binary primitive.
    dnnl_binary,
    /// A logsoftmax primitive.
    dnnl_logsoftmax,
    /// A matrix multiplication primitive.
    dnnl_matmul,
    /// A resampling primitive.
    dnnl_resampling,
    /// A pooling version 2 primitive (pooling with dilation support).
    dnnl_pooling_v2,
    /// A reduction primitive.
    dnnl_reduction,
    /// A PReLU primitive.
    dnnl_prelu,
    /// A softmax version 2 primitive (softmax with destination memory
    /// descriptor and algorithm kind).
    dnnl_softmax_v2,
    /// A layer normalization version 2 primitive (layer normalization with
    /// destination memory descriptor).
    dnnl_layer_normalization_v2,

    /// Parameter to allow internal only primitives without undefined behavior.
    /// This parameter is chosen to be valid for so long as sizeof(int) >= 2.
    dnnl_primitive_kind_max = 0x7fff,
} dnnl_primitive_kind_t;
1427 | |
/// Kinds of algorithms.
///
/// NOTE(review): most values are explicit hex constants grouped by primitive
/// family (convolution, eltwise, pooling, RNN, binary, ...) — presumably for
/// ABI stability; confirm before renumbering existing entries.
typedef enum {
    /// Undefined algorithm kind.
    dnnl_alg_kind_undef,
    /// Direct convolution
    dnnl_convolution_direct = 0x1,
    /// Winograd convolution
    dnnl_convolution_winograd = 0x2,
    /// Convolution algorithm (either direct or Winograd) is chosen just in
    /// time
    dnnl_convolution_auto = 0x3,
    /// Direct deconvolution
    dnnl_deconvolution_direct = 0xa,
    /// Winograd deconvolution
    dnnl_deconvolution_winograd = 0xb,
    /// Eltwise: ReLU
    dnnl_eltwise_relu = 0x1f,
    /// Eltwise: hyperbolic tangent non-linearity (tanh)
    dnnl_eltwise_tanh = 0x2f,
    /// Eltwise: exponential linear unit (elu)
    dnnl_eltwise_elu = 0x3f,
    /// Eltwise: square
    dnnl_eltwise_square = 0x4f,
    /// Eltwise: abs
    dnnl_eltwise_abs = 0x5f,
    /// Eltwise: square root
    dnnl_eltwise_sqrt = 0x6f,
    /// Eltwise: linear
    dnnl_eltwise_linear = 0x7f,
    /// Eltwise: bounded_relu
    dnnl_eltwise_bounded_relu = 0x8f,
    /// Eltwise: soft_relu
    dnnl_eltwise_soft_relu = 0x9f,
    /// Eltwise: soft_relu version 2
    dnnl_eltwise_soft_relu_v2 = 0xa0,
    /// Eltwise: hardsigmoid
    dnnl_eltwise_hardsigmoid = 0xa1,
    /// Eltwise: logistic
    dnnl_eltwise_logistic = 0xaf,
    /// Eltwise: exponent
    dnnl_eltwise_exp = 0xbf,
    /// Eltwise: gelu
    ///
    /// @note Tanh approximation formula is used to approximate
    /// the cumulative distribution function of a Gaussian here
    dnnl_eltwise_gelu_tanh = 0xcf,
    /// Eltwise: tanh-based gelu (alias for dnnl_eltwise_gelu_tanh)
    dnnl_eltwise_gelu = dnnl_eltwise_gelu_tanh,
    /// Eltwise: swish
    dnnl_eltwise_swish = 0xdf,
    /// Eltwise: natural logarithm
    dnnl_eltwise_log = 0xef,
    /// Eltwise: clip
    dnnl_eltwise_clip = 0xff,
    /// Eltwise: clip version 2
    dnnl_eltwise_clip_v2 = 0x10,
    /// Eltwise: pow
    dnnl_eltwise_pow = 0x20,
    /// Eltwise: erf-based gelu
    dnnl_eltwise_gelu_erf = 0x30,
    /// Eltwise: round
    dnnl_eltwise_round = 0x40,
    /// Eltwise: logsigmoid
    dnnl_eltwise_logsigmoid = 0x50,
    /// Eltwise: mish
    dnnl_eltwise_mish = 0x60,
    /// Eltwise: hardswish
    dnnl_eltwise_hardswish = 0x70,
    /// Eltwise: ReLU (dst for backward)
    dnnl_eltwise_relu_use_dst_for_bwd = 0x100,
    /// Eltwise: hyperbolic tangent non-linearity (tanh) (dst for backward)
    dnnl_eltwise_tanh_use_dst_for_bwd = 0x101,
    /// Eltwise: exponential linear unit (elu) (dst for backward)
    dnnl_eltwise_elu_use_dst_for_bwd = 0x102,
    /// Eltwise: square root (dst for backward)
    dnnl_eltwise_sqrt_use_dst_for_bwd = 0x103,
    /// Eltwise: logistic (dst for backward)
    dnnl_eltwise_logistic_use_dst_for_bwd = 0x104,
    /// Eltwise: exp (dst for backward)
    dnnl_eltwise_exp_use_dst_for_bwd = 0x105,
    /// Eltwise: clip version 2 (dst for backward)
    dnnl_eltwise_clip_v2_use_dst_for_bwd = 0x106,
    /// Max pooling
    dnnl_pooling_max = 0x1ff,
    /// Average pooling include padding
    dnnl_pooling_avg_include_padding = 0x2ff,
    /// Average pooling exclude padding
    dnnl_pooling_avg_exclude_padding = 0x3ff,
    /// Average pooling (alias for #dnnl_pooling_avg_exclude_padding)
    dnnl_pooling_avg = dnnl_pooling_avg_exclude_padding,
    /// Local response normalization (LRN) across multiple channels
    dnnl_lrn_across_channels = 0xaff,
    /// LRN within a single channel
    dnnl_lrn_within_channel = 0xbff,
    /// RNN cell
    dnnl_vanilla_rnn = 0x1fff,
    /// LSTM cell
    dnnl_vanilla_lstm = 0x2fff,
    /// GRU cell
    dnnl_vanilla_gru = 0x3fff,
    /// GRU cell with linear before reset
    ///
    /// Modification of original GRU cell. Differs from #dnnl_vanilla_gru
    /// in how the new memory gate is calculated:
    /// \f[ c_t = tanh(W_c*x_t + b_{c_x} + r_t*(U_c*h_{t-1}+b_{c_h})) \f]
    /// Primitive expects 4 biases on input:
    /// \f$[b_{u}, b_{r}, b_{c_x}, b_{c_h}]\f$
    dnnl_lbr_gru = 0x4fff,
    /// AUGRU cell
    dnnl_vanilla_augru = 0x5fff,
    /// AUGRU cell with linear before reset
    dnnl_lbr_augru = 0x6fff,
    /// Binary add
    dnnl_binary_add = 0x1fff0,
    /// Binary mul
    dnnl_binary_mul = 0x1fff1,
    /// Binary max
    dnnl_binary_max = 0x1fff2,
    /// Binary min
    dnnl_binary_min = 0x1fff3,
    /// Binary div
    dnnl_binary_div = 0x1fff4,
    /// Binary sub
    dnnl_binary_sub = 0x1fff5,
    /// Binary greater or equal
    dnnl_binary_ge = 0x1fff6,
    /// Binary greater than
    dnnl_binary_gt = 0x1fff7,
    /// Binary less or equal
    dnnl_binary_le = 0x1fff8,
    /// Binary less than
    dnnl_binary_lt = 0x1fff9,
    /// Binary equal
    dnnl_binary_eq = 0x1fffa,
    /// Binary not equal
    dnnl_binary_ne = 0x1fffb,
    /// Nearest Neighbor Resampling Method
    dnnl_resampling_nearest = 0x2fff0,
    /// Linear Resampling Method
    dnnl_resampling_linear = 0x2fff1,
    // Reduction kinds below take implicit consecutive values following
    // dnnl_resampling_linear (0x2fff2, 0x2fff3, ...).
    /// Reduction using max
    dnnl_reduction_max,
    /// Reduction using min
    dnnl_reduction_min,
    /// Reduction using sum
    dnnl_reduction_sum,
    /// Reduction using mul
    dnnl_reduction_mul,
    /// Reduction using mean
    dnnl_reduction_mean,
    /// Reduction using lp norm
    dnnl_reduction_norm_lp_max,
    /// Reduction using lp norm
    dnnl_reduction_norm_lp_sum,
    /// Reduction using lp norm without final pth-root
    dnnl_reduction_norm_lp_power_p_max,
    /// Reduction using lp norm without final pth-root
    dnnl_reduction_norm_lp_power_p_sum,
    /// Softmax
    dnnl_softmax_accurate = 0x30000,
    /// Logsoftmax (takes the implicit value 0x30001)
    dnnl_softmax_log,
} dnnl_alg_kind_t;
1589 | |
/// Flags for normalization primitives.
///
/// These are bit flags (each value is a distinct power of two) and may be
/// combined with bitwise OR.
typedef enum {
    /// Use no normalization flags
    ///
    /// If specified
    /// - on forward training propagation mean and variance are computed and
    /// stored as output
    /// - on backward propagation compute full derivative wrt data
    /// - on backward propagation prop_kind == #dnnl_backward_data has the same
    /// behavior as prop_kind == #dnnl_backward
    dnnl_normalization_flags_none = 0x0U,

    /// Use global statistics
    ///
    /// If specified
    /// - on forward propagation use mean and variance provided by user (input)
    /// - on backward propagation reduces the amount of computations, since
    /// mean and variance are considered as constants
    ///
    /// If not specified:
    /// - on forward propagation mean and variance are computed and stored as
    /// output
    /// - on backward propagation compute full derivative wrt data
    dnnl_use_global_stats = 0x1U,

    /// Use scale and shift parameters
    ///
    /// If specified:
    /// - on forward propagation use scale and shift (aka scale and bias) for
    /// the normalization results
    /// - on backward propagation (for prop_kind == #dnnl_backward) compute
    /// diff wrt scale and shift (hence one extra output used)
    ///
    /// If not specified:
    /// - on backward propagation prop_kind == #dnnl_backward_data has the
    /// same behavior as prop_kind == #dnnl_backward
    dnnl_use_scaleshift = 0x2U,

    /// Fuse with ReLU
    ///
    /// The flag implies negative slope being 0. On training this is the only
    /// configuration supported. For inference, to use non-zero negative slope
    /// consider using @ref dev_guide_attributes_post_ops.
    ///
    /// If specified:
    /// - on inference this option behaves the same as if the primitive were
    /// fused with ReLU using post ops API with zero negative slope.
    /// - on training primitive requires workspace (required to be able to
    /// perform backward pass)
    dnnl_fuse_norm_relu = 0x4U,

    /// Use scale parameter
    ///
    /// If specified:
    /// - on forward propagation use scale for the normalization results
    /// - on backward propagation (for prop_kind == #dnnl_backward) compute
    /// diff wrt scale (hence one extra output used)
    dnnl_use_scale = 0x8U,

    /// Use shift parameter
    ///
    /// If specified:
    /// - on forward propagation use shift (aka bias) for the normalization
    /// results
    /// - on backward propagation (for prop_kind == #dnnl_backward) compute
    /// diff wrt shift (hence one extra output used)
    dnnl_use_shift = 0x10U,

    /// Fuse with Add and then fuse with ReLU
    ///
    /// If specified:
    ///
    /// - on forward propagation apply element-wise binary Add operation to
    /// the normalization results with an additional input tensor and then
    /// apply ReLU with negative slope being 0.
    /// - on training primitive requires workspace (required to be able to
    /// perform backward pass).
    /// - on backward propagation save the result of backward ReLU operation
    /// with input tensor and workspace from forward pass to extra output
    /// tensor and then perform backward normalization.
    dnnl_fuse_norm_add_relu = 0x20U,

} dnnl_normalization_flags_t;
1673 | |
1674 | /// @} dnnl_api_primitives_common |
1675 | /// @} dnnl_api_primitives |
1676 | |
1677 | /// @addtogroup dnnl_api_memory |
1678 | /// @{ |
1679 | |
/// Maximum number of dimensions a tensor can have. Only restricts the amount
/// of space used for the tensor description. Individual computational
/// primitives may support only tensors of certain dimensions.
#define DNNL_MAX_NDIMS 12

/// A wildcard value for dimensions that are unknown at a primitive creation
/// time.
/// (INT64_MIN cannot collide with any valid dimension or stride value.)
#define DNNL_RUNTIME_DIM_VAL INT64_MIN

/// A `size_t` counterpart of the DNNL_RUNTIME_DIM_VAL.
/// For instance, this value is returned by dnnl_memory_desc_get_size() if
/// either of the dimensions or strides equal to #DNNL_RUNTIME_DIM_VAL.
#define DNNL_RUNTIME_SIZE_VAL ((size_t)DNNL_RUNTIME_DIM_VAL)
1693 | |
/// @cond DO_NOT_DOCUMENT_THIS
/// Hex representation for a **special** quiet NAN (!= NAN from math.h).
/// A union is used so the exact bit pattern (0x7fc000d0) is preserved at
/// compile time; reading the `.f` member type-puns the bits into a float,
/// which is well-defined behavior in C (unlike a pointer cast).
static const union {
    unsigned u;
    float f;
} DNNL_RUNTIME_F32_VAL_REP = {0x7fc000d0};
/// @endcond

/// A wildcard value for floating point values that are unknown at a primitive
/// creation time.
#define DNNL_RUNTIME_F32_VAL (DNNL_RUNTIME_F32_VAL_REP.f)

/// @cond DO_NOT_DOCUMENT_THIS
/// Sentinel for unknown int32_t values; INT32_MIN cannot be valid user data.
static const int DNNL_RUNTIME_S32_VAL_REP = INT32_MIN;
/// @endcond

/// A wildcard value for int32_t values that are unknown at a primitive creation
/// time.
#define DNNL_RUNTIME_S32_VAL DNNL_RUNTIME_S32_VAL_REP
1713 | |
/// A type to describe tensor dimension. 64-bit signed, so it can also hold
/// the #DNNL_RUNTIME_DIM_VAL sentinel and large dimension products.
typedef int64_t dnnl_dim_t;

/// A type to describe tensor dimensions. A fixed-size array; the number of
/// meaningful entries is given by the `ndims` field of the memory descriptor.
typedef dnnl_dim_t dnnl_dims_t[DNNL_MAX_NDIMS];
1719 | |
/// Generic description of blocked data layout for most memory formats.
///
/// @sa @ref dev_guide_understanding_memory_formats
typedef struct {
    /// The strides between the outermost blocks.
    /// In case of plain (non-blocked) formats the strides between dimensions.
    dnnl_dims_t strides;
    // Innermost section
    // ASSUMPTION: the innermost blocks are always dense
    /// The number of innermost blocks, e.g. 3 in case of `OIhw_4i16o4i_`
    int inner_nblks;
    /// The size of the blocks, e.g. `{4, 16, 4}` in case of `OIhw_4i16o4i`
    dnnl_dims_t inner_blks;
    /// The logical indices of the blocks, e.g. `{1, 0, 1}` in case of
    /// `4i16o4i`, because `i` is the 1st dim and `o` is the 0th dim
    dnnl_dims_t inner_idxs;
} dnnl_blocking_desc_t;
1737 | |
/// Winograd-specific formats.
/// The exact layouts are implementation details and are not exposed to users.
typedef enum {
    /// Undefined memory format, used for empty memory descriptors.
    dnnl_wino_undef = 0,
    // Tensors of weights for 2x3 winograd convolutions.
    dnnl_wino_wei_aaOIoi, ///< Internal weights format for 2x3 Winograd
    dnnl_wino_wei_aaOio, ///< Internal weights format for 2x3 Winograd
    dnnl_wino_wei_aaOBiOo, ///< Internal weights format for 2x3 Winograd
    // Tensor of weights for 4x3 convolution.
    dnnl_wino_wei_OBaaIBOIio ///< Internal weights format for 4x3 Winograd
} dnnl_wino_memory_format_t;
1749 | |
/// Description of tensor of weights for winograd 2x3 convolution.
typedef struct {
    /// Winograd weights layout kind (see #dnnl_wino_memory_format_t).
    dnnl_wino_memory_format_t wino_format;
    // NOTE(review): presumably `r` is the filter spatial size and `alpha`
    // is the Winograd transform tile size (alpha = m + r - 1) -- confirm.
    int r;
    int alpha;
    /// Number of input channels.
    int ic;
    /// Number of output channels.
    int oc;
    // Channel blocking parameters; the exact blocking scheme is an
    // implementation detail of the Winograd kernels.
    int ic_block;
    int oc_block;
    int ic2_block;
    int oc2_block;
    /// Scale adjustment applied to the weights (internal).
    float adj_scale;
    /// Size of the weights tensor in bytes, including any auxiliary data.
    size_t size;
} dnnl_wino_desc_t;
1764 | |
/// Memory formats for packed RNN weights (names mirror the plain
/// `ldigo`/`ldgoi`/`ldio` RNN weights layouts; the packed layouts themselves
/// are opaque).
typedef enum {
    /// Undefined (not packed) format.
    dnnl_packed_format_undef = 0,
    dnnl_ldigo_p,
    dnnl_ldgoi_p,
    dnnl_ldio_p
} dnnl_rnn_packed_memory_format_t;
1771 | |
/// Maximum number of parts of RNN weights tensor that require separate
/// computation.
#define DNNL_RNN_MAX_N_PARTS 4

/// Description of tensor of packed weights for rnn.
typedef struct {
    /// Packed weights layout kind.
    dnnl_rnn_packed_memory_format_t format;
    /// Number of parts the weights tensor is split into.
    int n_parts;
    // NOTE(review): `n` and `ldb` look like GEMM-style matrix dimensions
    // (columns / leading dimension) of the packed buffers -- confirm.
    int n;
    int ldb;
    /// Sizes of the individual parts.
    int parts[DNNL_RNN_MAX_N_PARTS];
    /// Packed buffer size of each part, in bytes.
    size_t part_pack_size[DNNL_RNN_MAX_N_PARTS];
    /// Per-part flag; non-zero when the part is stored packed -- TODO confirm.
    unsigned pack_part[DNNL_RNN_MAX_N_PARTS];
    /// Offset to the compensation data within the buffer -- TODO confirm units.
    size_t offset_compensation;
    /// Total size of the packed weights, in bytes.
    size_t size;
    /// For future backwards compatibility.
    char reserved[200];
} dnnl_rnn_packed_desc_t;
1789 | |
/// Flags for memory special features
typedef enum {
    /// No extra features: plain memory with no additional buffers.
    dnnl_memory_extra_flag_none = 0x0U,
    /// Indicates the weights have an additional buffer, that depends on the
    /// @p compensation_mask.
    ///
    /// For instance, in 4D case with the compensation mask equals (1 << 0)
    /// the additional buffer would consist of OC values:
    /// O[oc : 0,OC] =
    ///  -128 * SUM(ic : 0,IC; kh : 0,KH; kw : 0,KW){ weights(oc, ic, kh, kw) }
    dnnl_memory_extra_flag_compensation_conv_s8s8 = 0x1U,
    /// Indicates the memory carries a scale adjustment factor
    /// (see the @p scale_adjust field of the extra descriptor).
    dnnl_memory_extra_flag_scale_adjust = 0x2U,
    /// Indicates the weights have an additional compensation buffer for
    /// u8s8 RNN computations.
    dnnl_memory_extra_flag_rnn_u8s8_compensation = 0x4U,

    /// Alias kept for GPU implementations; same semantics as
    /// #dnnl_memory_extra_flag_rnn_u8s8_compensation.
    dnnl_memory_extra_flag_gpu_rnn_u8s8_compensation
            = dnnl_memory_extra_flag_rnn_u8s8_compensation,
    /// Indicates the weights have an additional compensation buffer used
    /// when the source tensor is quantized asymmetrically.
    dnnl_memory_extra_flag_compensation_conv_asymmetric_src = 0x8U,
    /// Indicates the weights have an additional compensation buffer for
    /// s8s8 RNN computations.
    dnnl_memory_extra_flag_rnn_s8s8_compensation = 0x16U,
} dnnl_memory_extra_flags_t;
1808 | |
/// Description of extra information stored in memory
typedef struct {
    /// The flags contain arbitrary extra information, such as compensation.
    /// @sa dnnl_memory_extra_flags_t
    uint64_t flags;
    /// Compensation mask
    int compensation_mask;
    /// Scale applied to the data
    float scale_adjust;
    /// Compensation mask for asymmetric quantization
    int asymm_compensation_mask;
    /// For future backwards compatibility
    char reserved[60];
} dnnl_memory_extra_desc_t;
1823 | |
1824 | /// Memory descriptor. The description is based on a number of dimensions, |
1825 | /// dimensions themselves, plus information about elements type and memory |
1826 | /// format. Additionally, contains format-specific descriptions of the data |
1827 | /// layout. |
1828 | typedef struct { |
1829 | /// Number of dimensions |
1830 | int ndims; |
1831 | /// Dimensions in the following order: |
1832 | /// - CNN data tensors: mini-batch, channel, spatial |
1833 | /// (<code>{N, C, [[D,] H,] W}</code>) |
1834 | /// - CNN weight tensors: group (optional), output channel, input channel, |
1835 | /// spatial (<code>{[G,] O, I, [[D,] H,] W}</code>) |
1836 | /// - RNN data tensors: time, mini-batch, channels (<code>{T, N, C}</code>) |
1837 | /// or layers, directions, states, mini-batch, channels (<code>{L, D, S, N, C}</code>) |
1838 | /// - RNN weight tensor: layers, directions, input channel, gates, output channels |
1839 | /// (<code>{L, D, I, G, O}</code>). |
1840 | /// |
1841 | /// @note |
1842 | /// The order of dimensions does not depend on the memory format, so |
1843 | /// whether the data is laid out in #dnnl_nchw or #dnnl_nhwc |
1844 | /// the dims for 4D CN data tensor would be <code>{N, C, H, W}</code>. |
1845 | dnnl_dims_t dims; |
1846 | |
1847 | /// Data type of the tensor elements. |
1848 | dnnl_data_type_t data_type; |
1849 | |
1850 | /// Size of the data including padding in each dimension. |
1851 | dnnl_dims_t padded_dims; |
1852 | |
1853 | /// Per-dimension offset from the padding to actual data, the top-level |
1854 | /// tensor with offsets applied must lie within the padding area. |
1855 | dnnl_dims_t padded_offsets; |
1856 | |
1857 | /// Offset from memory origin to the current block, non-zero only in |
1858 | /// a description of a memory sub-block. |
1859 | dnnl_dim_t offset0; |
1860 | |
1861 | /// Memory format kind. |
1862 | dnnl_format_kind_t format_kind; |
1863 | union { |
1864 | /// Description of the data layout for memory formats that use |
1865 | /// blocking. |
1866 | dnnl_blocking_desc_t blocking; |
1867 | /// Tensor of weights for integer 8bit winograd convolution. |
1868 | dnnl_wino_desc_t wino_desc; |
1869 | /// Tensor of packed weights for RNN. |
1870 | dnnl_rnn_packed_desc_t rnn_packed_desc; |
1871 | // ... other descriptions possible |
1872 | } format_desc; |
1873 | |
1874 | dnnl_memory_extra_desc_t ; |
1875 | } dnnl_memory_desc_t; |
1876 | |
/// @struct dnnl_memory
/// An opaque structure to describe a memory.
struct dnnl_memory;

/// A memory handle.
typedef struct dnnl_memory *dnnl_memory_t;

/// A constant memory handle.
typedef const struct dnnl_memory *const_dnnl_memory_t;

/// Special pointer value that indicates that a memory object should not have
/// an underlying buffer.
#define DNNL_MEMORY_NONE (NULL)

/// Special pointer value that indicates that the library needs to allocate an
/// underlying buffer for a memory object.
/// ((size_t)-1 yields an address that can never be a real user buffer.)
#define DNNL_MEMORY_ALLOCATE ((void *)(size_t)-1)
1894 | |
1895 | /// @} dnnl_api_memory |
1896 | |
1897 | /// @addtogroup dnnl_api_primitives |
1898 | /// @{ |
1899 | /// @addtogroup dnnl_api_primitives_common |
1900 | /// @{ |
1901 | |
/// A pointer to any of the operation descriptors.
/// (Type-erased: the pointed-to struct identifies itself via its leading
/// `primitive_kind` field.)
typedef void *dnnl_op_desc_t;
/// A pointer to any of the operation descriptors (constant variant).
typedef const void *const_dnnl_op_desc_t;
1906 | |
1907 | /// @} dnnl_api_primitives_common |
1908 | /// @} dnnl_api_primitives |
1909 | |
1910 | /// @addtogroup dnnl_api_primitives |
1911 | /// @{ |
1912 | |
1913 | /// @addtogroup dnnl_api_convolution |
1914 | /// @{ |
1915 | |
/// A descriptor of a convolution operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_convolution.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward_data,
    /// #dnnl_backward_weights, and #dnnl_backward_bias.
    dnnl_prop_kind_t prop_kind;
    /// The kind of the convolution algorithm. Possible values:
    /// #dnnl_convolution_direct.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// Weights memory descriptor.
    dnnl_memory_desc_t weights_desc;
    /// Weights gradient memory descriptor.
    dnnl_memory_desc_t diff_weights_desc;
    /// Bias memory descriptor.
    dnnl_memory_desc_t bias_desc;
    /// Bias gradient memory descriptor.
    dnnl_memory_desc_t diff_bias_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
    /// Convolution strides in each spatial dimension.
    dnnl_dims_t strides;
    /// Convolution dilates in each spatial dimension.
    /// NOTE(review): a dilate value of 0 appears to denote a dense (regular)
    /// convolution -- confirm against the dev guide.
    dnnl_dims_t dilates;
    /// Padding in each spatial dimension. padding[0] is a padding in the
    /// beginning (@p padding_l), padding[1] is a padding in the end (@p
    /// padding_r).
    dnnl_dims_t padding[2];
    /// The accumulator data type. Initialized automatically.
    dnnl_data_type_t accum_data_type;
} dnnl_convolution_desc_t;
1955 | |
1956 | /// @} dnnl_api_convolution |
1957 | |
1958 | /// @addtogroup dnnl_api_deconvolution |
1959 | /// @{ |
1960 | |
/// A descriptor of a deconvolution operation. An alias of the convolution
/// structure; the same fields apply, with primitive_kind set accordingly.
typedef dnnl_convolution_desc_t dnnl_deconvolution_desc_t;
1963 | |
1964 | /// @} dnnl_api_deconvolution |
1965 | |
1966 | /// @addtogroup dnnl_api_shuffle |
1967 | /// @{ |
1968 | |
/// A descriptor of a shuffle operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_shuffle.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// Source and destination memory descriptor,
    /// and source and destination gradient memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Axis for shuffling.
    int axis;
    /// Number of groups.
    /// NOTE(review): the field name suggests "size of a group" while the
    /// comment says "number of groups" -- confirm which one it is.
    dnnl_dim_t group_size;
} dnnl_shuffle_desc_t;
1985 | |
1986 | /// @} dnnl_api_shuffle |
1987 | |
1988 | /// @addtogroup dnnl_api_eltwise |
1989 | /// @{ |
1990 | |
/// A descriptor of a element-wise operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_eltwise.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// The kind of eltwise algorithm. Possible values: #dnnl_eltwise_relu,
    /// #dnnl_eltwise_tanh, #dnnl_eltwise_elu, #dnnl_eltwise_square,
    /// #dnnl_eltwise_abs, #dnnl_eltwise_sqrt, #dnnl_eltwise_linear,
    /// #dnnl_eltwise_bounded_relu, #dnnl_eltwise_soft_relu,
    /// #dnnl_eltwise_soft_relu_v2, #dnnl_eltwise_logistic, #dnnl_eltwise_exp,
    /// #dnnl_eltwise_gelu_tanh, #dnnl_eltwise_swish, #dnnl_eltwise_log,
    /// #dnnl_eltwise_clip, #dnnl_eltwise_clip_v2, #dnnl_eltwise_pow,
    /// #dnnl_eltwise_gelu_erf, #dnnl_eltwise_round, #dnnl_eltwise_logsigmoid,
    /// #dnnl_eltwise_mish, #dnnl_eltwise_hardswish, #dnnl_eltwise_hardsigmoid.
    /// Possible values for passing destination memory on backward:
    /// #dnnl_eltwise_relu_use_dst_for_bwd, #dnnl_eltwise_tanh_use_dst_for_bwd,
    /// #dnnl_eltwise_elu_use_dst_for_bwd, #dnnl_eltwise_sqrt_use_dst_for_bwd,
    /// #dnnl_eltwise_logistic_use_dst_for_bwd,
    /// #dnnl_eltwise_exp_use_dst_for_bwd,
    /// #dnnl_eltwise_clip_v2_use_dst_for_bwd.
    dnnl_alg_kind_t alg_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_data_desc;
    /// Algorithm specific parameters; the meaning of @p alpha and @p beta
    /// depends on @p alg_kind.
    /// Accordance table:
    ///  - #dnnl_eltwise_relu: @p alpha -- negative slope, @p beta ignored
    ///  - #dnnl_eltwise_tanh: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_elu: @p alpha -- negative slope, @p beta ignored
    ///  - #dnnl_eltwise_square: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_abs: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_sqrt: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_linear: @p alpha -- scale, @p beta -- shift
    ///  - #dnnl_eltwise_bounded_relu: @p alpha -- upper bound, @p beta ignored
    ///  - #dnnl_eltwise_soft_relu: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_soft_relu_v2: @p alpha -- soft_relu_v2 arg scaling, @p beta ignored
    ///  - #dnnl_eltwise_logistic: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_exp: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_gelu_tanh: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_swish: @p alpha -- sigmoid arg scaling, @p beta ignored
    ///  - #dnnl_eltwise_log: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_clip: @p alpha -- lower bound, @p beta -- upper bound
    ///  - #dnnl_eltwise_clip_v2: @p alpha -- lower bound, @p beta -- upper bound
    ///  - #dnnl_eltwise_pow: @p alpha -- scale, @p beta -- exponent
    ///  - #dnnl_eltwise_gelu_erf: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_round: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_logsigmoid: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_mish: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_hardswish: @p alpha and @p beta ignored
    ///  - #dnnl_eltwise_hardsigmoid: @p alpha -- scale, @p beta -- shift
    float alpha, beta;
} dnnl_eltwise_desc_t;
2047 | |
2048 | /// @} dnnl_api_eltwise |
2049 | |
2050 | /// @addtogroup dnnl_api_softmax |
2051 | /// @{ |
2052 | |
/// A descriptor of a Softmax operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_softmax.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_desc;
    /// The axis along which to perform the softmax.
    int softmax_axis;
} dnnl_softmax_desc_t;
2068 | |
2069 | /// @} dnnl_api_softmax |
2070 | |
2071 | /// @addtogroup dnnl_api_softmax_v2 |
2072 | /// @{ |
2073 | |
/// A descriptor of a Softmax operation (v2: separate source/destination
/// descriptors and a selectable algorithm).
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_softmax_v2.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// The axis along which to perform the softmax.
    int softmax_axis;
    /// Softmax algorithm. Possible values: #dnnl_softmax_accurate and
    /// #dnnl_softmax_log.
    dnnl_alg_kind_t alg_kind;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
} dnnl_softmax_v2_desc_t;
2096 | |
2097 | /// @} dnnl_api_softmax_v2 |
2098 | |
2099 | /// @addtogroup dnnl_api_logsoftmax |
2100 | /// @{ |
2101 | |
/// A descriptor of a LogSoftmax operation. An alias of Softmax structure, but
/// primitive_kind must be #dnnl_logsoftmax.
typedef dnnl_softmax_desc_t dnnl_logsoftmax_desc_t;
2105 | |
2106 | /// @} dnnl_api_logsoftmax |
2107 | |
2108 | /// @addtogroup dnnl_api_pooling |
2109 | /// @{ |
2110 | |
/// A descriptor of a pooling operation.
/// @sa dnnl_pooling_v2_desc_t for the variant with dilation support.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_pooling.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// The kind of pooling algorithm.
    /// Possible values: #dnnl_pooling_max,
    /// #dnnl_pooling_avg_include_padding, and
    /// #dnnl_pooling_avg_exclude_padding.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
    /// Pooling kernel strides for spatial dimensions.
    dnnl_dims_t strides;
    /// Pooling kernel spatial dimensions.
    dnnl_dims_t kernel;
    /// Padding in each spatial dimension. padding[0] is a padding in the
    /// beginning (@p padding_l), padding[1] is a padding in the end (@p
    /// padding_r).
    dnnl_dims_t padding[2];
    /// The accumulator data type. Initialized automatically.
    dnnl_data_type_t accum_data_type;
} dnnl_pooling_desc_t;
2143 | |
2144 | /// @} dnnl_api_pooling |
2145 | |
2146 | /// @addtogroup dnnl_api_pooling_v2 |
2147 | /// @{ |
2148 | |
/// A descriptor of a pooling operation with dilation support.
/// The leading fields mirror dnnl_pooling_desc_t; the dilation field is
/// appended at the end so the common prefix layout stays identical.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_pooling_v2.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// The kind of pooling algorithm.
    /// Possible values: #dnnl_pooling_max,
    /// #dnnl_pooling_avg_include_padding, and
    /// #dnnl_pooling_avg_exclude_padding.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
    /// Pooling kernel strides for spatial dimensions.
    dnnl_dims_t strides;
    /// Pooling kernel spatial dimensions.
    dnnl_dims_t kernel;
    /// Padding in each spatial dimension. padding[0] is a padding in the
    /// beginning (@p padding_l), padding[1] is a padding in the end (@p
    /// padding_r).
    dnnl_dims_t padding[2];
    /// The accumulator data type. Initialized automatically.
    dnnl_data_type_t accum_data_type;
    /// Pooling dilations for spatial dimensions.
    dnnl_dims_t dilation;
} dnnl_pooling_v2_desc_t;
2183 | |
2184 | /// @} dnnl_api_pooling_v2 |
2185 | |
2186 | /// @addtogroup dnnl_api_prelu |
2187 | /// @{ |
/// A descriptor of a PReLU operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_prelu.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward
    dnnl_prop_kind_t prop_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Learnable parameter alpha memory descriptor.
    /// Alpha describes negative slope.
    dnnl_memory_desc_t weights_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_data_desc;
    /// Learnable parameter alpha gradient memory descriptor.
    dnnl_memory_desc_t diff_weights_desc;
} dnnl_prelu_desc_t;
2205 | |
2206 | /// @} dnnl_api_prelu |
2207 | |
2208 | /// @addtogroup dnnl_api_lrn |
2209 | /// @{ |
2210 | |
/// A descriptor of a Local Response Normalization (LRN) operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_lrn.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// LRN algorithm. Possible values: #dnnl_lrn_within_channel and
    /// #dnnl_lrn_across_channels.
    dnnl_alg_kind_t alg_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_data_desc;
    /// The number of channels to sum over (for cross-channel LRN) or the side
    /// length of the square region to sum over (for within-channel LRN).
    dnnl_dim_t local_size;
    /// LRN alpha parameter.
    float lrn_alpha;
    /// LRN beta parameter.
    float lrn_beta;
    /// LRN k parameter.
    float lrn_k;
} dnnl_lrn_desc_t;
2236 | |
2237 | /// @} dnnl_api_lrn |
2238 | |
2239 | /// @addtogroup dnnl_api_batch_normalization |
2240 | /// @{ |
2241 | |
/// A descriptor of a Batch Normalization operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_batch_normalization.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_data_desc;
    /// Scale and shift data and gradient memory descriptors.
    ///
    /// Scaleshift memory descriptor uses 2D #dnnl_nc format[2,Channels]. 1-st
    /// dimension contains gamma parameter, 2-nd dimension contains beta
    /// parameter.
    dnnl_memory_desc_t data_scaleshift_desc;
    dnnl_memory_desc_t diff_data_scaleshift_desc;
    /// Statistics memory descriptor.
    ///
    /// Statistics (mean or variance) descriptor use 1D #dnnl_x format[Channels].
    dnnl_memory_desc_t stat_desc;
    /// Batch normalization epsilon parameter.
    float batch_norm_epsilon;
    /// Normalization flags (a combination of #dnnl_normalization_flags_t
    /// values).
    unsigned flags;
} dnnl_batch_normalization_desc_t;
2269 | |
2270 | /// @} dnnl_api_batch_normalization |
2271 | |
2272 | /// @addtogroup dnnl_api_layer_normalization |
2273 | /// @{ |
2274 | |
/// A descriptor of a Layer Normalization operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_layer_normalization.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// Source and destination memory descriptor.
    dnnl_memory_desc_t data_desc;
    /// Source and destination gradient memory descriptor.
    dnnl_memory_desc_t diff_data_desc;
    /// Scale and shift data and gradient memory descriptors.
    ///
    /// Scaleshift memory descriptor uses 2D #dnnl_ab
    /// format[2, normalized_dim] where 1-st dimension contains gamma parameter,
    /// 2-nd dimension contains beta parameter. Normalized_dim is equal to the
    /// last logical dimension of the data tensor across which normalization is
    /// performed.
    dnnl_memory_desc_t data_scaleshift_desc;
    dnnl_memory_desc_t diff_data_scaleshift_desc;
    /// Mean and variance data memory descriptors.
    ///
    /// Statistics (mean and variance) memory descriptor is the k-dimensional tensor
    /// where k is equal to data_tensor_ndims - 1 and may have any plain
    /// (stride[last_dim] == 1) user-provided format.
    dnnl_memory_desc_t stat_desc;
    /// Layer normalization epsilon parameter.
    float layer_norm_epsilon;
    /// Normalization flags (a combination of #dnnl_normalization_flags_t
    /// values).
    unsigned flags;
} dnnl_layer_normalization_desc_t;
2306 | |
2307 | /// @} dnnl_api_layer_normalization |
2308 | |
2309 | /// @addtogroup dnnl_api_layer_normalization_v2 |
2310 | /// @{ |
2311 | |
/// A descriptor of a Layer Normalization operation (v2: separate
/// source/destination descriptors, appended after the v1-compatible fields).
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_layer_normalization_v2.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// Scale and shift data and gradient memory descriptors.
    ///
    /// Scaleshift memory descriptor uses 2D #dnnl_ab
    /// format[2, normalized_dim] where 1-st dimension contains gamma parameter,
    /// 2-nd dimension contains beta parameter. Normalized_dim is equal to the
    /// last logical dimension of the data tensor across which normalization is
    /// performed.
    dnnl_memory_desc_t data_scaleshift_desc;
    dnnl_memory_desc_t diff_data_scaleshift_desc;
    /// Mean and variance data memory descriptors.
    ///
    /// Statistics (mean and variance) memory descriptor is the k-dimensional tensor
    /// where k is equal to data_tensor_ndims - 1 and may have any plain
    /// (stride[last_dim] == 1) user-provided format.
    dnnl_memory_desc_t stat_desc;
    /// Layer normalization epsilon parameter.
    float layer_norm_epsilon;
    /// Normalization flags (a combination of #dnnl_normalization_flags_t
    /// values).
    unsigned flags;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
} dnnl_layer_normalization_v2_desc_t;
2347 | |
2348 | /// @} dnnl_api_layer_normalization_v2 |
2349 | |
2350 | /// @addtogroup dnnl_api_inner_product |
2351 | /// @{ |
2352 | |
/// A descriptor of an inner product (fully connected) operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_inner_product.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, #dnnl_backward_data,
    /// #dnnl_backward_weights, and #dnnl_backward_bias.
    dnnl_prop_kind_t prop_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// Weights memory descriptor.
    dnnl_memory_desc_t weights_desc;
    /// Weights gradient memory descriptor.
    dnnl_memory_desc_t diff_weights_desc;
    /// Bias memory descriptor.
    dnnl_memory_desc_t bias_desc;
    /// Bias gradient memory descriptor.
    dnnl_memory_desc_t diff_bias_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
    /// The accumulator data type. Initialized automatically.
    dnnl_data_type_t accum_data_type;
} dnnl_inner_product_desc_t;
2381 | |
2382 | /// @} dnnl_api_inner_product |
2383 | |
2384 | /// @addtogroup dnnl_api_rnn |
2385 | /// @{ |
2386 | |
/// Flags for RNN cell. (Currently only the empty set is defined; the enum
/// reserves room for future flag values.)
typedef enum {
    /// Undefined RNN flags
    dnnl_rnn_flags_undef = 0x0
} dnnl_rnn_flags_t;
2392 | |
/// A direction of RNN primitive execution.
typedef enum {
    /// Unidirectional execution of RNN primitive from left to right.
    dnnl_unidirectional_left2right,
    /// Unidirectional execution of RNN primitive from right to left.
    dnnl_unidirectional_right2left,
    /// Bidirectional execution of RNN primitive with concatenation of the
    /// results.
    dnnl_bidirectional_concat,
    /// Bidirectional execution of RNN primitive with summation of the
    /// results.
    dnnl_bidirectional_sum,
    /// Alias for #dnnl_unidirectional_left2right.
    dnnl_unidirectional = dnnl_unidirectional_left2right,
} dnnl_rnn_direction_t;
2408 | |
/// A descriptor for an RNN operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_rnn.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, and #dnnl_backward.
    dnnl_prop_kind_t prop_kind;
    /// RNN cell kind. Must be one of #dnnl_vanilla_rnn,
    /// #dnnl_vanilla_lstm, #dnnl_vanilla_gru, or #dnnl_lbr_gru.
    /// NOTE(review): #DNNL_ARG_AUGRU_ATTENTION below suggests AUGRU cell
    /// kinds are also accepted -- confirm against dnnl_alg_kind_t.
    dnnl_alg_kind_t cell_kind;
    /// The direction of RNN primitive execution.
    dnnl_rnn_direction_t direction;
    /// Source layer memory descriptor.
    dnnl_memory_desc_t src_layer_desc;
    /// Source iteration memory descriptor for hidden state.
    dnnl_memory_desc_t src_iter_desc;
    /// Source iteration memory descriptor for cell state.
    dnnl_memory_desc_t src_iter_c_desc;
    /// Weights layer memory descriptor.
    dnnl_memory_desc_t weights_layer_desc;
    /// Weights iteration memory descriptor.
    dnnl_memory_desc_t weights_iter_desc;
    /// Bias memory descriptor.
    dnnl_memory_desc_t bias_desc;
    /// Destination layer memory descriptor.
    dnnl_memory_desc_t dst_layer_desc;
    /// Destination iteration memory descriptor for hidden state.
    dnnl_memory_desc_t dst_iter_desc;
    /// Destination iteration memory descriptor for cell state.
    dnnl_memory_desc_t dst_iter_c_desc;
    /// Weights peephole memory descriptor.
    /// This memory descriptor is equal to zero memory descriptor in case of
    /// non-peephole LSTMs and other non-LSTM RNNs.
    dnnl_memory_desc_t weights_peephole_desc;
    /// Weights projection memory descriptor.
    /// This memory descriptor is equal to zero memory descriptor in case of
    /// non-projection LSTMs and other non-LSTM RNNs.
    dnnl_memory_desc_t weights_projection_desc;

    /// Source gradient (diff) layer memory descriptor.
    dnnl_memory_desc_t diff_src_layer_desc;
    /// Source gradient (diff) iteration memory descriptor for hidden state.
    dnnl_memory_desc_t diff_src_iter_desc;
    /// Source gradient (diff) iteration memory descriptor for cell state.
    dnnl_memory_desc_t diff_src_iter_c_desc;
    /// Weights gradient (diff) layer memory descriptor.
    dnnl_memory_desc_t diff_weights_layer_desc;
    /// Weights gradient (diff) iteration memory descriptor.
    dnnl_memory_desc_t diff_weights_iter_desc;
    /// Bias gradient (diff) memory descriptor.
    dnnl_memory_desc_t diff_bias_desc;
    /// Destination gradient (diff) layer memory descriptor.
    dnnl_memory_desc_t diff_dst_layer_desc;
    /// Destination gradient (diff) iteration memory descriptor for hidden
    /// state.
    dnnl_memory_desc_t diff_dst_iter_desc;
    /// Destination gradient (diff) iteration memory descriptor for cell
    /// state.
    dnnl_memory_desc_t diff_dst_iter_c_desc;
    /// Weights gradient (diff) peephole memory descriptor.
    /// This memory descriptor is equal to zero memory descriptor in case of
    /// non-peephole LSTMs and other non-LSTM RNNs.
    dnnl_memory_desc_t diff_weights_peephole_desc;
    /// Weights gradient (diff) projection memory descriptor.
    /// This memory descriptor is equal to zero memory descriptor in case of
    /// non-projection LSTMs and other non-LSTM RNNs.
    dnnl_memory_desc_t diff_weights_projection_desc;

    /// RNN cell flags (a bitwise combination of #dnnl_rnn_flags_t values).
    unsigned int flags;
    /// Activation function used for vanilla_rnn cell kind.
    /// Must be either #dnnl_eltwise_relu or #dnnl_eltwise_tanh.
    dnnl_alg_kind_t activation_kind;
    /// Alpha parameter of the activation function specified by
    /// @p activation_kind.
    float alpha;
    /// Beta parameter of the activation function specified by
    /// @p activation_kind.
    float beta;

} dnnl_rnn_desc_t;
2485 | |
2486 | /// @} dnnl_api_rnn |
2487 | |
2488 | /// @addtogroup dnnl_api_binary |
2489 | /// @{ |
2490 | |
/// A descriptor of a binary operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_binary.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of the binary algorithm. Possible values:
    /// #dnnl_binary_add, #dnnl_binary_mul, #dnnl_binary_max, #dnnl_binary_min,
    /// #dnnl_binary_div and #dnnl_binary_sub.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptors: src_desc[0] describes the first input
    /// (#DNNL_ARG_SRC_0) and src_desc[1] the second input (#DNNL_ARG_SRC_1).
    dnnl_memory_desc_t src_desc[2];
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
} dnnl_binary_desc_t;
2505 | |
2506 | /// @} dnnl_api_binary |
2507 | |
2508 | /// @addtogroup dnnl_api_matmul |
2509 | /// @{ |
2510 | |
/// A descriptor of a matrix multiplication operation.
///
/// 2D case:
///     dst[m, n] = src[m, k] * weights[k, n] + bias[m, n]
///
/// 3D case (mb is the batch dimension):
///     dst[mb, m, n] = src[mb, m, k] * weights[mb, k, n] + bias[mb, m, n]
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_matmul.
    dnnl_primitive_kind_t primitive_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Weights memory descriptor.
    dnnl_memory_desc_t weights_desc;
    /// Bias memory descriptor.
    dnnl_memory_desc_t bias_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// The accumulator data type. Initialized automatically.
    dnnl_data_type_t accum_data_type;
} dnnl_matmul_desc_t;
2533 | |
2534 | /// @} dnnl_api_matmul |
2535 | |
2536 | /// @addtogroup dnnl_api_resampling |
2537 | /// @{ |
2538 | |
/// A descriptor of resampling operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_resampling.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of propagation. Possible values: #dnnl_forward_training,
    /// #dnnl_forward_inference, and #dnnl_backward_data.
    dnnl_prop_kind_t prop_kind;
    /// The kind of the resampling algorithm. Possible values:
    /// #dnnl_resampling_nearest, #dnnl_resampling_linear.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Source gradient (diff) memory descriptor.
    dnnl_memory_desc_t diff_src_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Destination gradient (diff) memory descriptor.
    dnnl_memory_desc_t diff_dst_desc;
    /// Resampling factor in each spatial dimension.
    float factors[DNNL_MAX_NDIMS];
} dnnl_resampling_desc_t;
2561 | |
2562 | /// @} dnnl_api_resampling |
2563 | |
2564 | /// @addtogroup dnnl_api_reduction |
2565 | /// @{ |
2566 | |
/// A descriptor of reduction operation.
typedef struct {
    /// The kind of primitive. Used for self-identifying the primitive
    /// descriptor. Must be #dnnl_reduction.
    dnnl_primitive_kind_t primitive_kind;
    /// The kind of reduction algorithm. Possible values:
    /// #dnnl_reduction_max, #dnnl_reduction_min, #dnnl_reduction_sum,
    /// #dnnl_reduction_mul, #dnnl_reduction_mean, #dnnl_reduction_norm_lp_max,
    /// #dnnl_reduction_norm_lp_sum, #dnnl_reduction_norm_lp_power_p_max,
    /// #dnnl_reduction_norm_lp_power_p_sum.
    dnnl_alg_kind_t alg_kind;
    /// Source memory descriptor.
    dnnl_memory_desc_t src_desc;
    /// Destination memory descriptor.
    dnnl_memory_desc_t dst_desc;
    /// Algorithm-specific parameters.
    /// Correspondence table:
    /// #dnnl_reduction_max: @p p and @p eps are ignored
    /// #dnnl_reduction_min: @p p and @p eps are ignored
    /// #dnnl_reduction_norm_lp_max: @p p -- power, @p eps -- epsilon
    /// #dnnl_reduction_norm_lp_sum: @p p -- power, @p eps -- epsilon
    /// #dnnl_reduction_norm_lp_power_p_max: @p p -- power, @p eps -- epsilon
    /// #dnnl_reduction_norm_lp_power_p_sum: @p p -- power, @p eps -- epsilon
    /// #dnnl_reduction_sum: @p p and @p eps are ignored
    /// #dnnl_reduction_mul: @p p and @p eps are ignored
    /// #dnnl_reduction_mean: @p p and @p eps are ignored
    float p, eps;
} dnnl_reduction_desc_t;
2595 | |
2596 | /// @} dnnl_api_reduction |
2597 | |
2598 | /// @} dnnl_api_primitives |
2599 | |
2600 | /// @addtogroup dnnl_api_engine |
2601 | /// @{ |
2602 | |
/// @brief Kinds of engines (abstractions of computational devices).
typedef enum {
    /// An unspecified engine.
    dnnl_any_engine,
    /// CPU engine.
    dnnl_cpu,
    /// GPU engine.
    dnnl_gpu,
} dnnl_engine_kind_t;
2612 | |
/// @struct dnnl_engine
/// @brief An opaque structure to describe an engine.
struct dnnl_engine;
/// @brief An engine handle (a pointer to an opaque engine object).
typedef struct dnnl_engine *dnnl_engine_t;
#if 0
// FIXME: looks like this never happens
/// @brief A constant engine handle.
typedef const struct dnnl_engine *const_dnnl_engine_t;
#endif
2623 | |
2624 | /// @} dnnl_api_engine |
2625 | |
2626 | /// @addtogroup dnnl_api_primitives |
2627 | /// @{ |
2628 | /// @addtogroup dnnl_api_primitives_common |
2629 | /// @{ |
2630 | |
/// @struct dnnl_primitive_desc_iterator
/// @brief An opaque structure to describe a primitive descriptor iterator.
struct dnnl_primitive_desc_iterator;

/// @brief A primitive descriptor iterator handle.
typedef struct dnnl_primitive_desc_iterator *dnnl_primitive_desc_iterator_t;

/// @brief A constant (read-only) primitive descriptor iterator handle.
typedef const struct dnnl_primitive_desc_iterator
        *const_dnnl_primitive_desc_iterator_t;
2641 | |
/// @struct dnnl_primitive_desc
/// @brief An opaque structure to describe a primitive descriptor.
struct dnnl_primitive_desc;

/// @brief A primitive descriptor handle.
typedef struct dnnl_primitive_desc *dnnl_primitive_desc_t;

/// @brief A constant (read-only) primitive descriptor handle.
typedef const struct dnnl_primitive_desc *const_dnnl_primitive_desc_t;
2651 | |
2652 | /// @} dnnl_api_primitives_common |
2653 | |
2654 | /// @addtogroup dnnl_api_attributes |
2655 | /// @{ |
2656 | |
/// Floating-point math mode
typedef enum {
    /// Default behavior, no downconversions allowed
    dnnl_fpmath_mode_strict,
    /// Implicit f32->bf16 conversions allowed
    dnnl_fpmath_mode_bf16,
    /// Implicit f32->f16 conversions allowed
    dnnl_fpmath_mode_f16,
    /// Implicit f32->f16 or f32->bf16 conversions allowed
    dnnl_fpmath_mode_any,
    /// Implicit f32->tf32 (TensorFloat-32) conversions allowed
    dnnl_fpmath_mode_tf32,
} dnnl_fpmath_mode_t;
2670 | |
/// Scratchpad mode -- controls who owns the temporary (scratchpad) memory.
typedef enum {
    /// The library manages the scratchpad allocation according to the policy
    /// specified by the `DNNL_ENABLE_CONCURRENT_EXEC`
    /// [build option](@ref dev_guide_build_options) (default).
    ///
    /// When `DNNL_ENABLE_CONCURRENT_EXEC=OFF` (default), the library
    /// scratchpad is common to all primitives to reduce the memory footprint.
    /// This configuration comes with limited thread-safety properties, namely
    /// primitives can be created and executed in parallel but cannot migrate
    /// between threads (in other words, each primitive should be executed in
    /// the same thread it was created in).
    ///
    /// When `DNNL_ENABLE_CONCURRENT_EXEC=ON`, the library scratchpad is
    /// private to each primitive. The memory footprint is larger than when
    /// using `DNNL_ENABLE_CONCURRENT_EXEC=OFF` but different primitives can be
    /// created and run concurrently (the same primitive cannot be run
    /// concurrently from two different threads though).
    dnnl_scratchpad_mode_library,
    /// The user manages the scratchpad allocation by querying and providing
    /// the scratchpad memory to primitives. This mode is thread-safe as long
    /// as the scratchpad buffers are not used concurrently by two primitive
    /// executions.
    dnnl_scratchpad_mode_user,
} dnnl_scratchpad_mode_t;
2696 | |
/// @struct dnnl_primitive_attr
/// @brief An opaque structure for primitive descriptor attributes.
///
/// Attributes may contain:
/// - output scales (to scale the result prior to storing it to the memory)
/// - presumably also a post-operation chain (see #dnnl_post_ops and the
///   DNNL_ARG_ATTR_* argument indices below) -- confirm against the full API
struct dnnl_primitive_attr;

/// @brief A primitive descriptor attributes handle that controls primitive
/// behavior.
typedef struct dnnl_primitive_attr *dnnl_primitive_attr_t;

/// @brief A constant (read-only) primitive descriptor attributes handle.
typedef const struct dnnl_primitive_attr *const_dnnl_primitive_attr_t;
2710 | |
/// @struct dnnl_post_ops
/// @brief An opaque structure for a chain of post operations.
///
/// dnnl_post_ops can be used to perform some (trivial) operations like
/// accumulation or eltwise after certain primitives like convolution.
///
/// Post operations might be combined together, making a chain of post
/// operations. For instance one can configure convolution followed by
/// accumulation followed by eltwise. This might be especially beneficial
/// for residual learning blocks.
///
/// @warning
///     Not all combinations are supported, so the user should handle
///     errors accordingly.
///
/// Supported post operations:
///  - accumulation (base primitive: convolution)
///  - eltwise (base primitive: convolution)
struct dnnl_post_ops;

/// @brief A post operation chain handle.
typedef struct dnnl_post_ops *dnnl_post_ops_t;

/// @brief A constant (read-only) post operation chain handle.
typedef const struct dnnl_post_ops *const_dnnl_post_ops_t;
2736 | |
2737 | /// @} dnnl_api_attributes |
2738 | |
2739 | /// @addtogroup dnnl_api_primitives_common |
2740 | /// @{ |
2741 | |
/// @struct dnnl_primitive
/// An opaque structure to describe a primitive.
struct dnnl_primitive;
/// A primitive handle (a pointer to an opaque primitive object).
typedef struct dnnl_primitive *dnnl_primitive_t;
/// A constant (read-only) primitive handle.
typedef const struct dnnl_primitive *const_dnnl_primitive_t;
2749 | |
2750 | /// Source argument #0. |
2751 | #define DNNL_ARG_SRC_0 1 |
2752 | /// A special mnemonic for source argument for primitives that have a |
2753 | /// single source. An alias for #DNNL_ARG_SRC_0. |
2754 | #define DNNL_ARG_SRC DNNL_ARG_SRC_0 |
2755 | /// A special mnemonic for RNN input vector. An alias for |
2756 | /// #DNNL_ARG_SRC_0. |
2757 | #define DNNL_ARG_SRC_LAYER DNNL_ARG_SRC_0 |
2758 | /// A special mnemonic for reorder source argument. An alias for |
2759 | /// #DNNL_ARG_SRC_0. |
2760 | #define DNNL_ARG_FROM DNNL_ARG_SRC_0 |
2761 | |
2762 | /// Source argument #1. |
2763 | #define DNNL_ARG_SRC_1 2 |
2764 | /// A special mnemonic for RNN input recurrent hidden state vector. An alias |
2765 | /// for #DNNL_ARG_SRC_1. |
2766 | #define DNNL_ARG_SRC_ITER DNNL_ARG_SRC_1 |
2767 | |
2768 | /// Source argument #2. |
2769 | #define DNNL_ARG_SRC_2 3 |
2770 | /// A special mnemonic for RNN input recurrent cell state vector. An alias for |
2771 | /// #DNNL_ARG_SRC_2. |
2772 | #define DNNL_ARG_SRC_ITER_C DNNL_ARG_SRC_2 |
2773 | |
2774 | /// Source argument #3. |
2775 | #define DNNL_ARG_SRC_3 4 |
2776 | /// A special mnemonic for RNN input recurrent cell attention vector. An alias for |
2777 | /// #DNNL_ARG_SRC_3. |
2778 | #define DNNL_ARG_AUGRU_ATTENTION DNNL_ARG_SRC_3 |
2779 | |
2780 | /// Destination argument #0. |
2781 | #define DNNL_ARG_DST_0 17 |
2782 | /// A special mnemonic for destination argument for primitives that have a |
2783 | /// single destination. An alias for #DNNL_ARG_DST_0. |
2784 | #define DNNL_ARG_DST DNNL_ARG_DST_0 |
2785 | /// A special mnemonic for reorder destination argument. An alias for |
2786 | /// #DNNL_ARG_DST_0. |
2787 | #define DNNL_ARG_TO DNNL_ARG_DST_0 |
2788 | /// A special mnemonic for RNN output vector. An alias for #DNNL_ARG_DST_0. |
2789 | #define DNNL_ARG_DST_LAYER DNNL_ARG_DST_0 |
2790 | |
2791 | /// Destination argument #1. |
2792 | #define DNNL_ARG_DST_1 18 |
/// A special mnemonic for RNN output recurrent hidden state vector. An
/// alias for #DNNL_ARG_DST_1.
2795 | #define DNNL_ARG_DST_ITER DNNL_ARG_DST_1 |
2796 | |
2797 | /// Destination argument #2. |
2798 | #define DNNL_ARG_DST_2 19 |
2799 | /// A special mnemonic for LSTM output recurrent cell state vector. An |
2800 | /// alias for #DNNL_ARG_DST_2. |
2801 | #define DNNL_ARG_DST_ITER_C DNNL_ARG_DST_2 |
2802 | |
2803 | /// Weights argument #0. |
2804 | #define DNNL_ARG_WEIGHTS_0 33 |
2805 | /// A special mnemonic for primitives that have a single weights |
2806 | /// argument. Alias for #DNNL_ARG_WEIGHTS_0. |
2807 | #define DNNL_ARG_WEIGHTS DNNL_ARG_WEIGHTS_0 |
2808 | /// A special mnemonic for scale and shift argument of normalization |
2809 | /// primitives. Alias for #DNNL_ARG_WEIGHTS_0. |
2810 | #define DNNL_ARG_SCALE_SHIFT DNNL_ARG_WEIGHTS_0 |
2811 | /// A special mnemonic for RNN weights applied to the layer input. An |
2812 | /// alias for #DNNL_ARG_WEIGHTS_0. |
2813 | #define DNNL_ARG_WEIGHTS_LAYER DNNL_ARG_WEIGHTS_0 |
2814 | |
2815 | /// Weights argument #1. |
2816 | #define DNNL_ARG_WEIGHTS_1 34 |
2817 | /// A special mnemonic for RNN weights applied to the recurrent input. |
2818 | /// An alias for #DNNL_ARG_WEIGHTS_1. |
2819 | #define DNNL_ARG_WEIGHTS_ITER DNNL_ARG_WEIGHTS_1 |
2820 | |
2821 | /// Weights argument #2. |
2822 | #define DNNL_ARG_WEIGHTS_2 35 |
2823 | /// A special mnemonic for RNN weights applied to the peephole weights. |
2824 | /// An alias for #DNNL_ARG_WEIGHTS_2. |
2825 | #define DNNL_ARG_WEIGHTS_PEEPHOLE DNNL_ARG_WEIGHTS_2 |
2826 | |
2827 | /// Weights argument #3. |
2828 | #define DNNL_ARG_WEIGHTS_3 36 |
2829 | /// A special mnemonic for RNN weights applied to the projection weights. |
2830 | /// An alias for #DNNL_ARG_WEIGHTS_3. |
2831 | #define DNNL_ARG_WEIGHTS_PROJECTION DNNL_ARG_WEIGHTS_3 |
2832 | |
2833 | /// Bias tensor argument. |
2834 | #define DNNL_ARG_BIAS 41 |
2835 | |
2836 | /// Mean values tensor argument. |
2837 | #define DNNL_ARG_MEAN 49 |
2838 | /// Variance values tensor argument. |
2839 | #define DNNL_ARG_VARIANCE 50 |
2840 | |
2841 | /// A special mnemonic for scale argument of normalization primitives. |
2842 | #define DNNL_ARG_SCALE 51 |
2843 | /// A special mnemonic for shift argument of normalization primitives. |
2844 | #define DNNL_ARG_SHIFT 52 |
2845 | |
2846 | /// Workspace tensor argument. Workspace is used to pass information |
2847 | /// from forward propagation to backward propagation computations. |
2848 | #define DNNL_ARG_WORKSPACE 64 |
2849 | /// Scratchpad (temporary storage) tensor argument. |
2850 | #define DNNL_ARG_SCRATCHPAD 80 |
2851 | |
2852 | /// Gradient (diff) of the source argument #0. |
2853 | #define DNNL_ARG_DIFF_SRC_0 129 |
2854 | /// A special mnemonic for primitives that have a single diff source argument. |
2855 | /// An alias for #DNNL_ARG_DIFF_SRC_0. |
2856 | #define DNNL_ARG_DIFF_SRC DNNL_ARG_DIFF_SRC_0 |
2857 | /// A special mnemonic for gradient (diff) of RNN input vector. An alias for |
2858 | /// #DNNL_ARG_DIFF_SRC_0. |
2859 | #define DNNL_ARG_DIFF_SRC_LAYER DNNL_ARG_DIFF_SRC_0 |
2860 | |
2861 | /// Gradient (diff) of the source argument #1. |
2862 | #define DNNL_ARG_DIFF_SRC_1 130 |
2863 | /// A special mnemonic for gradient (diff) of RNN input recurrent hidden state |
2864 | /// vector. An alias for #DNNL_ARG_DIFF_SRC_1. |
2865 | #define DNNL_ARG_DIFF_SRC_ITER DNNL_ARG_DIFF_SRC_1 |
2866 | |
2867 | /// Gradient (diff) of the source argument #2. |
2868 | #define DNNL_ARG_DIFF_SRC_2 131 |
/// A special mnemonic for gradient (diff) of RNN input recurrent cell state
/// vector. An alias for #DNNL_ARG_DIFF_SRC_2.
2871 | #define DNNL_ARG_DIFF_SRC_ITER_C DNNL_ARG_DIFF_SRC_2 |
2872 | |
2873 | /// Gradient (diff) of the source argument #3. |
2874 | #define DNNL_ARG_DIFF_SRC_3 132 |
2875 | /// A special mnemonic for gradient (diff) of RNN input recurrent cell attention |
2876 | /// vector. An alias for #DNNL_ARG_DIFF_SRC_3. |
2877 | #define DNNL_ARG_DIFF_AUGRU_ATTENTION DNNL_ARG_DIFF_SRC_3 |
2878 | |
2879 | /// Gradient (diff) of the destination argument #0. |
2880 | #define DNNL_ARG_DIFF_DST_0 145 |
2881 | /// A special mnemonic for primitives that have a single diff destination |
2882 | /// argument. An alias for #DNNL_ARG_DIFF_DST_0. |
2883 | #define DNNL_ARG_DIFF_DST DNNL_ARG_DIFF_DST_0 |
2884 | /// A special mnemonic for gradient (diff) of RNN output vector. An alias for |
2885 | /// #DNNL_ARG_DIFF_DST_0. |
2886 | #define DNNL_ARG_DIFF_DST_LAYER DNNL_ARG_DIFF_DST_0 |
2887 | |
2888 | /// Gradient (diff) of the destination argument #1. |
2889 | #define DNNL_ARG_DIFF_DST_1 146 |
/// A special mnemonic for gradient (diff) of RNN output recurrent hidden state
/// vector. An alias for #DNNL_ARG_DIFF_DST_1.
2892 | #define DNNL_ARG_DIFF_DST_ITER DNNL_ARG_DIFF_DST_1 |
2893 | |
2894 | /// Gradient (diff) of the destination argument #2. |
2895 | #define DNNL_ARG_DIFF_DST_2 147 |
/// A special mnemonic for gradient (diff) of RNN output recurrent cell state
/// vector. An alias for #DNNL_ARG_DIFF_DST_2.
2898 | #define DNNL_ARG_DIFF_DST_ITER_C DNNL_ARG_DIFF_DST_2 |
2899 | |
2900 | /// Gradient (diff) of the weights argument #0. |
2901 | #define DNNL_ARG_DIFF_WEIGHTS_0 161 |
2902 | /// A special mnemonic for primitives that have a single diff weights |
2903 | /// argument. Alias for #DNNL_ARG_DIFF_WEIGHTS_0. |
2904 | #define DNNL_ARG_DIFF_WEIGHTS DNNL_ARG_DIFF_WEIGHTS_0 |
2905 | /// A special mnemonic for diff of scale and shift argument of normalization |
2906 | /// primitives. Alias for #DNNL_ARG_DIFF_WEIGHTS_0. |
2907 | #define DNNL_ARG_DIFF_SCALE_SHIFT DNNL_ARG_DIFF_WEIGHTS_0 |
2908 | /// A special mnemonic for diff of RNN weights applied to the layer input. An |
2909 | /// alias for #DNNL_ARG_DIFF_WEIGHTS_0. |
2910 | #define DNNL_ARG_DIFF_WEIGHTS_LAYER DNNL_ARG_DIFF_WEIGHTS_0 |
2911 | |
2912 | /// Gradient (diff) of the weights argument #1. |
2913 | #define DNNL_ARG_DIFF_WEIGHTS_1 162 |
2914 | /// A special mnemonic for diff of RNN weights applied to the recurrent input. |
2915 | /// An alias for #DNNL_ARG_DIFF_WEIGHTS_1. |
2916 | #define DNNL_ARG_DIFF_WEIGHTS_ITER DNNL_ARG_DIFF_WEIGHTS_1 |
2917 | |
2918 | /// Gradient (diff) of the weights argument #2. |
2919 | #define DNNL_ARG_DIFF_WEIGHTS_2 163 |
2920 | /// A special mnemonic for diff of RNN weights applied to the peephole weights. |
2921 | /// An alias for #DNNL_ARG_DIFF_WEIGHTS_2. |
2922 | #define DNNL_ARG_DIFF_WEIGHTS_PEEPHOLE DNNL_ARG_DIFF_WEIGHTS_2 |
2923 | |
2924 | /// Gradient (diff) of the weights argument #3. |
2925 | #define DNNL_ARG_DIFF_WEIGHTS_3 164 |
2926 | /// A special mnemonic for diff of RNN weights applied to the projection |
2927 | /// weights. An alias for #DNNL_ARG_DIFF_WEIGHTS_3. |
2928 | #define DNNL_ARG_DIFF_WEIGHTS_PROJECTION DNNL_ARG_DIFF_WEIGHTS_3 |
2929 | |
2930 | /// Gradient (diff) of the bias tensor argument. |
2931 | #define DNNL_ARG_DIFF_BIAS 169 |
2932 | |
/// A special mnemonic for gradient (diff) of the scale argument of
/// normalization primitives.
2934 | #define DNNL_ARG_DIFF_SCALE 255 |
/// A special mnemonic for gradient (diff) of the shift argument of
/// normalization primitives.
2936 | #define DNNL_ARG_DIFF_SHIFT 256 |
2937 | |
2938 | /// Output scaling factors provided at execution time. |
2939 | #define DNNL_ARG_ATTR_OUTPUT_SCALES 513 |
2940 | |
2941 | /// Starting index for source arguments for primitives that take a variable |
2942 | /// number of source arguments. |
2943 | #define DNNL_ARG_MULTIPLE_SRC 1024 |
2944 | /// Starting index for destination arguments for primitives that produce a |
2945 | /// variable number of destination arguments. |
2946 | #define DNNL_ARG_MULTIPLE_DST 2048 |
2947 | |
2948 | /// Zero points provided at execution time. |
2949 | #define DNNL_ARG_ATTR_ZERO_POINTS 4096 |
2950 | |
2951 | /// Arguments for fused depthwise convolution. |
2952 | /// See @ref dev_guide_attributes_post_ops_depthwise_fusion |
2953 | #define DNNL_ARG_ATTR_POST_OP_DW 8192 |
2954 | |
2955 | /// Starting point for a binary post operation. |
2956 | #define DNNL_ARG_ATTR_MULTIPLE_POST_OP_BASE 16384 |
2957 | |
2958 | /// Arguments for a binary post operation. Up to 32 arguments are supported. |
2959 | /// See @ref dev_guide_attributes_post_ops_binary_fusion |
2960 | #define DNNL_ARG_ATTR_MULTIPLE_POST_OP(idx) \ |
2961 | (DNNL_ARG_ATTR_MULTIPLE_POST_OP_BASE * ((idx) + 1)) |
2962 | |
2963 | /// Input scaling factors provided at execution time. |
2964 | #define DNNL_ARG_ATTR_INPUT_SCALES 1048576 |
2965 | |
/// A structure that contains an index and a memory object, and is used to pass
/// arguments to dnnl_primitive_execute().
typedef struct {
    int arg; ///< An argument index, e.g. #DNNL_ARG_SRC
    dnnl_memory_t memory; ///< Input/output memory
} dnnl_exec_arg_t;
2972 | |
2973 | /// @} dnnl_api_primitives_common |
2974 | |
2975 | /// @addtogroup dnnl_api_primitives_common |
2976 | /// @{ |
2977 | |
2978 | /// Primitive descriptor query specification |
2979 | /// |
2980 | /// For generic function dnnl_primitive_desc_query(), the type of result must |
2981 | /// agree with the queried argument. The correspondence table: |
2982 | /// |
2983 | /// Query kind | Type of query result |
2984 | /// --------------------------------|----------------------------- |
2985 | /// dnnl_query_*_engine | #dnnl_engine_t * |
2986 | /// #dnnl_query_primitive_kind | #dnnl_primitive_kind_t * |
2987 | /// dnnl_query_*_s32 | int * |
2988 | /// dnnl_query_*_s64 | #dnnl_dim_t * (same as int64_t *) |
2989 | /// dnnl_query_*_f64 | double * |
2990 | /// dnnl_query_*_str | const char ** |
2991 | /// #dnnl_query_op_d | #const_dnnl_op_desc_t * |
2992 | /// dnnl_query_*_md | const #dnnl_memory_desc_t ** |
2993 | /// dnnl_query_*_\<op\>_d | const dnnl_\<op\>_desc_t ** |
2994 | /// dnnl_query_*_pd | #const_dnnl_primitive_desc_t * |
2995 | /// dnnl_query_cache_blob_id | const uint8_t ** |
2996 | /// |
2997 | /// @note |
2998 | /// Rule of thumb: all opaque types and structures are returned by |
2999 | /// reference. All numbers are returned by value. |
3000 | /// |
3001 | /// @warning |
3002 | /// All returned references point to constant objects and are valid only |
3003 | /// during the lifetime of the queried primitive descriptor. Returned objects |
3004 | /// must not be destroyed by the user. If you need to keep the object longer |
3005 | /// than the lifetime of the queried primitive descriptor, use |
3006 | /// dnnl_primitive_desc_clone() to make a copy. |
typedef enum {
    dnnl_query_undef = 0, ///< no query

    dnnl_query_engine, ///< execution engine
    dnnl_query_primitive_kind, ///< primitive kind

    dnnl_query_num_of_inputs_s32, ///< number of inputs expected
    dnnl_query_num_of_outputs_s32, ///< number of outputs expected

    dnnl_query_time_estimate_f64, ///< runtime estimation (seconds)
    dnnl_query_memory_consumption_s64, ///< memory consumption -- extra
    /// (scratch) memory, additional to
    /// all inputs and outputs memory
    /// (bytes)

    dnnl_query_scratchpad_engine, ///< scratchpad engine -- engine to be used
    /// for creating scratchpad memory

    dnnl_query_impl_info_str, ///< implementation name

    dnnl_query_reorder_src_engine, ///< source engine
    dnnl_query_reorder_dst_engine, ///< destination engine

    dnnl_query_prop_kind, ///< propagation kind

    dnnl_query_cache_blob_id_size_s64, ///< size of cache blob ID in bytes
    dnnl_query_cache_blob_id, ///< cache blob ID (pointer to array)

    // memory and op descriptor section
    dnnl_query_some_d = 64, ///< stub
    dnnl_query_op_d, ///< op descriptor
    dnnl_query_convolution_d, ///< convolution descriptor
    dnnl_query_deconvolution_d, ///< deconvolution descriptor
    dnnl_query_shuffle_d, ///< shuffle descriptor
    dnnl_query_eltwise_d, ///< eltwise descriptor
    dnnl_query_softmax_d, ///< softmax descriptor
    dnnl_query_pooling_d, ///< pooling descriptor
    dnnl_query_lrn_d, ///< lrn descriptor
    dnnl_query_batch_normalization_d, ///< batch normalization descriptor
    dnnl_query_layer_normalization_d, ///< layer normalization descriptor
    dnnl_query_inner_product_d, ///< inner product descriptor
    dnnl_query_rnn_d, ///< rnn descriptor
    dnnl_query_gemm_d, ///< GEMM descriptor (internal)
    dnnl_query_binary_d, ///< binary descriptor
    dnnl_query_logsoftmax_d, ///< logsoftmax descriptor
    dnnl_query_matmul_d, ///< matrix multiplication (matmul) descriptor
    dnnl_query_resampling_d, ///< resampling descriptor
    dnnl_query_pooling_v2_d, ///< pooling version 2 descriptor
    dnnl_query_reduction_d, ///< reduction descriptor
    dnnl_query_prelu_d, ///< prelu descriptor
    dnnl_query_softmax_v2_d, ///< softmax version 2 descriptor
    dnnl_query_layer_normalization_v2_d, ///< layer normalization version 2 descriptor

    // memory descriptor section
    dnnl_query_some_md = 128, ///< stub
    dnnl_query_src_md, ///< source memory desc
    dnnl_query_diff_src_md, ///< source gradient memory desc
    dnnl_query_weights_md, ///< weights memory desc
    dnnl_query_diff_weights_md, ///< weights grad. memory desc
    dnnl_query_dst_md, ///< destination memory desc
    dnnl_query_diff_dst_md, ///< destination grad. memory desc
    dnnl_query_workspace_md, ///< workspace memory desc
    dnnl_query_scratchpad_md, ///< scratchpad memory desc
    dnnl_query_exec_arg_md = 255, ///< memory desc of an execute argument

    // Maximum value -- guards against undefined behavior when internal code
    // extends dnnl_query_t beyond the public values
    dnnl_query_max = 0x7fff,
} dnnl_query_t;
3075 | |
3076 | /// @} dnnl_api_primitives_common |
3077 | |
3078 | /// @} dnnl_api_primitives |
3079 | |
3080 | /// @addtogroup dnnl_api_stream |
3081 | /// @{ |
3082 | |
/// @brief Stream flags.
typedef enum {
    /// In-order execution.
    dnnl_stream_in_order = 0x1U,
    /// Out-of-order execution.
    dnnl_stream_out_of_order = 0x2U,
    /// Default stream configuration.
    dnnl_stream_default_flags = dnnl_stream_in_order,
} dnnl_stream_flags_t;
3092 | |
/// @struct dnnl_stream
/// An opaque structure to describe an execution stream.
struct dnnl_stream;
/// An execution stream handle (a pointer to an opaque stream object).
typedef struct dnnl_stream *dnnl_stream_t;
/// A constant (read-only) execution stream handle.
typedef const struct dnnl_stream *const_dnnl_stream_t;
3100 | |
3101 | /// @} dnnl_api_stream |
3102 | |
3103 | /// @addtogroup dnnl_api_service |
3104 | /// @{ |
3105 | |
// Runtime kind identifiers reported via dnnl_version_t::cpu_runtime and
// dnnl_version_t::gpu_runtime. CPU and GPU value ranges are disjoint.

/// No runtime (disabled)
#define DNNL_RUNTIME_NONE 0u

/// Sequential runtime (CPU only)
#define DNNL_RUNTIME_SEQ 1u

/// OpenMP runtime (CPU only)
#define DNNL_RUNTIME_OMP 2u

/// TBB runtime (CPU only)
#define DNNL_RUNTIME_TBB 4u

/// Threadpool runtime (CPU only)
#define DNNL_RUNTIME_THREADPOOL 8u

/// OpenCL runtime
#define DNNL_RUNTIME_OCL 256u

/// SYCL runtime
#define DNNL_RUNTIME_SYCL 512u

/// DPC++ runtime (alias of #DNNL_RUNTIME_SYCL)
#define DNNL_RUNTIME_DPCPP DNNL_RUNTIME_SYCL
3129 | |
/// Structure containing version information as per [Semantic
/// Versioning](https://semver.org)
typedef struct {
    int major; ///< Major version
    int minor; ///< Minor version
    int patch; ///< Patch version
    const char *hash; ///< Git hash of the sources (may be absent)
    unsigned cpu_runtime; ///< CPU runtime (one of the DNNL_RUNTIME_* values)
    unsigned gpu_runtime; ///< GPU runtime (one of the DNNL_RUNTIME_* values)
} dnnl_version_t;
3140 | |
// JIT profiling flags. Bit-flag values; combinable with bitwise OR.

/// Disable profiling completely
#define DNNL_JIT_PROFILE_NONE 0u

/// Enable VTune Amplifier integration
#define DNNL_JIT_PROFILE_VTUNE 1u

/// Enable Linux perf integration via perfmap files
#define DNNL_JIT_PROFILE_LINUX_PERFMAP 2u

/// Enable Linux perf integration via jitdump files
#define DNNL_JIT_PROFILE_LINUX_JITDUMP 4u

/// Instruct Linux perf integration via jitdump files to use TSC. @ref
/// DNNL_JIT_PROFILE_LINUX_JITDUMP must be set too for this to take effect.
#define DNNL_JIT_PROFILE_LINUX_JITDUMP_USE_TSC 8u

/// Enable Linux perf integration (both jitdump and perfmap)
#define DNNL_JIT_PROFILE_LINUX_PERF \
    (DNNL_JIT_PROFILE_LINUX_JITDUMP | DNNL_JIT_PROFILE_LINUX_PERFMAP)
3160 | |
/// CPU instruction set flags
///
/// Values are cumulative bit masks: each ISA's value contains the bits of the
/// ISAs it extends (e.g. avx = 0x3 includes the sse41 bit 0x1), so masking can
/// be used to test whether one ISA subsumes another.
typedef enum {
    /// Any ISA (excepting those listed as initial support)
    dnnl_cpu_isa_all = 0x0,

    /// Intel Streaming SIMD Extensions 4.1 (Intel SSE4.1)
    dnnl_cpu_isa_sse41 = 0x1,

    /// Intel Advanced Vector Extensions (Intel AVX)
    dnnl_cpu_isa_avx = 0x3,

    /// Intel Advanced Vector Extensions 2 (Intel AVX2)
    dnnl_cpu_isa_avx2 = 0x7,

    /// (deprecated) Intel Advanced Vector Extensions 512 (Intel AVX-512) subset
    /// for Intel Xeon Phi processors x200 Series.
    dnnl_cpu_isa_avx512_mic = 0xf,

    /// (deprecated) Intel AVX-512 subset
    /// for Intel Xeon Phi processors 7235, 7285, 7295 Series.
    dnnl_cpu_isa_avx512_mic_4ops = 0x1f,

    /// Intel AVX-512 subset for Intel Xeon Scalable processor family
    /// and Intel Core processor family.
    dnnl_cpu_isa_avx512_core = 0x27,

    /// Intel AVX-512 and Intel Deep Learning Boost (Intel DL Boost) support
    /// for Intel Xeon Scalable processor family
    /// and Intel Core processor family.
    dnnl_cpu_isa_avx512_core_vnni = 0x67,

    /// Intel AVX-512, Intel DL Boost and bfloat16 support
    /// for Intel Xeon Scalable processor family
    /// and Intel Core processor family.
    dnnl_cpu_isa_avx512_core_bf16 = 0xe7,

    /// Intel AVX-512 with float16, Intel DL Boost and bfloat16 support
    /// for Intel Xeon Scalable processor family
    /// and Intel Core processor family.
    dnnl_cpu_isa_avx512_core_fp16 = 0x1e7,

    /// Intel AVX-512 with float16, Intel DL Boost and bfloat16 support and
    /// Intel AMX with 8-bit integer and bfloat16 support
    dnnl_cpu_isa_avx512_core_amx = 0x3e7,

    /// Intel AVX2 and Intel Deep Learning Boost (Intel DL Boost) support
    dnnl_cpu_isa_avx2_vnni = 0x407,

} dnnl_cpu_isa_t;
3210 | |
/// CPU ISA hints flags
typedef enum {
    /// No hints (use default features)
    dnnl_cpu_isa_no_hints = 0x0,

    /// Prefer to exclusively use Ymm registers for computations
    dnnl_cpu_isa_prefer_ymm = 0x1,
} dnnl_cpu_isa_hints_t;
3219 | |
3220 | /// @} dnnl_api_service |
3221 | |
3222 | /// @} dnnl_api |
3223 | |
3224 | #ifdef __cplusplus |
3225 | } |
3226 | #endif |
3227 | |
#endif /* ONEAPI_DNNL_DNNL_TYPES_H */
3229 | |