1/*******************************************************************************
2* Copyright 2016-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17/// @file
18/// C API types definitions
19
20#ifndef ONEAPI_DNNL_DNNL_TYPES_H
21#define ONEAPI_DNNL_DNNL_TYPES_H
22
23#ifdef __cplusplus
24extern "C" {
25#endif
26
27/// @cond DO_NOT_DOCUMENT_THIS
28#include <stddef.h>
29#include <stdint.h>
30/// @endcond
31
32/// @addtogroup dnnl_api
33/// @{
34
35/// @addtogroup dnnl_api_utils
36/// @{
37
38/// Status values returned by the library functions.
39typedef enum {
40 /// The operation was successful
41 dnnl_success = 0,
42 /// The operation failed due to an out-of-memory condition
43 dnnl_out_of_memory = 1,
44 /// The operation failed because of incorrect function arguments
45 dnnl_invalid_arguments = 2,
46 /// The operation failed because requested functionality is not implemented
47 dnnl_unimplemented = 3,
48 /// Primitive iterator passed over last primitive descriptor
49 dnnl_iterator_ends = 4,
50 /// Primitive or engine failed on execution
51 dnnl_runtime_error = 5,
52 /// Queried element is not required for given primitive
53 dnnl_not_required = 6,
54} dnnl_status_t;
55
56/// @} dnnl_api_utils
57
58/// @addtogroup dnnl_api_memory
59/// @{
60
61/// Data type specification
62typedef enum {
63 /// Undefined data type, used for empty memory descriptors.
64 dnnl_data_type_undef = 0,
65 /// 16-bit/half-precision floating point.
66 dnnl_f16 = 1,
67 /// non-standard 16-bit (bfloat16 w/ 7 bit mantissa) floating point.
68 dnnl_bf16 = 2,
69 /// 32-bit/single-precision floating point.
70 dnnl_f32 = 3,
71 /// 32-bit signed integer.
72 dnnl_s32 = 4,
73 /// 8-bit signed integer.
74 dnnl_s8 = 5,
75 /// 8-bit unsigned integer.
76 dnnl_u8 = 6,
77 /// 64-bit/double-precision floating point.
78 dnnl_f64 = 7,
79
80 /// Parameter to allow internal only data_types without undefined behavior.
81 /// This parameter is chosen to be valid for so long as sizeof(int) >= 2.
82 dnnl_data_type_max = 0x7fff,
83} dnnl_data_type_t;
84
85/// Memory format kind
86typedef enum {
87 /// Undefined memory format kind, used for empty memory descriptors.
88 dnnl_format_kind_undef = 0,
89 /// Unspecified format kind.
90 /// The primitive selects a format automatically.
91 dnnl_format_kind_any,
92 /// A tensor in a generic format described by the stride and blocking
93 /// values in each dimension. See @ref dnnl_blocking_desc_t for more
94 /// information.
95 dnnl_blocked,
96 /// Weights format used in 8bit Winograd convolution
97 dnnl_format_kind_wino,
98 /// Packed weights format used in RNN
99 dnnl_format_kind_rnn_packed,
100} dnnl_format_kind_t;
101
102/// Memory format tag specification.
103///
104/// oneDNN formats describe physical data layout. The physical layout
105/// is described as a sequence of the dimensions as they are laid out in the
106/// memory (from the outer-most to the inner-most). Note that this order
107/// doesn't affect the logical order of the dimensions that is kept in the
108/// `dims` field of the dnnl_memory_desc_t structure. The logical order of the
109/// dimensions is specified by the primitive that uses the tensor.
110///
111/// For example, CNN 5D tensor always has its logical dimensions in the order
112/// `(batch, channels, depth, height, width)`, while the physical layout might be
113/// `NCDHW` (corresponds to #dnnl_ncdhw format tag) or
114/// `NDHWC` (corresponds to #dnnl_ndhwc format tag).
115///
116/// ~~~cpp
117/// int batch = 2, channels = 16, depth = 13, height = 13, width = 13;
118///
119/// int ndims = 5; // 5D tensor
120/// dnnl_dims_t dims = {batch, channels, depth, height, width};
121/// dnnl_memory_desc_t data_in_ncdhw;
122/// dnnl_memory_desc_init_by_tag(
123/// &data_in_ncdhw, 5, dims, dnnl_f32, dnnl_ncdhw);
124///
125/// // note that in both cases dims passed are the same
126/// dnnl_memory_desc_t data_in_ndhwc;
127/// dnnl_memory_desc_init_by_tag(
128/// &data_in_ndhwc, 5, dims, dnnl_f32, dnnl_ndhwc);
129/// ~~~
130///
131/// Memory format tags can be further divided into two categories:
132/// - Domain-agnostic names, i.e. names the do not depend on the tensor usage
133/// in the specific primitive. These names use letters from `a` to `l` to
134/// denote logical dimension from 1 to 12, and form the order in which the
135/// dimensions are laid in memory. For instance, #dnnl_ab is used to denote
136/// 2D tensor where the second logical dimension (aka `b`) is the innermost,
137/// i.e. has stride = 1, and the first logical dimension (`a`) laid out in
138/// memory with stride equal to the size of second dimension. On the other
139/// hand, #dnnl_ba is just transposed version of the same tensor: the
140/// first dimension (`a`) becomes the innermost one.
141/// - Domain-specific names, i.e. names that make sense only in the context of
142/// a certain domain, such as CNN. This names are just aliases to the
143/// corresponding domain-agnostic tags and used mostly for the convenience.
144/// For example, #dnnl_nc is used to denote 2D CNN activations tensor
145/// memory format, where channels are the innermost dimension and batch is an
146/// outermost one. Moreover, #dnnl_nc is just an alias to #dnnl_ab,
147/// since for oneDNN CNN primitives the logical dimensions of
148/// activations tensors come in order: batch, channels, spatial.
149/// In other words, batch corresponds to the first logical dimension (`a`),
150/// channels correspond to the second one (`b`).
151///
152/// The following domain-specific notation applies to memory format tags:
153/// - @c 'n' denotes the mini-batch dimension
154/// - @c 'c' denotes a channels dimension
155/// - When there are multiple channel dimensions (for example, in convolution
156/// weights tensor), @c 'i' and @c 'o' denote dimensions of input and output
157/// channels
158/// - @c 'd', @c 'h', and @c 'w' denote spatial depth, height, and width
159/// respectively
160///
161/// Upper-case letters indicate that the data is laid out in blocks for a
162/// particular dimension. In such cases, the format name contains both upper-
163/// and lower-case letters for that dimension with a lower-case letter preceded
164/// by the block size. For example: #dnnl_nChw8c describes a format where the
165/// outermost dimension is mini-batch, followed by the channel block number,
166/// followed by the spatial height and width, and finally followed by 8-element
167/// channel blocks.
168///
169/// @sa @ref dev_guide_understanding_memory_formats
170typedef enum {
171 /// Undefined memory format tag
172 dnnl_format_tag_undef = 0,
173 /// Undefined memory format tag.
174 /// The primitive selects a format automatically.
175 dnnl_format_tag_any,
176
177 // Semantic agnostic section
178 // The physical order of dimensions is defined by the permutation of the
179 // characters, assuming that ab..z defines the natural order.
180
181 // Plain formats
182
183 dnnl_a, ///< plain 1D tensor
184 dnnl_ab, ///< plain 2D tensor
185 dnnl_abc, ///< plain 3D tensor
186 dnnl_abcd, ///< plain 4D tensor
187 dnnl_acbd, ///< plain 4D tensor
188 dnnl_abcde, ///< plain 5D tensor
189 dnnl_abcdef, ///< plain 6D tensor
190 dnnl_abcdefg, ///< plain 7D tensor
191 dnnl_abcdefgh, ///< plain 8D tensor
192 dnnl_abcdefghi, ///< plain 9D tensor
193 dnnl_abcdefghij, ///< plain 10D tensor
194 dnnl_abcdefghijk, ///< plain 11D tensor
195 dnnl_abcdefghijkl, ///< plain 12D tensor
196
197 // Permuted plain formats
198
199 dnnl_abdc, ///< permuted 4D tensor
200 dnnl_abdec, ///< permuted 5D tensor
201 dnnl_acb, ///< permuted 3D tensor
202 dnnl_acbde, ///< permuted 5D tensor
203 dnnl_acbdef, ///< permuted 6D tensor
204 dnnl_acdb, ///< permuted 4D tensor
205 dnnl_acdeb, ///< permuted 5D tensor
206 dnnl_ba, ///< permuted 2D tensor
207 dnnl_bac, ///< permuted 3D tensor
208 dnnl_bacd, ///< permuted 4D tensor
209 dnnl_bacde, ///< permuted 5D tensor
210 dnnl_bca, ///< permuted 3D tensor
211 dnnl_bcda, ///< permuted 4D tensor
212 dnnl_bcdea, ///< permuted 5D tensor
213 dnnl_cba, ///< permuted 3D tensor
214 dnnl_cdba, ///< permuted 4D tensor
215 dnnl_dcab, ///< permuted 4D tensor
216 dnnl_cdeba, ///< permuted 5D tensor
217 dnnl_decab, ///< permuted 5D tensor
218 dnnl_defcab, ///< permuted 6D tensor
219 dnnl_abced, ///< permuted 5D tensor
220 dnnl_abcdfe, ///< permuted 6D tensor
221 dnnl_abcdegf, ///< permuted 7D tensor
222 dnnl_abcdefhg, ///< permuted 8D tensor
223 dnnl_abcdefgih, ///< permuted 9D tensor
224 dnnl_abcdefghji, ///< permuted 10D tensor
225 dnnl_abcdefghikj, ///< permuted 11D tensor
226 dnnl_abcdefghijlk, ///< permuted 12D tensor
227
228 // Opaque blocked formats
229
230 dnnl_Abc16a,
231 dnnl_ABc16a16b,
232 dnnl_ABc32a32b,
233 dnnl_ABc4a4b,
234 /// 3D tensor blocked by 2nd dimension with block size 16
235 dnnl_aBc16b,
236 dnnl_ABc16b16a,
237 dnnl_Abc4a,
238 /// 3D tensor blocked by 2nd dimension with block size 32
239 dnnl_aBc32b,
240 /// 3D tensor blocked by 2nd dimension with block size 4
241 dnnl_aBc4b,
242 dnnl_ABc4b16a4b,
243 dnnl_ABc2b8a4b,
244 dnnl_ABc16b16a4b,
245 dnnl_ABc16b16a2b,
246 dnnl_ABc4b4a,
247 dnnl_ABc8a16b2a,
248 dnnl_ABc8a8b,
249 dnnl_ABc8a4b,
250 /// 3D tensor blocked by 2nd dimension with block size 8
251 dnnl_aBc8b,
252 dnnl_ABc8b16a2b,
253 dnnl_BAc8a16b2a,
254 dnnl_ABc8b8a,
255 dnnl_Abcd16a,
256 dnnl_Abcd8a,
257 dnnl_ABcd16a16b,
258 dnnl_Abcd32a,
259 dnnl_ABcd32a32b,
260 /// 4D tensor blocked by 2nd dimension with block size 16
261 dnnl_aBcd16b,
262 dnnl_ABcd16b16a,
263 dnnl_aBCd16b16c,
264 dnnl_aBCd16c16b,
265 dnnl_Abcd4a,
266 /// 4D tensor blocked by 2nd dimension with block size 32
267 dnnl_aBcd32b,
268 /// 4D tensor blocked by 2nd dimension with block size 4
269 dnnl_aBcd4b,
270 dnnl_ABcd4b16a4b,
271 dnnl_ABcd16b16a4b,
272 dnnl_ABcd16b16a2b,
273 dnnl_ABcd4b4a,
274 dnnl_ABcd4a4b,
275 dnnl_aBCd2c4b2c,
276 dnnl_aBCd4b8c2b,
277 dnnl_aBCd4c16b4c,
278 dnnl_aBCd2c8b4c,
279 dnnl_aBCd16c16b4c,
280 dnnl_aBCd16c16b2c,
281 dnnl_aBCd4c4b,
282 dnnl_aBCd4b4c,
283 dnnl_ABcd8a16b2a,
284 dnnl_ABcd2b8a4b,
285 dnnl_ABcd8a8b,
286 dnnl_ABcd8a4b,
287 /// 4D tensor blocked by 2nd dimension with block size 8
288 dnnl_aBcd8b,
289 dnnl_aBCd4c8b2c,
290 dnnl_ABcd8b16a2b,
291 dnnl_aBCd8b16c2b,
292 dnnl_BAcd8a16b2a,
293 /// 4D tensor blocked by 1st and 2nd dimension with block size 8
294 dnnl_ABcd8b8a,
295 dnnl_aBCd8b8c,
296 dnnl_aBCd8b4c,
297 dnnl_aBCd8c16b2c,
298 dnnl_ABcde8a16b2a,
299 dnnl_aCBd8b16c2b,
300 dnnl_aBCd8c8b,
301 dnnl_Abcde16a,
302 dnnl_Abcde32a,
303 dnnl_ABcde16a16b,
304 dnnl_BAcde8a16b2a,
305 /// 4D tensor blocked by 3rd dimension with block size 4
306 dnnl_aBCd2b4c2b,
307 /// 5D tensor blocked by 1st dimension with block size 16
308 dnnl_ABcde4b16a4b,
309 /// 5D tensor blocked by 1st dimension with block size 8
310 dnnl_ABcde2b8a4b,
311 /// 5D tensor blocked by 2nd dimension with block size 16
312 dnnl_aBcde16b,
313 dnnl_ABcde16b16a,
314 dnnl_aBCde16b16c,
315 dnnl_aBCde16c16b,
316 dnnl_aBCde2c8b4c,
317 dnnl_Abcde4a,
318 /// 5D tensor blocked by 2nd dimension with block size 32
319 dnnl_aBcde32b,
320 /// 5D tensor blocked by 2nd dimension with block size 4
321 dnnl_aBcde4b,
322 dnnl_ABcde4b4a,
323 dnnl_ABcde4a4b,
324 dnnl_aBCde4b4c,
325 dnnl_aBCde2c4b2c,
326 dnnl_aBCde4b8c2b,
327 dnnl_aBCde4c16b4c,
328 dnnl_aBCde16c16b4c,
329 dnnl_aBCde16c16b2c,
330 dnnl_aBCde4c4b,
331 dnnl_Abcde8a,
332 dnnl_ABcde8a8b,
333 dnnl_ABcde8a4b,
334 dnnl_BAcde16b16a,
335 /// 5D tensor blocked by 2nd dimension with block size 8
336 dnnl_aBcde8b,
337 dnnl_ABcde8b16a2b,
338 dnnl_aBCde8b16c2b,
339 dnnl_aBCde4c8b2c,
340 dnnl_aCBde8b16c2b,
341 dnnl_ABcde8b8a,
342 dnnl_ABcde32a32b,
343 dnnl_aBCde8b8c,
344 dnnl_aBCde8b4c,
345 dnnl_ABc4a8b8a4b,
346 dnnl_ABcd4a8b8a4b,
347 dnnl_ABcde4a8b8a4b,
348 dnnl_BAc4b8a8b4a,
349 dnnl_BAcd4b8a8b4a,
350 dnnl_BAcde4b8a8b4a,
351 dnnl_ABcd2a8b8a2b,
352 dnnl_aBCd4b8c8b4c,
353 dnnl_aBCde4b8c8b4c,
354 dnnl_aBCde2b8c8b2c,
355 dnnl_aBCde8c16b2c,
356 dnnl_aBCde8c8b,
357 /// 5D tensor blocked by 3rd dimension with block size 4
358 dnnl_aBCde2b4c2b,
359 /// 6D tensor blocked by 2nd dimension with block size 16
360 dnnl_aBcdef16b,
361 dnnl_aBCdef16b16c,
362 dnnl_aBCdef16c16b,
363 dnnl_aBCdef4c16b4c,
364 /// 6D tensor blocked by 2nd dimension with block size 8
365 dnnl_aBCdef2c8b4c,
366 dnnl_aBCdef4c8b2c,
367 /// 6D tensor blocked by 3rd dimension with block size 4
368 dnnl_aBCdef2b4c2b,
369 /// 6D tensor blocked by 2nd dimension with block size 4
370 dnnl_aBcdef4b,
371 dnnl_aBCdef4c4b,
372 dnnl_aBCdef4b4c,
373 dnnl_aBCdef2c4b2c,
374 dnnl_aBCdef4b8c2b,
375 dnnl_aBCdef8b8c,
376 dnnl_aBCdef8b4c,
377 dnnl_aBCdef8c16b2c,
378 dnnl_aBCdef4b8c8b4c,
379 dnnl_aBCdef8b16c2b,
380 dnnl_aCBdef8b16c2b,
381 dnnl_aBCdef8c8b,
382 dnnl_aBdc16b,
383 dnnl_aBdC16b2c,
384 dnnl_aBdC16b4c,
385 dnnl_aBdc4b,
386 dnnl_aBdc8b,
387 dnnl_aBdec16b,
388 dnnl_aBdeC16b2c,
389 dnnl_aBdeC16b4c,
390 dnnl_aBdec32b,
391 dnnl_aBdec4b,
392 dnnl_aBdec8b,
393 dnnl_aBdefc16b,
394 dnnl_aBdefC16b2c,
395 dnnl_aCBdef16c16b,
396 dnnl_aBdefc4b,
397 dnnl_aBdefc8b,
398 dnnl_Abcdef16a,
399 dnnl_Abcdef32a,
400 dnnl_aBedc16b,
401 dnnl_Acb16a,
402 dnnl_AcB16a2b,
403 dnnl_AcB16a4b,
404 dnnl_Acb4a,
405 dnnl_Acb8a,
406 dnnl_aCBd16b16c,
407 dnnl_aCBd16c16b,
408 dnnl_aCBde16b16c,
409 dnnl_aCBde16c16b,
410 dnnl_Acdb16a,
411 dnnl_AcdB16a2b,
412 dnnl_AcdB16a4b,
413 dnnl_Acdb32a,
414 dnnl_Acdb4a,
415 dnnl_Acdb8a,
416 dnnl_Acdeb16a,
417 dnnl_AcdeB16a2b,
418 dnnl_Acdeb4a,
419 dnnl_Acdeb8a,
420 dnnl_Adcb16a,
421 dnnl_BAc16a16b,
422 dnnl_BAc16b16a,
423 dnnl_BAcd16a16b,
424 dnnl_BAcd16b16a,
425 dnnl_aCBd4c8b8c4b,
426 dnnl_aCBde4c8b8c4b,
427 dnnl_aCBdef4c8b8c4b,
428 dnnl_BAcde16a16b,
429 dnnl_aCBdef16b16c,
430 dnnl_abdfce, ///< permuted 6D tensor
431 dnnl_abdefc, ///< permuted 6D tensor
432 dnnl_ABc16b32a,
433 dnnl_ABc16b64a,
434 dnnl_ABc4b32a4b,
435 dnnl_ABc4b64a4b,
436 dnnl_ABc8b32a2b,
437 dnnl_ABc8b64a2b,
438 dnnl_AB16b16a,
439 dnnl_AB16b32a,
440 dnnl_AB16b64a,
441 dnnl_AB8b16a2b,
442 dnnl_AB8b32a2b,
443 dnnl_AB8b64a2b,
444 dnnl_AB4b16a4b,
445 dnnl_AB4b32a4b,
446 dnnl_AB4b64a4b,
447 dnnl_AB16b16a4b,
448 dnnl_ABcd16b32a,
449 dnnl_ABcd16b64a,
450 dnnl_ABcd4b32a4b,
451 dnnl_ABcd4b64a4b,
452 dnnl_ABcd8b32a2b,
453 dnnl_ABcd8b64a2b,
454 dnnl_ABcde4b32a4b,
455 dnnl_ABcde4b64a4b,
456 dnnl_ABcde16b16a4b,
457 dnnl_ABcde16b16a2b,
458 dnnl_ABcde16b32a,
459 dnnl_ABcde16b64a,
460 dnnl_ABcde8b32a2b,
461 dnnl_ABcde8b64a2b,
462 dnnl_aBCdef16c16b4c,
463 dnnl_aBCdef16c16b2c,
464 dnnl_AB32a32b8a4b,
465 dnnl_AB8a4b,
466 dnnl_AB32a32b8a2b,
467 dnnl_AB8a2b,
468 dnnl_abDc32d,
469 dnnl_abDC32d4c,
470 dnnl_abdEc32e,
471 dnnl_abdEC32e2c,
472 dnnl_abdEC32e4c,
473 dnnl_aBdefC16b4c,
474 dnnl_AcdeB16a4b,
475 dnnl_ABcd16a16b2a,
476 dnnl_ABc16a16b2a,
477 dnnl_aBCd16b16c2b,
478 dnnl_aBCde16b16c2b,
479 dnnl_Acb32a,
480 dnnl_AcB32a2b,
481 dnnl_AcB32a4b,
482 dnnl_Acb48a,
483 dnnl_AcB48a2b,
484 dnnl_AcB48a4b,
485 dnnl_Acb64a,
486 dnnl_AcB64a2b,
487 dnnl_AcB64a4b,
488 dnnl_cBa2b,
489 dnnl_cBa4b,
490 dnnl_aBdc32b,
491 dnnl_aBdC32b2c,
492 dnnl_aBdC32b4c,
493 dnnl_aBdc48b,
494 dnnl_aBdC48b2c,
495 dnnl_aBdC48b4c,
496 dnnl_aBdc64b,
497 dnnl_aBdC64b2c,
498 dnnl_aBdC64b4c,
499 dnnl_adcb,
500 dnnl_adCb2c,
501 dnnl_adCb4c,
502 dnnl_AcdB32a2b,
503 dnnl_AcdB32a4b,
504 dnnl_Acdb48a,
505 dnnl_AcdB48a2b,
506 dnnl_AcdB48a4b,
507 dnnl_Acdb64a,
508 dnnl_AcdB64a2b,
509 dnnl_AcdB64a4b,
510 dnnl_cdBa2b,
511 dnnl_cdBa4b,
512 dnnl_aBdeC32b2c,
513 dnnl_aBdeC32b4c,
514 dnnl_aBdec48b,
515 dnnl_aBdeC48b2c,
516 dnnl_aBdeC48b4c,
517 dnnl_aBdec64b,
518 dnnl_aBdeC64b2c,
519 dnnl_aBdeC64b4c,
520 dnnl_adecb,
521 dnnl_adeCb2c,
522 dnnl_adeCb4c,
523 dnnl_Acdeb32a,
524 dnnl_AcdeB32a2b,
525 dnnl_AcdeB32a4b,
526 dnnl_Acdeb48a,
527 dnnl_AcdeB48a2b,
528 dnnl_AcdeB48a4b,
529 dnnl_Acdeb64a,
530 dnnl_AcdeB64a2b,
531 dnnl_AcdeB64a4b,
532 dnnl_cdeBa2b,
533 dnnl_cdeBa4b,
534 dnnl_aBdefc32b,
535 dnnl_aBdefC32b2c,
536 dnnl_aBdefC32b4c,
537 dnnl_aBdefc48b,
538 dnnl_aBdefC48b2c,
539 dnnl_aBdefC48b4c,
540 dnnl_aBdefc64b,
541 dnnl_aBdefC64b2c,
542 dnnl_aBdefC64b4c,
543 dnnl_adefcb,
544 dnnl_adefCb2c,
545 dnnl_adefCb4c,
546 dnnl_AB16b32a4b,
547 dnnl_AB16b48a4b,
548 dnnl_AB16b64a4b,
549 dnnl_AB16b16a2b,
550 dnnl_AB16b32a2b,
551 dnnl_AB16b48a2b,
552 dnnl_AB16b64a2b,
553 dnnl_ABc16b32a4b,
554 dnnl_ABc16b48a4b,
555 dnnl_ABc16b64a4b,
556 dnnl_ABc16b32a2b,
557 dnnl_ABc16b48a2b,
558 dnnl_ABc16b64a2b,
559 dnnl_ABcd16b32a4b,
560 dnnl_ABcd16b48a4b,
561 dnnl_ABcd16b64a4b,
562 dnnl_ABcd16b32a2b,
563 dnnl_ABcd16b48a2b,
564 dnnl_ABcd16b64a2b,
565 dnnl_ABcde16b32a4b,
566 dnnl_ABcde16b48a4b,
567 dnnl_ABcde16b64a4b,
568 dnnl_ABcde16b32a2b,
569 dnnl_ABcde16b48a2b,
570 dnnl_ABcde16b64a2b,
571 dnnl_ABc32a16b,
572 dnnl_ABcd32a16b,
573 dnnl_ABcde32a16b,
574 dnnl_AB48a16b,
575 dnnl_AB48a32b,
576 dnnl_ABc40a16b,
577 dnnl_ABc40a32b,
578 dnnl_aBC48b16c,
579 dnnl_aBC48b32c,
580 dnnl_ABcd40a16b,
581 dnnl_ABcd40a32b,
582 dnnl_abCd32c,
583 dnnl_abdCe32c,
584 dnnl_abdCE32c2e,
585 dnnl_BA16a16b2a,
586 dnnl_BA16a32b2a,
587 dnnl_BA16a48b2a,
588 dnnl_BA16a64b2a,
589 dnnl_BA16a16b4a,
590 dnnl_BA16a32b4a,
591 dnnl_BA16a48b4a,
592 dnnl_BA16a64b4a,
593 dnnl_ABcd8a2b,
594 dnnl_aBdeC16c16b2c,
595 dnnl_aBdeC16c16b4c,
596 dnnl_aBdefC16c16b2c,
597 dnnl_AcB16b16a2b,
598 dnnl_AcB16b16a4b,
599 dnnl_AcdB16b16a2b,
600 dnnl_AcdB16b16a4b,
601 dnnl_AcdeB16b16a2b,
602 dnnl_aBdefC16c16b4c,
603 dnnl_AcdeB16b16a4b,
604 dnnl_AcB16b32a2b,
605 dnnl_AcB16b32a4b,
606 dnnl_AcB16b48a2b,
607 dnnl_AcB16b48a4b,
608 dnnl_AcB16b64a2b,
609 dnnl_AcB16b64a4b,
610 dnnl_aBdC16c16b2c,
611 dnnl_aBdC16c16b4c,
612 dnnl_aBdC16c32b2c,
613 dnnl_aBdC16c32b4c,
614 dnnl_aBdC16c48b2c,
615 dnnl_aBdC16c48b4c,
616 dnnl_aBdC16c64b2c,
617 dnnl_aBdC16c64b4c,
618 dnnl_AcdB16b32a2b,
619 dnnl_AcdB16b32a4b,
620 dnnl_AcdB16b48a2b,
621 dnnl_AcdB16b48a4b,
622 dnnl_AcdB16b64a2b,
623 dnnl_AcdB16b64a4b,
624 dnnl_aBdeC16c32b2c,
625 dnnl_aBdeC16c32b4c,
626 dnnl_aBdeC16c48b2c,
627 dnnl_aBdeC16c48b4c,
628 dnnl_aBdeC16c64b2c,
629 dnnl_aBdeC16c64b4c,
630 dnnl_AcdeB16b32a2b,
631 dnnl_AcdeB16b32a4b,
632 dnnl_AcdeB16b48a2b,
633 dnnl_AcdeB16b48a4b,
634 dnnl_AcdeB16b64a2b,
635 dnnl_AcdeB16b64a4b,
636 dnnl_aBdefC16c32b2c,
637 dnnl_aBdefC16c32b4c,
638 dnnl_aBdefC16c48b2c,
639 dnnl_aBdefC16c48b4c,
640 dnnl_aBdefC16c64b2c,
641 dnnl_aBdefC16c64b4c,
642 dnnl_decbA16a,
643 dnnl_ABc4a2b,
644 dnnl_ABc8a2b,
645 dnnl_aBCd8b2c,
646 dnnl_ABcde4a2b,
647 dnnl_ABcde8a2b,
648 dnnl_ABcde40a16b,
649 dnnl_ABcde40a32b,
650 dnnl_aBCde8b2c,
651 dnnl_ABcde4a8b8a2b,
652 dnnl_ABcd4a8b8a2b,
653 dnnl_ABc4a8b8a2b,
654 dnnl_aBCdef4b8c8b2c,
655 dnnl_aBCde4b8c8b2c,
656 dnnl_aBCd4b8c8b2c,
657 dnnl_BAcde4b8a8b2a,
658 dnnl_BAcd4b8a8b2a,
659 dnnl_BAc4b8a8b2a,
660 dnnl_aCBdef4c8b8c2b,
661 dnnl_aCBde4c8b8c2b,
662 dnnl_aCBd4c8b8c2b,
663 dnnl_aBCdef8b2c,
664 dnnl_AB32a16b,
665 dnnl_AB32a32b,
666 dnnl_BA4b8a8b2a,
667 dnnl_BA4b8a8b4a,
668 dnnl_aBC32b16c,
669 dnnl_aBC32b32c,
670 dnnl_aCB4c8b8c2b,
671 dnnl_aCB4c8b8c4b,
672 dnnl_ABcd4a2b,
673 dnnl_ABc2b8a16b4a,
674 dnnl_ABcd2b8a16b4a,
675 dnnl_ABcde2b8a16b4a,
676 dnnl_ABc2a8b16a4b,
677 dnnl_ABc2a8b16a2b,
678 dnnl_ABc2b32a8b,
679 dnnl_ABcd2a8b16a4b,
680 dnnl_ABcd2a8b16a2b,
681 dnnl_aCBd2c8b16c2b,
682 dnnl_ABcd2b32a8b,
683 dnnl_aBCd2c8b16c2b,
684 dnnl_ABcde2a8b16a4b,
685 dnnl_ABcde2a8b16a2b,
686 dnnl_aCBde2c8b16c2b,
687 dnnl_ABcde2b32a8b,
688 dnnl_aBC2b8c16b2c,
689 dnnl_aBCd2b8c16b2c,
690 dnnl_aBCde2b8c16b2c,
691 dnnl_aBCdef2b8c16b2c,
692 dnnl_BAcde2b8a16b4a,
693 dnnl_BAcd2b8a16b4a,
694 dnnl_BAc2b8a16b4a,
695 dnnl_BAcde2b8a16b2a,
696 dnnl_BAcd2b8a16b2a,
697 dnnl_BAc2b8a16b2a,
698 dnnl_aBCde2c8b16c2b,
699 dnnl_aBCdef2c8b16c2b,
700 dnnl_aCBdef2c8b16c2b,
701 dnnl_aBCd2b8c16b4c,
702 dnnl_aBCde2b8c16b4c,
703 dnnl_BA4b8a16b2a,
704 dnnl_BA4b8a16b4a,
705 dnnl_aCB4c8b16c2b,
706 dnnl_aCB4c8b16c4b,
707 dnnl_BA16a16b,
708 dnnl_BA16a32b,
709 dnnl_BA16a48b,
710 dnnl_BA16a64b,
711 dnnl_aCB16c2b,
712 dnnl_aCB16c4b,
713 dnnl_BA16b2a,
714 dnnl_BA16b4a,
715 dnnl_aBC16b16c,
716 dnnl_aBC16b32c,
717 dnnl_AB16a16b,
718 dnnl_AB16a32b,
719 dnnl_adbc,
720 dnnl_ABcde16a16b2a,
721 dnnl_aBCdef16b16c2b,
722 dnnl_Acedb16a,
723 dnnl_aBdfec16b,
724 dnnl_abdEC64e2c,
725 dnnl_abdEC64e4c,
726
727 /// Just a sentinel, not real memory format tag. Must be changed after new
728 /// format tag is added.
729 dnnl_format_tag_last,
730
731 // Aliases
732
733 /// 1D tensor, an alias to #dnnl_a
734 dnnl_x = dnnl_a,
735 /// 2D CNN activations tensor, an alias to #dnnl_ab
736 dnnl_nc = dnnl_ab,
737 /// 2D CNN activations tensor, an alias to #dnnl_ba
738 dnnl_cn = dnnl_ba,
739 /// 2D RNN statistics tensor, an alias to #dnnl_ab
740 dnnl_tn = dnnl_ab,
741 /// 2D RNN statistics tensor, an alias to #dnnl_ba
742 dnnl_nt = dnnl_ba,
743 /// 3D CNN activations tensor, an alias to #dnnl_abc
744 dnnl_ncw = dnnl_abc,
745 /// 3D CNN activations tensor, an alias to #dnnl_acb
746 dnnl_nwc = dnnl_acb,
747 /// 4D CNN activations tensor, an alias to #dnnl_abcd
748 dnnl_nchw = dnnl_abcd,
749 /// 4D CNN activations tensor, an alias to #dnnl_acdb
750 dnnl_nhwc = dnnl_acdb,
751 /// 4D CNN activations tensor, an alias to #dnnl_bcda
752 dnnl_chwn = dnnl_bcda,
753 /// 5D CNN activations tensor, an alias to #dnnl_abcde
754 dnnl_ncdhw = dnnl_abcde,
755 /// 5D CNN activations tensor, an alias to #dnnl_acdeb
756 dnnl_ndhwc = dnnl_acdeb,
757
758 /// 2D CNN weights tensor, an alias to #dnnl_ab
759 dnnl_oi = dnnl_ab,
760 /// 2D CNN weights tensor, an alias to #dnnl_ba
761 dnnl_io = dnnl_ba,
762 /// 3D CNN weights tensor, an alias to #dnnl_abc
763 dnnl_oiw = dnnl_abc,
764 /// 3D CNN weights tensor, an alias to #dnnl_acb
765 dnnl_owi = dnnl_acb,
766 /// 3D CNN weights tensor, an alias to #dnnl_cba
767 dnnl_wio = dnnl_cba,
768 /// 3D CNN weights tensor, an alias to #dnnl_bca
769 dnnl_iwo = dnnl_bca,
770 /// 4D CNN weights tensor, an alias to #dnnl_abcd
771 dnnl_oihw = dnnl_abcd,
772 /// 4D CNN weights tensor, an alias to #dnnl_cdba
773 dnnl_hwio = dnnl_cdba,
774 /// 4D CNN weights tensor, an alias to #dnnl_acdb
775 dnnl_ohwi = dnnl_acdb,
776 /// 4D CNN weights tensor, an alias to #dnnl_bcda
777 dnnl_ihwo = dnnl_bcda,
778 /// 4D CNN weights tensor, an alias to #dnnl_bacd
779 dnnl_iohw = dnnl_bacd,
780 /// 5D CNN weights tensor, an alias to #dnnl_abcde
781 dnnl_oidhw = dnnl_abcde,
782 /// 5D CNN weights tensor, an alias to #dnnl_bacde
783 dnnl_iodhw = dnnl_bacde,
784 /// 5D CNN weights tensor, an alias to #dnnl_cdeba
785 dnnl_dhwio = dnnl_cdeba,
786 /// 5D CNN weights tensor, an alias to #dnnl_acdeb
787 dnnl_odhwi = dnnl_acdeb,
788 /// 5D CNN weights tensor, an alias to #dnnl_bcdea
789 dnnl_idhwo = dnnl_bcdea,
790
791 /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_abcd
792 dnnl_goiw = dnnl_abcd,
793 /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_abdc
794 dnnl_gowi = dnnl_abdc,
795 /// 4D CNN weights tensor (incl. groups), an alias to #dnnl_dcab
796 dnnl_wigo = dnnl_dcab,
797 /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_abcde
798 dnnl_goihw = dnnl_abcde,
799 /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_abdec
800 dnnl_gohwi = dnnl_abdec,
801 /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_decab
802 dnnl_hwigo = dnnl_decab,
803 /// 5D CNN weights tensor (incl. groups), an alias to #dnnl_acbde
804 dnnl_giohw = dnnl_acbde,
805 /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_abcdef
806 dnnl_goidhw = dnnl_abcdef,
807 /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_abdefc
808 dnnl_godhwi = dnnl_abdefc,
809 /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_acbdef
810 dnnl_giodhw = dnnl_acbdef,
811 /// 6D CNN weights tensor (incl. groups), an alias to #dnnl_defcab
812 dnnl_dhwigo = dnnl_defcab,
813
814 /// 3D RNN data tensor in the format (seq_length, batch, input channels),
815 /// an alias to #dnnl_abc.
816 dnnl_tnc = dnnl_abc,
817 /// 3D RNN data tensor in the format (batch, seq_length, input channels),
818 /// an alias to #dnnl_bac.
819 dnnl_ntc = dnnl_bac,
820 /// 4D RNN states tensor in the format (num_layers, num_directions,
821 /// batch, state channels), an alias to #dnnl_abcd.
822 dnnl_ldnc = dnnl_abcd,
823 /// 5D RNN weights tensor in the format (num_layers, num_directions,
824 /// input_channels, num_gates, output_channels), an alias to #dnnl_abcde.
825 ///
826 /// - For LSTM cells, the gates order is input, forget, candidate
827 /// and output gate.
828 /// - For GRU cells, the gates order is update, reset and output gate.
829 dnnl_ldigo = dnnl_abcde,
830 /// 5D RNN weights tensor in the format (num_layers, num_directions,
831 /// num_gates, output_channels, input_channels), an alias to #dnnl_abdec.
832 ///
833 /// - For LSTM cells, the gates order is input, forget, candidate
834 /// and output gate.
835 /// - For GRU cells, the gates order is update, reset and output gate.
836 dnnl_ldgoi = dnnl_abdec,
837 /// 4D LSTM projection tensor in the format (num_layers, num_directions,
838 /// num_channels_in_hidden_state, num_channels_in_recurrent_projection),
839 /// an alias to #dnnl_abcd.
840 dnnl_ldio = dnnl_abcd,
841 /// 4D LSTM projection tensor in the format (num_layers, num_directions,
842 /// num_channels_in_recurrent_projection, num_channels_in_hidden_state),
843 /// an alias to #dnnl_abdc.
844 dnnl_ldoi = dnnl_abdc,
845 /// 4D RNN bias tensor in the format (num_layers, num_directions,
846 /// num_gates, output_channels), an alias to #dnnl_abcd.
847 ///
848 /// - For LSTM cells, the gates order is input, forget, candidate
849 /// and output gate.
850 /// - For GRU cells, the gates order is update, reset and output gate.
851 dnnl_ldgo = dnnl_abcd,
852 /// 5D LSTM projection tensor
853 dnnl_ldOi32o = dnnl_abDc32d,
854 dnnl_ldOI32o4i = dnnl_abDC32d4c,
855 dnnl_ldIo32i = dnnl_abCd32c,
856 /// 6D RNN weights tensor
857 dnnl_ldgOi32o = dnnl_abdEc32e,
858 dnnl_ldgOI32o2i = dnnl_abdEC32e2c,
859 dnnl_ldgOI32o4i = dnnl_abdEC32e4c,
860 dnnl_ldgOI64o2i = dnnl_abdEC64e2c,
861 dnnl_ldgOI64o4i = dnnl_abdEC64e4c,
862 dnnl_ldgIo32i = dnnl_abdCe32c,
863 dnnl_ldgIO32i2o = dnnl_abdCE32c2e,
864
865 // Opaque data types, are not to be used explicitly
866
867 // data
868 /// 5D CNN activations tensor blocked by channels with block size 32,
869 /// an alias to #dnnl_aBcde32b
870 dnnl_nCdhw32c = dnnl_aBcde32b,
871 /// 5D CNN activations tensor blocked by channels with block size 16,
872 /// an alias to #dnnl_aBcde16b
873 dnnl_nCdhw16c = dnnl_aBcde16b,
874 /// 5D CNN activations tensor blocked by channels with block size 4,
875 /// an alias to #dnnl_aBcde4b
876 dnnl_nCdhw4c = dnnl_aBcde4b,
877 /// 5D CNN activations tensor blocked by channels with block size 8,
878 /// an alias to #dnnl_aBcde8b
879 dnnl_nCdhw8c = dnnl_aBcde8b,
880 /// 4D CNN activations tensor blocked by channels with block size 32,
881 /// an alias to #dnnl_aBcd32b
882 dnnl_nChw32c = dnnl_aBcd32b,
883 /// 4D CNN activations tensor blocked by channels with block size 16,
884 /// an alias to #dnnl_aBcd16b
885 dnnl_nChw16c = dnnl_aBcd16b,
886 /// 4D CNN activations tensor blocked by channels with block size 4,
887 /// an alias to #dnnl_aBcd4b
888 dnnl_nChw4c = dnnl_aBcd4b,
889 /// 4D CNN activations tensor blocked by channels with block size 8,
890 /// an alias to #dnnl_aBcd8b
891 dnnl_nChw8c = dnnl_aBcd8b,
892 /// 3D CNN activations tensor blocked by channels with block size 32,
893 /// an alias to #dnnl_aBc32b
894 dnnl_nCw32c = dnnl_aBc32b,
895 /// 3D CNN activations tensor blocked by channels with block size 16,
896 /// an alias to #dnnl_aBc16b
897 dnnl_nCw16c = dnnl_aBc16b,
898 /// 3D CNN activations tensor blocked by channels with block size 4,
899 /// an alias to #dnnl_aBc4b
900 dnnl_nCw4c = dnnl_aBc4b,
901 /// 3D CNN activations tensor blocked by channels with block size 8,
902 /// an alias to #dnnl_aBc8b
903 dnnl_nCw8c = dnnl_aBc8b,
904 dnnl_NCw16n16c = dnnl_ABc16a16b,
905 dnnl_NCdhw16n16c = dnnl_ABcde16a16b,
906 dnnl_NChw16n16c = dnnl_ABcd16a16b,
907 dnnl_NCw32n16c = dnnl_ABc32a16b,
908 dnnl_NChw32n16c = dnnl_ABcd32a16b,
909 dnnl_NCdhw32n16c = dnnl_ABcde32a16b,
910 dnnl_NCw32n32c = dnnl_ABc32a32b,
911 dnnl_NChw32n32c = dnnl_ABcd32a32b,
912 dnnl_NCdhw32n32c = dnnl_ABcde32a32b,
913
914 // weights, 2D
915 dnnl_OI16i16o = dnnl_AB16b16a,
916 dnnl_OI16i32o = dnnl_AB16b32a,
917 dnnl_OI16i64o = dnnl_AB16b64a,
918 dnnl_OI8i16o2i = dnnl_AB8b16a2b,
919 dnnl_OI8i32o2i = dnnl_AB8b32a2b,
920 dnnl_OI8i64o2i = dnnl_AB8b64a2b,
921 dnnl_OI4i16o4i = dnnl_AB4b16a4b,
922 dnnl_OI4i32o4i = dnnl_AB4b32a4b,
923 dnnl_OI4i64o4i = dnnl_AB4b64a4b,
924 dnnl_OI16i16o4i = dnnl_AB16b16a4b,
925 // weights, 3D
926 dnnl_IOw16o16i = dnnl_BAc16a16b,
927 dnnl_IOw16i16o = dnnl_BAc16b16a,
928 dnnl_OIw16i16o = dnnl_ABc16b16a,
929 dnnl_OIw16i32o = dnnl_ABc16b32a,
930 dnnl_OIw16i64o = dnnl_ABc16b64a,
931 dnnl_OIw16o16i = dnnl_ABc16a16b,
932 dnnl_Oiw16o = dnnl_Abc16a,
933 dnnl_OIw4i16o4i = dnnl_ABc4b16a4b,
934 dnnl_OIw4i32o4i = dnnl_ABc4b32a4b,
935 dnnl_OIw4i64o4i = dnnl_ABc4b64a4b,
936 dnnl_OIw2i8o4i = dnnl_ABc2b8a4b,
937 dnnl_OIw16i16o4i = dnnl_ABc16b16a4b,
938 dnnl_OIw16i16o2i = dnnl_ABc16b16a2b,
939 dnnl_OIw16o16i2o = dnnl_ABc16a16b2a,
940 dnnl_OIw4i4o = dnnl_ABc4b4a,
941 dnnl_OIw4o4i = dnnl_ABc4a4b,
942 dnnl_Oiw4o = dnnl_Abc4a,
943 dnnl_OIw8i16o2i = dnnl_ABc8b16a2b,
944 dnnl_OIw8i32o2i = dnnl_ABc8b32a2b,
945 dnnl_OIw8i64o2i = dnnl_ABc8b64a2b,
946 dnnl_OIw8i8o = dnnl_ABc8b8a,
947 dnnl_OIw8o16i2o = dnnl_ABc8a16b2a,
948 dnnl_IOw8o16i2o = dnnl_BAc8a16b2a,
949 dnnl_OIw8o8i = dnnl_ABc8a8b,
950 dnnl_OIw8o4i = dnnl_ABc8a4b,
951 dnnl_Owi16o = dnnl_Acb16a,
952 dnnl_OwI16o2i = dnnl_AcB16a2b,
953 dnnl_OwI16o4i = dnnl_AcB16a4b,
954 dnnl_Owi4o = dnnl_Acb4a,
955 dnnl_Owi8o = dnnl_Acb8a,
956
957 // weights, 4D
958 dnnl_IOhw16i16o = dnnl_BAcd16b16a,
959 dnnl_IOhw16o16i = dnnl_BAcd16a16b,
960 dnnl_Ohwi16o = dnnl_Acdb16a,
961 dnnl_OhwI16o2i = dnnl_AcdB16a2b,
962 dnnl_OhwI16o4i = dnnl_AcdB16a4b,
963 dnnl_Ohwi32o = dnnl_Acdb32a,
964 dnnl_Ohwi4o = dnnl_Acdb4a,
965 dnnl_Ohwi8o = dnnl_Acdb8a,
966 dnnl_OIhw16i16o = dnnl_ABcd16b16a,
967 dnnl_OIhw16i32o = dnnl_ABcd16b32a,
968 dnnl_OIhw16i64o = dnnl_ABcd16b64a,
969 dnnl_OIhw16o16i = dnnl_ABcd16a16b,
970 dnnl_Oihw16o = dnnl_Abcd16a,
971 dnnl_OIhw4i16o4i = dnnl_ABcd4b16a4b,
972 dnnl_OIhw4i32o4i = dnnl_ABcd4b32a4b,
973 dnnl_OIhw4i64o4i = dnnl_ABcd4b64a4b,
974 dnnl_OIhw16i16o4i = dnnl_ABcd16b16a4b,
975 dnnl_OIhw16i16o2i = dnnl_ABcd16b16a2b,
976 dnnl_OIhw16o16i2o = dnnl_ABcd16a16b2a,
977 dnnl_OIhw4i4o = dnnl_ABcd4b4a,
978 dnnl_OIhw4o4i = dnnl_ABcd4a4b,
979 dnnl_Oihw4o = dnnl_Abcd4a,
980 dnnl_OIhw8i16o2i = dnnl_ABcd8b16a2b,
981 dnnl_OIhw8i32o2i = dnnl_ABcd8b32a2b,
982 dnnl_OIhw8i64o2i = dnnl_ABcd8b64a2b,
983 dnnl_OIhw8i8o = dnnl_ABcd8b8a,
984 dnnl_OIhw8o16i2o = dnnl_ABcd8a16b2a,
985 dnnl_OIhw2i8o4i = dnnl_ABcd2b8a4b,
986 dnnl_IOhw8o16i2o = dnnl_BAcd8a16b2a,
987 dnnl_OIhw8o8i = dnnl_ABcd8a8b,
988 dnnl_OIhw8o4i = dnnl_ABcd8a4b,
989 dnnl_Owhi16o = dnnl_Adcb16a,
990
991 // weights, 5D
992 dnnl_Odhwi16o = dnnl_Acdeb16a,
993 dnnl_OdhwI16o2i = dnnl_AcdeB16a2b,
994 dnnl_OdhwI16o4i = dnnl_AcdeB16a4b,
995 dnnl_Odhwi4o = dnnl_Acdeb4a,
996 dnnl_Odhwi8o = dnnl_Acdeb8a,
997 dnnl_Odwhi16o = dnnl_Acedb16a,
998 dnnl_OIdhw16i16o = dnnl_ABcde16b16a,
999 dnnl_OIdhw16i32o = dnnl_ABcde16b32a,
1000 dnnl_OIdhw16i64o = dnnl_ABcde16b64a,
1001 dnnl_OIdhw16o16i = dnnl_ABcde16a16b,
1002 dnnl_Oidhw16o = dnnl_Abcde16a,
1003 dnnl_OIdhw4i4o = dnnl_ABcde4b4a,
1004 dnnl_OIdhw4o4i = dnnl_ABcde4a4b,
1005 dnnl_Oidhw4o = dnnl_Abcde4a,
1006 dnnl_OIdhw8i16o2i = dnnl_ABcde8b16a2b,
1007 dnnl_OIdhw8i32o2i = dnnl_ABcde8b32a2b,
1008 dnnl_OIdhw8i64o2i = dnnl_ABcde8b64a2b,
1009 dnnl_OIdhw8i8o = dnnl_ABcde8b8a,
1010 dnnl_OIdhw8o16i2o = dnnl_ABcde8a16b2a,
1011 dnnl_IOdhw8o16i2o = dnnl_BAcde8a16b2a,
1012 dnnl_OIdhw4i16o4i = dnnl_ABcde4b16a4b,
1013 dnnl_OIdhw4i32o4i = dnnl_ABcde4b32a4b,
1014 dnnl_OIdhw4i64o4i = dnnl_ABcde4b64a4b,
1015 dnnl_OIdhw16i16o4i = dnnl_ABcde16b16a4b,
1016 dnnl_OIdhw16i16o2i = dnnl_ABcde16b16a2b,
1017 dnnl_OIdhw2i8o4i = dnnl_ABcde2b8a4b,
1018 dnnl_OIdhw8o8i = dnnl_ABcde8a8b,
1019 dnnl_OIdhw8o4i = dnnl_ABcde8a4b,
1020 dnnl_IOdhw16i16o = dnnl_BAcde16b16a,
1021 dnnl_OIdhw4o8i8o4i = dnnl_ABcde4a8b8a4b,
1022 dnnl_IOdhw16o16i = dnnl_BAcde16a16b,
1023 dnnl_OIdhw16o16i2o = dnnl_ABcde16a16b2a,
1024
1025 // weights w/ groups, 3D
1026 dnnl_Goiw16g = dnnl_Abcd16a,
1027 dnnl_Goiw8g = dnnl_Abcd8a,
1028 dnnl_Goiw4g = dnnl_Abcd4a,
1029 dnnl_gIOw16o16i = dnnl_aCBd16b16c,
1030 dnnl_gIOw16i16o = dnnl_aCBd16c16b,
1031 dnnl_gOIw16i16o = dnnl_aBCd16c16b,
1032 dnnl_gOIw16o16i = dnnl_aBCd16b16c,
1033 dnnl_gOiw16o = dnnl_aBcd16b,
1034 dnnl_gOIw4i16o4i = dnnl_aBCd4c16b4c,
1035 dnnl_gOIw2i8o4i = dnnl_aBCd2c8b4c,
1036 dnnl_gOIw16i16o4i = dnnl_aBCd16c16b4c,
1037 dnnl_gOIw16i16o2i = dnnl_aBCd16c16b2c,
1038 dnnl_gOIw16o16i2o = dnnl_aBCd16b16c2b,
1039 dnnl_gOIw4i4o = dnnl_aBCd4c4b,
1040 dnnl_gOIw4o4i = dnnl_aBCd4b4c,
1041 dnnl_gOiw4o = dnnl_aBcd4b,
1042 dnnl_gOIw8i16o2i = dnnl_aBCd8c16b2c,
1043 dnnl_gOIw8i8o = dnnl_aBCd8c8b,
1044 dnnl_gOIw8o16i2o = dnnl_aBCd8b16c2b,
1045 dnnl_gIOw8o16i2o = dnnl_aCBd8b16c2b,
1046 dnnl_gOIw8o8i = dnnl_aBCd8b8c,
1047 dnnl_gOIw8o4i = dnnl_aBCd8b4c,
1048 dnnl_gOwi16o = dnnl_aBdc16b,
1049 dnnl_gOwI16o2i = dnnl_aBdC16b2c,
1050 dnnl_gOwI16o4i = dnnl_aBdC16b4c,
1051 dnnl_gOwi4o = dnnl_aBdc4b,
1052 dnnl_gOwi8o = dnnl_aBdc8b,
1053 dnnl_Goiw32g = dnnl_Abcd32a,
1054 dnnl_gOIw2i4o2i = dnnl_aBCd2c4b2c,
1055 dnnl_gOIw2o4i2o = dnnl_aBCd2b4c2b,
1056 dnnl_gOIw4i8o2i = dnnl_aBCd4c8b2c,
1057 dnnl_gOIw4o8i2o = dnnl_aBCd4b8c2b,
1058
1059 // weights w/ groups, 4D
1060 dnnl_gIOhw16i16o = dnnl_aCBde16c16b,
1061 dnnl_gIOhw16o16i = dnnl_aCBde16b16c,
1062 dnnl_gOhwi16o = dnnl_aBdec16b,
1063 dnnl_gOhwI16o2i = dnnl_aBdeC16b2c,
1064 dnnl_gOhwI16o4i = dnnl_aBdeC16b4c,
1065 dnnl_gOhwi32o = dnnl_aBdec32b,
1066 dnnl_gOhwi4o = dnnl_aBdec4b,
1067 dnnl_gOhwi8o = dnnl_aBdec8b,
1068 dnnl_Goihw16g = dnnl_Abcde16a,
1069 dnnl_gOIhw16i16o = dnnl_aBCde16c16b,
1070 dnnl_gOIhw16o16i = dnnl_aBCde16b16c,
1071 dnnl_gOihw16o = dnnl_aBcde16b,
1072 dnnl_gOIhw2i8o4i = dnnl_aBCde2c8b4c,
1073 dnnl_gOIhw4i16o4i = dnnl_aBCde4c16b4c,
1074 dnnl_gOIhw16i16o4i = dnnl_aBCde16c16b4c,
1075 dnnl_gOIhw16i16o2i = dnnl_aBCde16c16b2c,
1076 dnnl_gOIhw16o16i2o = dnnl_aBCde16b16c2b,
1077 dnnl_gOIhw4i4o = dnnl_aBCde4c4b,
1078 dnnl_gOIhw4o4i = dnnl_aBCde4b4c,
1079 dnnl_gOihw4o = dnnl_aBcde4b,
1080 dnnl_Goihw8g = dnnl_Abcde8a,
1081 dnnl_Goihw4g = dnnl_Abcde4a,
1082 dnnl_gOIhw8i16o2i = dnnl_aBCde8c16b2c,
1083 dnnl_gOIhw8i8o = dnnl_aBCde8c8b,
1084 dnnl_gOIhw8o16i2o = dnnl_aBCde8b16c2b,
1085 dnnl_gIOhw8o16i2o = dnnl_aCBde8b16c2b,
1086 dnnl_gOIhw8o8i = dnnl_aBCde8b8c,
1087 dnnl_gOIhw8o4i = dnnl_aBCde8b4c,
1088 dnnl_Goihw32g = dnnl_Abcde32a,
1089 dnnl_gOwhi16o = dnnl_aBedc16b,
1090
1091 dnnl_OIw4o8i8o4i = dnnl_ABc4a8b8a4b,
1092 dnnl_OIhw4o8i8o4i = dnnl_ABcd4a8b8a4b,
1093 dnnl_IOw4i8o8i4o = dnnl_BAc4b8a8b4a,
1094 dnnl_IOhw4i8o8i4o = dnnl_BAcd4b8a8b4a,
1095 dnnl_IOdhw4i8o8i4o = dnnl_BAcde4b8a8b4a,
1096
1097 dnnl_OIhw2o8i8o2i = dnnl_ABcd2a8b8a2b,
1098 dnnl_gOIw4o8i8o4i = dnnl_aBCd4b8c8b4c,
1099 dnnl_gOIhw4o8i8o4i = dnnl_aBCde4b8c8b4c,
1100 dnnl_gOIdhw4o8i8o4i = dnnl_aBCdef4b8c8b4c,
1101 dnnl_gIOw4i8o8i4o = dnnl_aCBd4c8b8c4b,
1102 dnnl_gIOhw4i8o8i4o = dnnl_aCBde4c8b8c4b,
1103 dnnl_gIOdhw4i8o8i4o = dnnl_aCBdef4c8b8c4b,
1104 dnnl_gOIhw2o8i8o2i = dnnl_aBCde2b8c8b2c,
1105 dnnl_gOIhw2i4o2i = dnnl_aBCde2c4b2c,
1106 dnnl_gOIhw2o4i2o = dnnl_aBCde2b4c2b,
1107 dnnl_gOIhw4i8o2i = dnnl_aBCde4c8b2c,
1108 dnnl_gOIhw4o8i2o = dnnl_aBCde4b8c2b,
1109
1110 // weights w/ groups, 6D
1111 dnnl_gIOdhw16i16o = dnnl_aCBdef16c16b,
1112 dnnl_gIOdhw16o16i = dnnl_aCBdef16b16c,
1113 dnnl_gOdhwi16o = dnnl_aBdefc16b,
1114 dnnl_gOdhwI16o2i = dnnl_aBdefC16b2c,
1115 dnnl_gOdhwI16o4i = dnnl_aBdefC16b4c,
1116 dnnl_gOdhwi4o = dnnl_aBdefc4b,
1117 dnnl_gOdhwi8o = dnnl_aBdefc8b,
1118 dnnl_gOdwhi16o = dnnl_aBdfec16b,
1119 dnnl_gOIdhw16i16o = dnnl_aBCdef16c16b,
1120 dnnl_gOIdhw4i16o4i = dnnl_aBCdef4c16b4c,
1121 dnnl_gOIdhw16i16o4i = dnnl_aBCdef16c16b4c,
1122 dnnl_gOIdhw2i8o4i = dnnl_aBCdef2c8b4c,
1123 dnnl_gOIdhw16i16o2i = dnnl_aBCdef16c16b2c,
1124 dnnl_gOIdhw16o16i = dnnl_aBCdef16b16c,
1125 dnnl_gOIdhw16o16i2o = dnnl_aBCdef16b16c2b,
1126 dnnl_gOidhw16o = dnnl_aBcdef16b,
1127 dnnl_gOIdhw4i4o = dnnl_aBCdef4c4b,
1128 dnnl_gOIdhw4o4i = dnnl_aBCdef4b4c,
1129 dnnl_gOidhw4o = dnnl_aBcdef4b,
1130 dnnl_gOIdhw8i16o2i = dnnl_aBCdef8c16b2c,
1131 dnnl_gOIdhw8i8o = dnnl_aBCdef8c8b,
1132 dnnl_gOIdhw8o16i2o = dnnl_aBCdef8b16c2b,
1133 dnnl_gIOdhw8o16i2o = dnnl_aCBdef8b16c2b,
1134 dnnl_gOIdhw8o8i = dnnl_aBCdef8b8c,
1135 dnnl_gOIdhw8o4i = dnnl_aBCdef8b4c,
1136 dnnl_Goidhw16g = dnnl_Abcdef16a,
1137 dnnl_Goidhw32g = dnnl_Abcdef32a,
1138 dnnl_gOIdhw2i4o2i = dnnl_aBCdef2c4b2c,
1139 dnnl_gOIdhw4i8o2i = dnnl_aBCdef4c8b2c,
1140 dnnl_gOIdhw2o4i2o = dnnl_aBCdef2b4c2b,
1141 dnnl_gOIdhw4o8i2o = dnnl_aBCdef4b8c2b,
1142 // weights, 3D
1143 dnnl_Owi32o = dnnl_Acb32a,
1144 dnnl_OwI32o2i = dnnl_AcB32a2b,
1145 dnnl_OwI32o4i = dnnl_AcB32a4b,
1146 dnnl_Owi48o = dnnl_Acb48a,
1147 dnnl_OwI48o2i = dnnl_AcB48a2b,
1148 dnnl_OwI48o4i = dnnl_AcB48a4b,
1149 dnnl_Owi64o = dnnl_Acb64a,
1150 dnnl_OwI64o2i = dnnl_AcB64a2b,
1151 dnnl_OwI64o4i = dnnl_AcB64a4b,
1152 dnnl_wIo2i = dnnl_cBa2b,
1153 dnnl_wIo4i = dnnl_cBa4b,
1154 dnnl_gOwi32o = dnnl_aBdc32b,
1155 dnnl_gOwI32o2i = dnnl_aBdC32b2c,
1156 dnnl_gOwI32o4i = dnnl_aBdC32b4c,
1157 dnnl_gOwi48o = dnnl_aBdc48b,
1158 dnnl_gOwI48o2i = dnnl_aBdC48b2c,
1159 dnnl_gOwI48o4i = dnnl_aBdC48b4c,
1160 dnnl_gOwi64o = dnnl_aBdc64b,
1161 dnnl_gOwI64o2i = dnnl_aBdC64b2c,
1162 dnnl_gOwI64o4i = dnnl_aBdC64b4c,
1163 dnnl_gwio = dnnl_adcb,
1164 dnnl_gwIo2i = dnnl_adCb2c,
1165 dnnl_gwIo4i = dnnl_adCb4c,
1166 // weights, 4D
1167 dnnl_OhwI32o = dnnl_Acdb32a,
1168 dnnl_OhwI32o2i = dnnl_AcdB32a2b,
1169 dnnl_OhwI32o4i = dnnl_AcdB32a4b,
1170 dnnl_Ohwi48o = dnnl_Acdb48a,
1171 dnnl_OhwI48o2i = dnnl_AcdB48a2b,
1172 dnnl_OhwI48o4i = dnnl_AcdB48a4b,
1173 dnnl_Ohwi64o = dnnl_Acdb64a,
1174 dnnl_OhwI64o2i = dnnl_AcdB64a2b,
1175 dnnl_OhwI64o4i = dnnl_AcdB64a4b,
1176 dnnl_hwIo2i = dnnl_cdBa2b,
1177 dnnl_hwIo4i = dnnl_cdBa4b,
1178 dnnl_gOhwI32o = dnnl_aBdec32b,
1179 dnnl_gOhwI32o2i = dnnl_aBdeC32b2c,
1180 dnnl_gOhwI32o4i = dnnl_aBdeC32b4c,
1181 dnnl_gOhwi48o = dnnl_aBdec48b,
1182 dnnl_gOhwI48o2i = dnnl_aBdeC48b2c,
1183 dnnl_gOhwI48o4i = dnnl_aBdeC48b4c,
1184 dnnl_gOhwi64o = dnnl_aBdec64b,
1185 dnnl_gOhwI64o2i = dnnl_aBdeC64b2c,
1186 dnnl_gOhwI64o4i = dnnl_aBdeC64b4c,
1187 dnnl_ghwio = dnnl_adecb,
1188 dnnl_ghwIo2i = dnnl_adeCb2c,
1189 dnnl_ghwIo4i = dnnl_adeCb4c,
1190 // weights, 5D
1191 dnnl_Odhwi32o = dnnl_Acdeb32a,
1192 dnnl_OdhwI32o2i = dnnl_AcdeB32a2b,
1193 dnnl_OdhwI32o4i = dnnl_AcdeB32a4b,
1194 dnnl_Odhwi48o = dnnl_Acdeb48a,
1195 dnnl_OdhwI48o2i = dnnl_AcdeB48a2b,
1196 dnnl_OdhwI48o4i = dnnl_AcdeB48a4b,
1197 dnnl_Odhwi64o = dnnl_Acdeb64a,
1198 dnnl_OdhwI64o2i = dnnl_AcdeB64a2b,
1199 dnnl_OdhwI64o4i = dnnl_AcdeB64a4b,
1200 dnnl_dhwIo2i = dnnl_cdeBa2b,
1201 dnnl_dhwIo4i = dnnl_cdeBa4b,
1202 dnnl_gOdhwi32o = dnnl_aBdefc32b,
1203 dnnl_gOdhwI32o2i = dnnl_aBdefC32b2c,
1204 dnnl_gOdhwI32o4i = dnnl_aBdefC32b4c,
1205 dnnl_gOdhwi48o = dnnl_aBdefc48b,
1206 dnnl_gOdhwI48o2i = dnnl_aBdefC48b2c,
1207 dnnl_gOdhwI48o4i = dnnl_aBdefC48b4c,
1208 dnnl_gOdhwi64o = dnnl_aBdefc64b,
1209 dnnl_gOdhwI64o2i = dnnl_aBdefC64b2c,
1210 dnnl_gOdhwI64o4i = dnnl_aBdefC64b4c,
1211 dnnl_gdhwio = dnnl_adefcb,
1212 dnnl_gdhwIo2i = dnnl_adefCb2c,
1213 dnnl_gdhwIo4i = dnnl_adefCb4c,
1214 dnnl_OI16i32o4i = dnnl_AB16b32a4b,
1215 dnnl_OI16i48o4i = dnnl_AB16b48a4b,
1216 dnnl_OI16i64o4i = dnnl_AB16b64a4b,
1217 dnnl_OI16i16o2i = dnnl_AB16b16a2b,
1218 dnnl_OI16i32o2i = dnnl_AB16b32a2b,
1219 dnnl_OI16i48o2i = dnnl_AB16b48a2b,
1220 dnnl_OI16i64o2i = dnnl_AB16b64a2b,
1221 dnnl_OIw16i32o4i = dnnl_ABc16b32a4b,
1222 dnnl_OIw16i48o4i = dnnl_ABc16b48a4b,
1223 dnnl_OIw16i64o4i = dnnl_ABc16b64a4b,
1224 dnnl_OIw16i32o2i = dnnl_ABc16b32a2b,
1225 dnnl_OIw16i48o2i = dnnl_ABc16b48a2b,
1226 dnnl_OIw16i64o2i = dnnl_ABc16b64a2b,
1227 dnnl_OIhw16i32o4i = dnnl_ABcd16b32a4b,
1228 dnnl_OIhw16i48o4i = dnnl_ABcd16b48a4b,
1229 dnnl_OIhw16i64o4i = dnnl_ABcd16b64a4b,
1230 dnnl_OIhw16i32o2i = dnnl_ABcd16b32a2b,
1231 dnnl_OIhw16i48o2i = dnnl_ABcd16b48a2b,
1232 dnnl_OIhw16i64o2i = dnnl_ABcd16b64a2b,
1233 dnnl_OIdhw16i32o4i = dnnl_ABcde16b32a4b,
1234 dnnl_OIdhw16i48o4i = dnnl_ABcde16b48a4b,
1235 dnnl_OIdhw16i64o4i = dnnl_ABcde16b64a4b,
1236 dnnl_OIdhw16i32o2i = dnnl_ABcde16b32a2b,
1237 dnnl_OIdhw16i48o2i = dnnl_ABcde16b48a2b,
1238 dnnl_OIdhw16i64o2i = dnnl_ABcde16b64a2b,
1239 dnnl_OwI16i16o2i = dnnl_AcB16b16a2b,
1240 dnnl_OwI16i16o4i = dnnl_AcB16b16a4b,
1241 dnnl_OhwI16i16o2i = dnnl_AcdB16b16a2b,
1242 dnnl_OhwI16i16o4i = dnnl_AcdB16b16a4b,
1243 dnnl_OdhwI16i16o2i = dnnl_AcdeB16b16a2b,
1244 dnnl_OdhwI16i16o4i = dnnl_AcdeB16b16a4b,
1245 dnnl_gOwI16i16o2i = dnnl_aBdC16c16b2c,
1246 dnnl_gOwI16i16o4i = dnnl_aBdC16c16b4c,
1247 dnnl_gOhwI16i16o2i = dnnl_aBdeC16c16b2c,
1248 dnnl_gOhwI16i16o4i = dnnl_aBdeC16c16b4c,
1249 dnnl_gOdhwI16i16o2i = dnnl_aBdefC16c16b2c,
1250 dnnl_gOdhwI16i16o4i = dnnl_aBdefC16c16b4c,
1251 dnnl_OwI16i32o2i = dnnl_AcB16b32a2b,
1252 dnnl_OwI16i32o4i = dnnl_AcB16b32a4b,
1253 dnnl_OwI16i48o2i = dnnl_AcB16b48a2b,
1254 dnnl_OwI16i48o4i = dnnl_AcB16b48a4b,
1255 dnnl_OwI16i64o2i = dnnl_AcB16b64a2b,
1256 dnnl_OwI16i64o4i = dnnl_AcB16b64a4b,
1257 dnnl_gOwI16i32o2i = dnnl_aBdC16c32b2c,
1258 dnnl_gOwI16i32o4i = dnnl_aBdC16c32b4c,
1259 dnnl_gOwI16i48o2i = dnnl_aBdC16c48b2c,
1260 dnnl_gOwI16i48o4i = dnnl_aBdC16c48b4c,
1261 dnnl_gOwI16i64o2i = dnnl_aBdC16c64b2c,
1262 dnnl_gOwI16i64o4i = dnnl_aBdC16c64b4c,
1263 dnnl_OhwI16i32o2i = dnnl_AcdB16b32a2b,
1264 dnnl_OhwI16i32o4i = dnnl_AcdB16b32a4b,
1265 dnnl_OhwI16i48o2i = dnnl_AcdB16b48a2b,
1266 dnnl_OhwI16i48o4i = dnnl_AcdB16b48a4b,
1267 dnnl_OhwI16i64o2i = dnnl_AcdB16b64a2b,
1268 dnnl_OhwI16i64o4i = dnnl_AcdB16b64a4b,
1269 dnnl_gOhwI16i32o2i = dnnl_aBdeC16c32b2c,
1270 dnnl_gOhwI16i32o4i = dnnl_aBdeC16c32b4c,
1271 dnnl_gOhwI16i48o2i = dnnl_aBdeC16c48b2c,
1272 dnnl_gOhwI16i48o4i = dnnl_aBdeC16c48b4c,
1273 dnnl_gOhwI16i64o2i = dnnl_aBdeC16c64b2c,
1274 dnnl_gOhwI16i64o4i = dnnl_aBdeC16c64b4c,
1275 dnnl_OdhwI16i32o2i = dnnl_AcdeB16b32a2b,
1276 dnnl_OdhwI16i32o4i = dnnl_AcdeB16b32a4b,
1277 dnnl_OdhwI16i48o2i = dnnl_AcdeB16b48a2b,
1278 dnnl_OdhwI16i48o4i = dnnl_AcdeB16b48a4b,
1279 dnnl_OdhwI16i64o2i = dnnl_AcdeB16b64a2b,
1280 dnnl_OdhwI16i64o4i = dnnl_AcdeB16b64a4b,
1281 dnnl_gOdhwI16i32o2i = dnnl_aBdefC16c32b2c,
1282 dnnl_gOdhwI16i32o4i = dnnl_aBdefC16c32b4c,
1283 dnnl_gOdhwI16i48o2i = dnnl_aBdefC16c48b2c,
1284 dnnl_gOdhwI16i48o4i = dnnl_aBdefC16c48b4c,
1285 dnnl_gOdhwI16i64o2i = dnnl_aBdefC16c64b2c,
1286 dnnl_gOdhwI16i64o4i = dnnl_aBdefC16c64b4c,
1287 dnnl_hwioG16g = dnnl_decbA16a,
1288 dnnl_NCdhw40n16c = dnnl_ABcde40a16b,
1289 dnnl_NCw40n16c = dnnl_ABc40a16b,
1290 dnnl_NChw40n16c = dnnl_ABcd40a16b,
1291 dnnl_NCw40n32c = dnnl_ABc40a32b,
1292 dnnl_NChw40n32c = dnnl_ABcd40a32b,
1293 dnnl_NCdhw40n32c = dnnl_ABcde40a32b,
1294 dnnl_OIdhw4o8i8o2i = dnnl_ABcde4a8b8a2b,
1295 dnnl_OIhw4o8i8o2i = dnnl_ABcd4a8b8a2b,
1296 dnnl_OIw4o8i8o2i = dnnl_ABc4a8b8a2b,
1297 dnnl_gOIdhw4o8i8o2i = dnnl_aBCdef4b8c8b2c,
1298 dnnl_gOIhw4o8i8o2i = dnnl_aBCde4b8c8b2c,
1299 dnnl_gOIw4o8i8o2i = dnnl_aBCd4b8c8b2c,
1300 dnnl_IOdhw4i8o8i2o = dnnl_BAcde4b8a8b2a,
1301 dnnl_IOhw4i8o8i2o = dnnl_BAcd4b8a8b2a,
1302 dnnl_IOw4i8o8i2o = dnnl_BAc4b8a8b2a,
1303 dnnl_gIOdhw4i8o8i2o = dnnl_aCBdef4c8b8c2b,
1304 dnnl_gIOhw4i8o8i2o = dnnl_aCBde4c8b8c2b,
1305 dnnl_gIOw4i8o8i2o = dnnl_aCBd4c8b8c2b,
1306 dnnl_NCw2c32n8c = dnnl_ABc2b32a8b,
1307 dnnl_NChw2c32n8c = dnnl_ABcd2b32a8b,
1308 dnnl_NCdhw2c32n8c = dnnl_ABcde2b32a8b,
1309 dnnl_OIw2i8o16i4o = dnnl_ABc2b8a16b4a,
1310 dnnl_OIhw2i8o16i4o = dnnl_ABcd2b8a16b4a,
1311 dnnl_OIdhw2i8o16i4o = dnnl_ABcde2b8a16b4a,
1312 dnnl_OIw2o8i16o4i = dnnl_ABc2a8b16a4b,
1313 dnnl_OIw2o8i16o2i = dnnl_ABc2a8b16a2b,
1314 dnnl_IOw2i8o16i4o = dnnl_BAc2b8a16b4a,
1315 dnnl_IOw2i8o16i2o = dnnl_BAc2b8a16b2a,
1316 dnnl_OIhw2o8i16o4i = dnnl_ABcd2a8b16a4b,
1317 dnnl_OIhw2o8i16o2i = dnnl_ABcd2a8b16a2b,
1318 dnnl_IOhw2i8o16i4o = dnnl_BAcd2b8a16b4a,
1319 dnnl_IOhw2i8o16i2o = dnnl_BAcd2b8a16b2a,
1320 dnnl_OIdhw2o8i16o4i = dnnl_ABcde2a8b16a4b,
1321 dnnl_OIdhw2o8i16o2i = dnnl_ABcde2a8b16a2b,
1322 dnnl_IOdhw2i8o16i4o = dnnl_BAcde2b8a16b4a,
1323 dnnl_IOdhw2i8o16i2o = dnnl_BAcde2b8a16b2a,
1324 dnnl_gOIw2o8i16o2i = dnnl_aBCd2b8c16b2c,
1325 dnnl_gIOw2i8o16i2o = dnnl_aCBd2c8b16c2b,
1326 dnnl_gIOhw2i8o16i2o = dnnl_aBCde2c8b16c2b,
1327 dnnl_gIOdhw2i8o16i2o = dnnl_aBCdef2c8b16c2b,
1328 dnnl_gOIhw2o8i16o2i = dnnl_aBCde2b8c16b2c,
1329 dnnl_gOIdhw2o8i16o2i = dnnl_aBCdef2b8c16b2c,
1330 dnnl_gOIw2o8i16o4i = dnnl_aBCd2b8c16b4c,
1331 dnnl_gOIhw2o8i16o4i = dnnl_aBCde2b8c16b4c,
1332} dnnl_format_tag_t;
1333
1334/// @} dnnl_api_memory
1335
1336/// @addtogroup dnnl_api_primitives
1337/// @{
1338/// @addtogroup dnnl_api_primitives_common
1339/// @{
1340
1341/// Kinds of propagation.
1342typedef enum {
1343 // TODO: suggest renames
1344 /// Undefined propagation type.
1345 dnnl_prop_kind_undef = 0,
1346 /// Forward data propagation (training mode). In this mode primitives
1347 /// perform computations necessary for subsequent backward propagation.
1348 dnnl_forward_training = 64,
1349 /// Forward data propagation (inference mode). In this mode primitives
1350 /// perform only computations that are necessary for inference and omit
1351 /// computations that are necessary only for backward propagation.
1352 dnnl_forward_inference = 96,
1353 /// Forward data propagation (alias for @c dnnl_forward_inference).
1354 dnnl_forward_scoring = dnnl_forward_inference,
1355 /// Forward data propagation (alias for @c dnnl_forward_training).
1356 dnnl_forward = dnnl_forward_training,
1357 /// Backward propagation (with respect to all parameters).
1358 dnnl_backward = 128,
1359 /// Backward data propagation.
1360 dnnl_backward_data = 160,
1361 /// Backward weights propagation.
1362 dnnl_backward_weights = 192,
1363 /// Backward bias propagation.
1364 dnnl_backward_bias = 193,
1365} dnnl_prop_kind_t;
1366
1367/// Kinds of primitives. Used to implement a way to extend the library with new
1368/// primitives without changing the ABI.
1369typedef enum {
1370 /// Undefined primitive
1371 dnnl_undefined_primitive,
1372 /// A reorder primitive.
1373 dnnl_reorder,
1374 /// A shuffle primitive.
1375 dnnl_shuffle,
1376 /// A (out-of-place) concat primitive.
1377 dnnl_concat,
1378 /// A sum primitive.
1379 dnnl_sum,
1380 /// A convolution primitive.
1381 dnnl_convolution,
1382 /// A deconvolution primitive.
1383 dnnl_deconvolution,
1384 /// An element-wise primitive.
1385 dnnl_eltwise,
1386 /// A softmax primitive.
1387 dnnl_softmax,
1388 /// A pooling primitive.
1389 dnnl_pooling,
1390 /// An LRN primitive.
1391 dnnl_lrn,
1392 /// A batch normalization primitive.
1393 dnnl_batch_normalization,
1394 /// A layer normalization primitive.
1395 dnnl_layer_normalization,
1396 /// An inner product primitive.
1397 dnnl_inner_product,
1398 /// A rnn primitive.
1399 dnnl_rnn,
1400 /// A matrix multiplication primitive (internal).
1401 dnnl_gemm,
1402 /// A binary primitive.
1403 dnnl_binary,
1404 /// A logsoftmax primitive.
1405 dnnl_logsoftmax,
1406 /// A matrix multiplication primitive.
1407 dnnl_matmul,
1408 /// A resampling primitive.
1409 dnnl_resampling,
1410 /// A pooling version 2 primitive (pooling with dilation support).
1411 dnnl_pooling_v2,
1412 /// A reduction primitive.
1413 dnnl_reduction,
1414 /// A PReLU primitive.
1415 dnnl_prelu,
1416 /// A softmax version 2 primitive (softmax with destination memory
1417 /// descriptor and algorithm kind).
1418 dnnl_softmax_v2,
1419 /// A layer normalization version 2 primitive (layer normalization with
1420 /// destination memory descriptor).
1421 dnnl_layer_normalization_v2,
1422
1423 /// Parameter to allow internal only primitives without undefined behavior.
1424 /// This parameter is chosen to be valid for so long as sizeof(int) >= 2.
1425 dnnl_primitive_kind_max = 0x7fff,
1426} dnnl_primitive_kind_t;
1427
1428/// Kinds of algorithms.
1429typedef enum {
1430 dnnl_alg_kind_undef,
1431 /// Direct convolution
1432 dnnl_convolution_direct = 0x1,
1433 /// Winograd convolution
1434 dnnl_convolution_winograd = 0x2,
1435 /// Convolution algorithm(either direct or Winograd) is chosen just in time
1436 dnnl_convolution_auto = 0x3,
1437 /// Direct deconvolution
1438 dnnl_deconvolution_direct = 0xa,
1439 /// Winograd deconvolution
1440 dnnl_deconvolution_winograd = 0xb,
1441 /// Eltwise: ReLU
1442 dnnl_eltwise_relu = 0x1f,
1443 /// Eltwise: hyperbolic tangent non-linearity (tanh)
1444 dnnl_eltwise_tanh = 0x2f,
1445 /// Eltwise: exponential linear unit (elu)
1446 dnnl_eltwise_elu = 0x3f,
1447 /// Eltwise: square
1448 dnnl_eltwise_square = 0x4f,
1449 /// Eltwise: abs
1450 dnnl_eltwise_abs = 0x5f,
1451 /// Eltwise: square root
1452 dnnl_eltwise_sqrt = 0x6f,
1453 /// Eltwise: linear
1454 dnnl_eltwise_linear = 0x7f,
1455 /// Eltwise: bounded_relu
1456 dnnl_eltwise_bounded_relu = 0x8f,
1457 /// Eltwise: soft_relu
1458 dnnl_eltwise_soft_relu = 0x9f,
1459 /// Eltwise: soft_relu version 2
1460 dnnl_eltwise_soft_relu_v2 = 0xa0,
1461 /// Eltwise: hardsigmoid
1462 dnnl_eltwise_hardsigmoid = 0xa1,
1463 /// Eltwise: logistic
1464 dnnl_eltwise_logistic = 0xaf,
1465 /// Eltwise: exponent
1466 dnnl_eltwise_exp = 0xbf,
1467 /// Eltwise: gelu
1468 ///
1469 /// @note Tanh approximation formula is used to approximate
1470 /// the cumulative distribution function of a Gaussian here
1471 dnnl_eltwise_gelu_tanh = 0xcf,
1472 /// Eltwise: tanh-based gelu (alias for dnnl_eltwise_gelu_tanh)
1473 dnnl_eltwise_gelu = dnnl_eltwise_gelu_tanh,
1474 /// Eltwise: swish
1475 dnnl_eltwise_swish = 0xdf,
1476 /// Eltwise: natural logarithm
1477 dnnl_eltwise_log = 0xef,
1478 /// Eltwise: clip
1479 dnnl_eltwise_clip = 0xff,
1480 /// Eltwise: clip version 2
1481 dnnl_eltwise_clip_v2 = 0x10,
1482 /// Eltwise: pow
1483 dnnl_eltwise_pow = 0x20,
1484 /// Eltwise: erf-based gelu
1485 dnnl_eltwise_gelu_erf = 0x30,
1486 /// Eltwise: round
1487 dnnl_eltwise_round = 0x40,
1488 /// Eltwise: logsigmoid
1489 dnnl_eltwise_logsigmoid = 0x50,
1490 /// Eltwise: mish
1491 dnnl_eltwise_mish = 0x60,
1492 /// Eltwise: hardswish
1493 dnnl_eltwise_hardswish = 0x70,
1494 /// Eltwise: ReLU (dst for backward)
1495 dnnl_eltwise_relu_use_dst_for_bwd = 0x100,
1496 /// Eltwise: hyperbolic tangent non-linearity (tanh) (dst for backward)
1497 dnnl_eltwise_tanh_use_dst_for_bwd = 0x101,
1498 /// Eltwise: exponential linear unit (elu) (dst for backward)
1499 dnnl_eltwise_elu_use_dst_for_bwd = 0x102,
1500 /// Eltwise: square root (dst for backward)
1501 dnnl_eltwise_sqrt_use_dst_for_bwd = 0x103,
1502 /// Eltwise: logistic (dst for backward)
1503 dnnl_eltwise_logistic_use_dst_for_bwd = 0x104,
1504 /// Eltwise: exp (dst for backward)
1505 dnnl_eltwise_exp_use_dst_for_bwd = 0x105,
1506 /// Eltwise: clip version 2 (dst for backward)
1507 dnnl_eltwise_clip_v2_use_dst_for_bwd = 0x106,
1508 /// Max pooling
1509 dnnl_pooling_max = 0x1ff,
1510 /// Average pooling include padding
1511 dnnl_pooling_avg_include_padding = 0x2ff,
1512 /// Average pooling exclude padding
1513 dnnl_pooling_avg_exclude_padding = 0x3ff,
1514 /// Average pooling (alias for #dnnl_pooling_avg_exclude_padding)
1515 dnnl_pooling_avg = dnnl_pooling_avg_exclude_padding,
1516 /// Local response normalization (LRN) across multiple channels
1517 dnnl_lrn_across_channels = 0xaff,
1518 /// LRN within a single channel
1519 dnnl_lrn_within_channel = 0xbff,
1520 /// RNN cell
1521 dnnl_vanilla_rnn = 0x1fff,
1522 /// LSTM cell
1523 dnnl_vanilla_lstm = 0x2fff,
1524 /// GRU cell
1525 dnnl_vanilla_gru = 0x3fff,
1526 /// GRU cell with linear before reset
1527 ///
1528 /// Modification of original GRU cell. Differs from #dnnl_vanilla_gru
1529 /// in how the new memory gate is calculated:
1530 /// \f[ c_t = tanh(W_c*x_t + b_{c_x} + r_t*(U_c*h_{t-1}+b_{c_h})) \f]
1531 /// Primitive expects 4 biases on input:
1532 /// \f$[b_{u}, b_{r}, b_{c_x}, b_{c_h}]\f$
1533 dnnl_lbr_gru = 0x4fff,
1534 /// AUGRU cell
1535 dnnl_vanilla_augru = 0x5fff,
1536 /// AUGRU cell with linear before reset
1537 dnnl_lbr_augru = 0x6fff,
1538 /// Binary add
1539 dnnl_binary_add = 0x1fff0,
1540 /// Binary mul
1541 dnnl_binary_mul = 0x1fff1,
1542 /// Binary max
1543 dnnl_binary_max = 0x1fff2,
1544 /// Binary min
1545 dnnl_binary_min = 0x1fff3,
1546 /// Binary div
1547 dnnl_binary_div = 0x1fff4,
1548 /// Binary sub
1549 dnnl_binary_sub = 0x1fff5,
1550 /// Binary greater or equal
1551 dnnl_binary_ge = 0x1fff6,
1552 /// Binary greater than
1553 dnnl_binary_gt = 0x1fff7,
1554 /// Binary less or equal
1555 dnnl_binary_le = 0x1fff8,
1556 /// Binary less than
1557 dnnl_binary_lt = 0x1fff9,
1558 /// Binary equal
1559 dnnl_binary_eq = 0x1fffa,
1560 /// Binary not equal
1561 dnnl_binary_ne = 0x1fffb,
1562 /// Nearest Neighbor Resampling Method
1563 dnnl_resampling_nearest = 0x2fff0,
1564 /// Linear Resampling Method
1565 dnnl_resampling_linear = 0x2fff1,
1566 /// Reduction using max
1567 dnnl_reduction_max,
1568 /// Reduction using min
1569 dnnl_reduction_min,
1570 /// Reduction using sum
1571 dnnl_reduction_sum,
1572 /// Reduction using mul
1573 dnnl_reduction_mul,
1574 /// Reduction using mean
1575 dnnl_reduction_mean,
1576 /// Reduction using lp norm
1577 dnnl_reduction_norm_lp_max,
1578 /// Reduction using lp norm
1579 dnnl_reduction_norm_lp_sum,
1580 /// Reduction using lp norm without final pth-root
1581 dnnl_reduction_norm_lp_power_p_max,
1582 /// Reduction using lp norm without final pth-root
1583 dnnl_reduction_norm_lp_power_p_sum,
1584 /// Softmax
1585 dnnl_softmax_accurate = 0x30000,
1586 /// Logsoftmax
1587 dnnl_softmax_log,
1588} dnnl_alg_kind_t;
1589
1590/// Flags for normalization primitives.
1591typedef enum {
1592 /// Use no normalization flags
1593 ///
1594 /// If specified
1595 /// - on forward training propagation mean and variance are computed and
1596 /// stored as output
1597 /// - on backward propagation compute full derivative wrt data
1598 /// - on backward propagation prop_kind == #dnnl_backward_data has the same
1599 /// behavior as prop_kind == #dnnl_backward
1600 dnnl_normalization_flags_none = 0x0U,
1601
1602 /// Use global statistics
1603 ///
1604 /// If specified
1605 /// - on forward propagation use mean and variance provided by user (input)
1606 /// - on backward propagation reduces the amount of computations, since
1607 /// mean and variance are considered as constants
1608 ///
1609 /// If not specified:
1610 /// - on forward propagation mean and variance are computed and stored as
1611 /// output
1612 /// - on backward propagation compute full derivative wrt data
1613 dnnl_use_global_stats = 0x1U,
1614
1615 /// Use scale and shift parameters
1616 ///
1617 /// If specified:
1618 /// - on forward propagation use scale and shift (aka scale and bias) for
1619 /// the normalization results
1620 /// - on backward propagation (for prop_kind == #dnnl_backward) compute
1621 /// diff wrt scale and shift (hence one extra output used)
1622 ///
1623 /// If no specified:
1624 /// - on backward propagation prop_kind == #dnnl_backward_data has the
1625 /// same behavior as prop_kind == #dnnl_backward
1626 dnnl_use_scaleshift = 0x2U,
1627
1628 /// Fuse with ReLU
1629 ///
1630 /// The flag implies negative slope being 0. On training this is the only
1631 /// configuration supported. For inference, to use non-zero negative slope
1632 /// consider using @ref dev_guide_attributes_post_ops.
1633 ///
1634 /// If specified:
1635 /// - on inference this option behaves the same as if the primitive were
1636 /// fused with ReLU using post ops API with zero negative slope.
1637 /// - on training primitive requires workspace (required to be able to
1638 /// perform backward pass)
1639 dnnl_fuse_norm_relu = 0x4U,
1640
1641 /// Use scale parameter
1642 ///
1643 /// If specified:
1644 /// - on forward propagation use scale for the normalization results
1645 /// - on backward propagation (for prop_kind == #dnnl_backward) compute
1646 /// diff wrt scale (hence one extra output used)
1647 dnnl_use_scale = 0x8U,
1648
1649 /// Use shift parameter
1650 ///
1651 /// If specified:
1652 /// - on forward propagation use shift (aka bias) for the normalization
1653 /// results
1654 /// - on backward propagation (for prop_kind == #dnnl_backward) compute
1655 /// diff wrt shift (hence one extra output used)
1656 dnnl_use_shift = 0x10U,
1657
1658 /// Fuse with Add and then fuse with ReLU
1659 ///
1660 /// If specified:
1661 ///
1662 /// - on forward propagation apply element-wise binary Add operation to
1663 /// to the normalization results with an additional input tensor and then
1664 /// apply ReLU with negative slope being 0.
1665 /// - on training primitive requires workspace (required to be able to
1666 /// perform backward pass).
1667 /// - on backward propagation save the result of backward ReLU operation
1668 /// with input tensor and workspace from forward pass to extra output
1669 /// tensor and then perform backward normalization.
1670 dnnl_fuse_norm_add_relu = 0x20U,
1671
1672} dnnl_normalization_flags_t;
1673
1674/// @} dnnl_api_primitives_common
1675/// @} dnnl_api_primitives
1676
1677/// @addtogroup dnnl_api_memory
1678/// @{
1679
1680/// Maximum number of dimensions a tensor can have. Only restricts the amount
1681/// of space used for the tensor description. Individual computational
1682/// primitives may support only tensors of certain dimensions.
1683#define DNNL_MAX_NDIMS 12
1684
1685/// A wildcard value for dimensions that are unknown at a primitive creation
1686/// time.
1687#define DNNL_RUNTIME_DIM_VAL INT64_MIN
1688
1689/// A `size_t` counterpart of the DNNL_RUNTIME_DIM_VAL.
1690/// For instance, this value is returned by dnnl_memory_desc_get_size() if
1691/// either of the dimensions or strides equal to #DNNL_RUNTIME_DIM_VAL.
1692#define DNNL_RUNTIME_SIZE_VAL ((size_t)DNNL_RUNTIME_DIM_VAL)
1693
1694/// @cond DO_NOT_DOCUMENT_THIS
1695/// Hex representation for a **special** quiet NAN (!= NAN from math.h)
1696static const union {
1697 unsigned u;
1698 float f;
1699} DNNL_RUNTIME_F32_VAL_REP = {0x7fc000d0};
1700/// @endcond
1701
1702/// A wildcard value for floating point values that are unknown at a primitive
1703/// creation time.
1704#define DNNL_RUNTIME_F32_VAL (DNNL_RUNTIME_F32_VAL_REP.f)
1705
1706/// @cond DO_NOT_DOCUMENT_THIS
1707static const int DNNL_RUNTIME_S32_VAL_REP = INT32_MIN;
1708/// @endcond
1709
1710/// A wildcard value for int32_t values that are unknown at a primitive creation
1711/// time.
1712#define DNNL_RUNTIME_S32_VAL DNNL_RUNTIME_S32_VAL_REP
1713
1714/// A type to describe tensor dimension.
1715typedef int64_t dnnl_dim_t;
1716
1717/// A type to describe tensor dimensions.
1718typedef dnnl_dim_t dnnl_dims_t[DNNL_MAX_NDIMS];
1719
1720/// Generic description of blocked data layout for most memory formats.
1721///
1722/// @sa @ref dev_guide_understanding_memory_formats
1723typedef struct {
1724 /// The strides between the outermost blocks.
1725 /// In case of plain (non-blocked) formats the strides between dimensions.
1726 dnnl_dims_t strides;
1727 // Innermost section
1728 // ASSUMPTION: the innermost blocks are always dense
1729 /// The number of innermost blocks, e.g. 3 in case of `OIhw_4i16o4i_`
1730 int inner_nblks;
1731 /// The size of the blocks, e.g. `{4, 16, 4}` in case of `OIhw_4i16o4i`
1732 dnnl_dims_t inner_blks;
1733 /// The logical indices of the blocks, e.g. `{1, 0, 1}` in case of
1734 /// `4i16o4i`, because `i` is the 1st dim and `o` is the 0st dim
1735 dnnl_dims_t inner_idxs;
1736} dnnl_blocking_desc_t;
1737
1738/// Winograd-specific formats
1739typedef enum {
1740 /// Undefined memory format, used for empty memory descriptors.
1741 dnnl_wino_undef = 0,
1742 // Tensors of weights for 2x3 winograd convolutions.
1743 dnnl_wino_wei_aaOIoi, ///< Internal weights format for 2x3 Winograd
1744 dnnl_wino_wei_aaOio, ///< Internal weights format for 2x3 Winograd
1745 dnnl_wino_wei_aaOBiOo, ///< Internal weights format for 2x3 Winograd
1746 // Tensor of weights for 4x3 convolution.
1747 dnnl_wino_wei_OBaaIBOIio ///< Internal weights format for 4x3 Winograd
1748} dnnl_wino_memory_format_t;
1749
1750/// Description of tensor of weights for winograd 2x3 convolution.
1751typedef struct {
1752 dnnl_wino_memory_format_t wino_format;
1753 int r;
1754 int alpha;
1755 int ic;
1756 int oc;
1757 int ic_block;
1758 int oc_block;
1759 int ic2_block;
1760 int oc2_block;
1761 float adj_scale;
1762 size_t size;
1763} dnnl_wino_desc_t;
1764
1765typedef enum {
1766 dnnl_packed_format_undef = 0,
1767 dnnl_ldigo_p,
1768 dnnl_ldgoi_p,
1769 dnnl_ldio_p
1770} dnnl_rnn_packed_memory_format_t;
1771
1772/// Maximum number of parts of RNN weights tensor that require separate
1773/// computation.
1774#define DNNL_RNN_MAX_N_PARTS 4
1775
1776/// Description of tensor of packed weights for rnn.
1777typedef struct {
1778 dnnl_rnn_packed_memory_format_t format;
1779 int n_parts;
1780 int n;
1781 int ldb;
1782 int parts[DNNL_RNN_MAX_N_PARTS];
1783 size_t part_pack_size[DNNL_RNN_MAX_N_PARTS];
1784 unsigned pack_part[DNNL_RNN_MAX_N_PARTS];
1785 size_t offset_compensation;
1786 size_t size;
1787 char reserved[200];
1788} dnnl_rnn_packed_desc_t;
1789
1790/// Flags for memory special features
1791typedef enum {
1792 dnnl_memory_extra_flag_none = 0x0U,
1793 /// Indicates the weights have an additional buffer, that depends on the
1794 /// @p compensation_mask.
1795 ///
1796 /// For instance, in 4D case with the compensation mask equals (1 << 0)
1797 /// the additional buffer would consist of OC values:
1798 /// O[oc : 0,OC] =
1799 /// -128 * SUM(ic : 0,IC; kh : 0,KH; kw : 0,KW){ weights(oc, ic, kh, kw) }
1800 dnnl_memory_extra_flag_compensation_conv_s8s8 = 0x1U,
1801 dnnl_memory_extra_flag_scale_adjust = 0x2U,
1802 dnnl_memory_extra_flag_rnn_u8s8_compensation = 0x4U,
1803 dnnl_memory_extra_flag_gpu_rnn_u8s8_compensation
1804 = dnnl_memory_extra_flag_rnn_u8s8_compensation,
1805 dnnl_memory_extra_flag_compensation_conv_asymmetric_src = 0x8U,
1806 dnnl_memory_extra_flag_rnn_s8s8_compensation = 0x16U,
1807} dnnl_memory_extra_flags_t;
1808
1809/// Description of extra information stored in memory
1810typedef struct {
1811 /// The flags contain arbitrary extra information, such as compensation.
1812 /// @sa dnnl_memory_extra_flags_t
1813 uint64_t flags;
1814 /// Compensation mask
1815 int compensation_mask;
1816 /// Scale applied to the data
1817 float scale_adjust;
1818 /// Compensation mask for asymmetric quantization
1819 int asymm_compensation_mask;
1820 /// For future backwards compatibility
1821 char reserved[60];
1822} dnnl_memory_extra_desc_t;
1823
1824/// Memory descriptor. The description is based on a number of dimensions,
1825/// dimensions themselves, plus information about elements type and memory
1826/// format. Additionally, contains format-specific descriptions of the data
1827/// layout.
1828typedef struct {
1829 /// Number of dimensions
1830 int ndims;
1831 /// Dimensions in the following order:
1832 /// - CNN data tensors: mini-batch, channel, spatial
1833 /// (<code>{N, C, [[D,] H,] W}</code>)
1834 /// - CNN weight tensors: group (optional), output channel, input channel,
1835 /// spatial (<code>{[G,] O, I, [[D,] H,] W}</code>)
1836 /// - RNN data tensors: time, mini-batch, channels (<code>{T, N, C}</code>)
1837 /// or layers, directions, states, mini-batch, channels (<code>{L, D, S, N, C}</code>)
1838 /// - RNN weight tensor: layers, directions, input channel, gates, output channels
1839 /// (<code>{L, D, I, G, O}</code>).
1840 ///
1841 /// @note
1842 /// The order of dimensions does not depend on the memory format, so
1843 /// whether the data is laid out in #dnnl_nchw or #dnnl_nhwc
1844 /// the dims for 4D CN data tensor would be <code>{N, C, H, W}</code>.
1845 dnnl_dims_t dims;
1846
1847 /// Data type of the tensor elements.
1848 dnnl_data_type_t data_type;
1849
1850 /// Size of the data including padding in each dimension.
1851 dnnl_dims_t padded_dims;
1852
1853 /// Per-dimension offset from the padding to actual data, the top-level
1854 /// tensor with offsets applied must lie within the padding area.
1855 dnnl_dims_t padded_offsets;
1856
1857 /// Offset from memory origin to the current block, non-zero only in
1858 /// a description of a memory sub-block.
1859 dnnl_dim_t offset0;
1860
1861 /// Memory format kind.
1862 dnnl_format_kind_t format_kind;
1863 union {
1864 /// Description of the data layout for memory formats that use
1865 /// blocking.
1866 dnnl_blocking_desc_t blocking;
1867 /// Tensor of weights for integer 8bit winograd convolution.
1868 dnnl_wino_desc_t wino_desc;
1869 /// Tensor of packed weights for RNN.
1870 dnnl_rnn_packed_desc_t rnn_packed_desc;
1871 // ... other descriptions possible
1872 } format_desc;
1873
1874 dnnl_memory_extra_desc_t extra;
1875} dnnl_memory_desc_t;
1876
1877/// @struct dnnl_memory
1878/// An opaque structure to describe a memory.
1879struct dnnl_memory;
1880
1881/// A memory handle.
1882typedef struct dnnl_memory *dnnl_memory_t;
1883
1884/// A constant memory handle.
1885typedef const struct dnnl_memory *const_dnnl_memory_t;
1886
1887/// Special pointer value that indicates that a memory object should not have
1888/// an underlying buffer.
1889#define DNNL_MEMORY_NONE (NULL)
1890
1891/// Special pointer value that indicates that the library needs to allocate an
1892/// underlying buffer for a memory object.
1893#define DNNL_MEMORY_ALLOCATE ((void *)(size_t)-1)
1894
1895/// @} dnnl_api_memory
1896
1897/// @addtogroup dnnl_api_primitives
1898/// @{
1899/// @addtogroup dnnl_api_primitives_common
1900/// @{
1901
1902/// A pointer to any of the operation descriptors.
1903typedef void *dnnl_op_desc_t;
1904/// A pointer to any of the operation descriptors (constant variant).
1905typedef const void *const_dnnl_op_desc_t;
1906
1907/// @} dnnl_api_primitives_common
1908/// @} dnnl_api_primitives
1909
1910/// @addtogroup dnnl_api_primitives
1911/// @{
1912
1913/// @addtogroup dnnl_api_convolution
1914/// @{
1915
1916/// A descriptor of a convolution operation.
1917typedef struct {
1918 /// The kind of primitive. Used for self-identifying the primitive
1919 /// descriptor. Must be #dnnl_convolution.
1920 dnnl_primitive_kind_t primitive_kind;
1921 /// The kind of propagation. Possible values: #dnnl_forward_training,
1922 /// #dnnl_forward_inference, #dnnl_backward_data,
1923 /// #dnnl_backward_weights, and #dnnl_backward_bias.
1924 dnnl_prop_kind_t prop_kind;
1925 /// The kind of the convolution algorithm. Possible values:
1926 /// #dnnl_convolution_direct.
1927 dnnl_alg_kind_t alg_kind;
1928 /// Source memory descriptor.
1929 dnnl_memory_desc_t src_desc;
1930 /// Source gradient memory descriptor.
1931 dnnl_memory_desc_t diff_src_desc;
1932 /// Weights memory descriptor.
1933 dnnl_memory_desc_t weights_desc;
1934 /// Weights gradient memory descriptor.
1935 dnnl_memory_desc_t diff_weights_desc;
1936 /// Bias memory descriptor.
1937 dnnl_memory_desc_t bias_desc;
1938 /// Bias gradient memory descriptor.
1939 dnnl_memory_desc_t diff_bias_desc;
1940 /// Destination memory descriptor.
1941 dnnl_memory_desc_t dst_desc;
1942 /// Destination gradient memory descriptor.
1943 dnnl_memory_desc_t diff_dst_desc;
1944 /// Convolution strides in each spatial dimension.
1945 dnnl_dims_t strides;
1946 /// Convolution dilates in each spatial dimension.
1947 dnnl_dims_t dilates;
1948 /// Padding in each spatial dimension. padding[0] is a padding in the
1949 /// beginning (@p padding_l), padding[1] is a padding in the end (@p
1950 /// padding_r).
1951 dnnl_dims_t padding[2];
1952 /// The accumulator data type. Initialized automatically.
1953 dnnl_data_type_t accum_data_type;
1954} dnnl_convolution_desc_t;
1955
1956/// @} dnnl_api_convolution
1957
1958/// @addtogroup dnnl_api_deconvolution
1959/// @{
1960
1961/// A descriptor of a deconvolution operation.
1962typedef dnnl_convolution_desc_t dnnl_deconvolution_desc_t;
1963
1964/// @} dnnl_api_deconvolution
1965
1966/// @addtogroup dnnl_api_shuffle
1967/// @{
1968
1969/// A descriptor of a shuffle operation.
1970typedef struct {
1971 /// The kind of primitive. Used for self-identifying the primitive
1972 /// descriptor. Must be #dnnl_shuffle.
1973 dnnl_primitive_kind_t primitive_kind;
1974 /// The kind of propagation. Possible values: #dnnl_forward_training,
1975 /// #dnnl_forward_inference, and #dnnl_backward_data.
1976 dnnl_prop_kind_t prop_kind;
1977 /// Source and destination memory descriptor,
1978 /// and source and destination gradient memory descriptor.
1979 dnnl_memory_desc_t data_desc;
1980 /// Axis for shuffling.
1981 int axis;
1982 /// Number of groups.
1983 dnnl_dim_t group_size;
1984} dnnl_shuffle_desc_t;
1985
1986/// @} dnnl_api_shuffle
1987
1988/// @addtogroup dnnl_api_eltwise
1989/// @{
1990
1991/// A descriptor of a element-wise operation.
1992typedef struct {
1993 /// The kind of primitive. Used for self-identifying the primitive
1994 /// descriptor. Must be #dnnl_eltwise.
1995 dnnl_primitive_kind_t primitive_kind;
1996 /// The kind of propagation. Possible values: #dnnl_forward_training,
1997 /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
1998 dnnl_prop_kind_t prop_kind;
1999 /// The kind of eltwise algorithm. Possible values: #dnnl_eltwise_relu,
2000 /// #dnnl_eltwise_tanh, #dnnl_eltwise_elu, #dnnl_eltwise_square,
2001 /// #dnnl_eltwise_abs, #dnnl_eltwise_sqrt, #dnnl_eltwise_linear,
2002 /// #dnnl_eltwise_bounded_relu, #dnnl_eltwise_soft_relu,
2003 /// #dnnl_eltwise_soft_relu_v2, #dnnl_eltwise_logistic, #dnnl_eltwise_exp,
2004 /// #dnnl_eltwise_gelu_tanh, #dnnl_eltwise_swish, #dnnl_eltwise_log,
2005 /// #dnnl_eltwise_clip, #dnnl_eltwise_clip_v2, #dnnl_eltwise_pow,
2006 /// #dnnl_eltwise_gelu_erf, #dnnl_eltwise_round, #dnnl_eltwise_logsigmoid,
2007 /// #dnnl_eltwise_mish, #dnnl_eltwise_hardswish, #dnnl_eltwise_hardsigmoid.
2008 /// Possible values for passing destination memory on backward:
2009 /// #dnnl_eltwise_relu_use_dst_for_bwd, #dnnl_eltwise_tanh_use_dst_for_bwd,
2010 /// #dnnl_eltwise_elu_use_dst_for_bwd, #dnnl_eltwise_sqrt_use_dst_for_bwd,
2011 /// #dnnl_eltwise_logistic_use_dst_for_bwd,
2012 /// #dnnl_eltwise_exp_use_dst_for_bwd,
2013 /// #dnnl_eltwise_clip_v2_use_dst_for_bwd.
2014 dnnl_alg_kind_t alg_kind;
2015 /// Source and destination memory descriptor.
2016 dnnl_memory_desc_t data_desc;
2017 /// Source and destination gradient memory descriptor.
2018 dnnl_memory_desc_t diff_data_desc;
2019 /// Algorithm specific parameter.
2020 /// Accordance table:
2021 /// - #dnnl_eltwise_relu: @p alpha -- negative slope, @p beta ignored
2022 /// - #dnnl_eltwise_tanh: @p alpha and @p beta ignored
2023 /// - #dnnl_eltwise_elu: @p alpha -- negative slope, @p beta ignored
2024 /// - #dnnl_eltwise_square: @p alpha and @p beta ignored
2025 /// - #dnnl_eltwise_abs: @p alpha and @p beta ignored
2026 /// - #dnnl_eltwise_sqrt: @p alpha and @p beta ignored
2027 /// - #dnnl_eltwise_linear: @p alpha -- scale, @p beta -- shift
2028 /// - #dnnl_eltwise_bounded_relu: @p alpha -- upper bound, @p beta ignored
2029 /// - #dnnl_eltwise_soft_relu: @p alpha and @p beta ignored
2030 /// - #dnnl_eltwise_soft_relu_v2: @p alpha -- soft_relu_v2 arg scaling, @p beta ignored
2031 /// - #dnnl_eltwise_logistic: @p alpha and @p beta ignored
2032 /// - #dnnl_eltwise_exp: @p alpha and @p beta ignored
2033 /// - #dnnl_eltwise_gelu_tanh: @p alpha and @p beta ignored
2034 /// - #dnnl_eltwise_swish: @p alpha -- sigmoid arg scaling, @p beta ignored
2035 /// - #dnnl_eltwise_log: @p alpha and @p beta ignored
2036 /// - #dnnl_eltwise_clip: @p alpha -- lower bound, @p beta -- upper bound
2037 /// - #dnnl_eltwise_clip_v2: @p alpha -- lower bound, @p beta -- upper bound
2038 /// - #dnnl_eltwise_pow: @p alpha -- scale, @p beta -- exponent
2039 /// - #dnnl_eltwise_gelu_erf: @p alpha and @p beta ignored
2040 /// - #dnnl_eltwise_round: @p alpha and @p beta ignored
2041 /// - #dnnl_eltwise_logsigmoid: @p alpha and @p beta ignored
2042 /// - #dnnl_eltwise_mish: @p alpha and @p beta ignored
2043 /// - #dnnl_eltwise_hardswish: @p alpha and @p beta ignored
2044 /// - #dnnl_eltwise_hardsigmoid: @p alpha -- scale, @p beta -- shift
2045 float alpha, beta;
2046} dnnl_eltwise_desc_t;
2047
2048/// @} dnnl_api_eltwise
2049
2050/// @addtogroup dnnl_api_softmax
2051/// @{
2052
2053/// A descriptor of a Softmax operation.
2054typedef struct {
2055 /// The kind of primitive. Used for self-identifying the primitive
2056 /// descriptor. Must be #dnnl_softmax.
2057 dnnl_primitive_kind_t primitive_kind;
2058 /// The kind of propagation. Possible values: #dnnl_forward_training,
2059 /// #dnnl_forward_inference, and #dnnl_backward_data.
2060 dnnl_prop_kind_t prop_kind;
2061 /// Source and destination memory descriptor.
2062 dnnl_memory_desc_t data_desc;
2063 /// Source and Destination of gradient memory descriptor.
2064 dnnl_memory_desc_t diff_desc;
2065 /// The axis along which to perform the softmax.
2066 int softmax_axis;
2067} dnnl_softmax_desc_t;
2068
2069/// @} dnnl_api_softmax
2070
2071/// @addtogroup dnnl_api_softmax_v2
2072/// @{
2073
2074/// A descriptor of a Softmax operation.
2075typedef struct {
2076 /// The kind of primitive. Used for self-identifying the primitive
2077 /// descriptor. Must be #dnnl_softmax_v2.
2078 dnnl_primitive_kind_t primitive_kind;
2079 /// The kind of propagation. Possible values: #dnnl_forward_training,
2080 /// #dnnl_forward_inference, and #dnnl_backward_data.
2081 dnnl_prop_kind_t prop_kind;
2082 /// Source memory descriptor.
2083 dnnl_memory_desc_t src_desc;
2084 /// Source gradient memory descriptor.
2085 dnnl_memory_desc_t diff_src_desc;
2086 /// The axis along which to perform the softmax.
2087 int softmax_axis;
2088 /// Softmax algorithm. Possible values: #dnnl_softmax_accurate and
2089 /// #dnnl_softmax_log.
2090 dnnl_alg_kind_t alg_kind;
2091 /// Destination memory descriptor.
2092 dnnl_memory_desc_t dst_desc;
2093 /// Destination gradient memory descriptor.
2094 dnnl_memory_desc_t diff_dst_desc;
2095} dnnl_softmax_v2_desc_t;
2096
2097/// @} dnnl_api_softmax_v2
2098
2099/// @addtogroup dnnl_api_logsoftmax
2100/// @{
2101
2102/// A descriptor of a LogSoftmax operation. An alias of Softmax structure, but
2103/// primitive_kind must be #dnnl_logsoftmax.
2104typedef dnnl_softmax_desc_t dnnl_logsoftmax_desc_t;
2105
2106/// @} dnnl_api_logsoftmax
2107
2108/// @addtogroup dnnl_api_pooling
2109/// @{
2110
2111/// A descriptor of a pooling operation.
2112typedef struct {
2113 /// The kind of primitive. Used for self-identifying the primitive
2114 /// descriptor. Must be #dnnl_pooling.
2115 dnnl_primitive_kind_t primitive_kind;
2116 /// The kind of propagation. Possible values: #dnnl_forward_training,
2117 /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
2118 dnnl_prop_kind_t prop_kind;
2119 /// The kind of pooling algorithm.
2120 /// Possible values: #dnnl_pooling_max,
2121 /// #dnnl_pooling_avg_include_padding, and
2122 /// #dnnl_pooling_avg_exclude_padding.
2123 dnnl_alg_kind_t alg_kind;
2124 /// Source memory descriptor.
2125 dnnl_memory_desc_t src_desc;
2126 /// Source gradient memory descriptor.
2127 dnnl_memory_desc_t diff_src_desc;
2128 /// Destination memory descriptor.
2129 dnnl_memory_desc_t dst_desc;
2130 /// Destination gradient memory descriptor.
2131 dnnl_memory_desc_t diff_dst_desc;
2132 /// Pooling kernel strides for spatial dimensions.
2133 dnnl_dims_t strides;
2134 /// Pooling kernel spatial dimensions.
2135 dnnl_dims_t kernel;
2136 /// Padding in each spatial dimension. padding[0] is a padding in the
2137 /// beginning (@p padding_l), padding[1] is a padding in the end (@p
2138 /// padding_r).
2139 dnnl_dims_t padding[2];
2140 /// The accumulator data type. Initialized automatically.
2141 dnnl_data_type_t accum_data_type;
2142} dnnl_pooling_desc_t;
2143
2144/// @} dnnl_api_pooling
2145
2146/// @addtogroup dnnl_api_pooling_v2
2147/// @{
2148
2149/// A descriptor of a pooling operation.
2150typedef struct {
2151 /// The kind of primitive. Used for self-identifying the primitive
2152 /// descriptor. Must be #dnnl_pooling_v2.
2153 dnnl_primitive_kind_t primitive_kind;
2154 /// The kind of propagation. Possible values: #dnnl_forward_training,
2155 /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
2156 dnnl_prop_kind_t prop_kind;
2157 /// The kind of pooling algorithm.
2158 /// Possible values: #dnnl_pooling_max,
2159 /// #dnnl_pooling_avg_include_padding, and
2160 /// #dnnl_pooling_avg_exclude_padding.
2161 dnnl_alg_kind_t alg_kind;
2162 /// Source memory descriptor.
2163 dnnl_memory_desc_t src_desc;
2164 /// Source gradient memory descriptor.
2165 dnnl_memory_desc_t diff_src_desc;
2166 /// Destination memory descriptor.
2167 dnnl_memory_desc_t dst_desc;
2168 /// Destination gradient memory descriptor.
2169 dnnl_memory_desc_t diff_dst_desc;
2170 /// Pooling kernel strides for spatial dimensions.
2171 dnnl_dims_t strides;
2172 /// Pooling kernel spatial dimensions.
2173 dnnl_dims_t kernel;
2174 /// Padding in each spatial dimension. padding[0] is a padding in the
2175 /// beginning (@p padding_l), padding[1] is a padding in the end (@p
2176 /// padding_r).
2177 dnnl_dims_t padding[2];
2178 /// The accumulator data type. Initialized automatically.
2179 dnnl_data_type_t accum_data_type;
2180 /// Pooling dilations for spatial dimensions.
2181 dnnl_dims_t dilation;
2182} dnnl_pooling_v2_desc_t;
2183
2184/// @} dnnl_api_pooling_v2
2185
2186/// @addtogroup dnnl_api_prelu
2187/// @{
2188typedef struct {
2189 /// The kind of primitive. Used for self-identifying the primitive
2190 /// descriptor. Must be #dnnl_prelu.
2191 dnnl_primitive_kind_t primitive_kind;
2192 /// The kind of propagation. Possible values: #dnnl_forward_training,
2193 /// #dnnl_forward_inference, #dnnl_backward
2194 dnnl_prop_kind_t prop_kind;
2195 /// Source and destination memory descriptor.
2196 dnnl_memory_desc_t data_desc;
2197 /// Learnable parameter alpha memory descriptor.
2198 /// Alpha describes negative slope.
2199 dnnl_memory_desc_t weights_desc;
2200 /// Source and destination gradient memory descriptor.
2201 dnnl_memory_desc_t diff_data_desc;
2202 /// Learnable parameter alpha gradient memory descriptor.
2203 dnnl_memory_desc_t diff_weights_desc;
2204} dnnl_prelu_desc_t;
2205
2206/// @} dnnl_api_prelu
2207
2208/// @addtogroup dnnl_api_lrn
2209/// @{
2210
2211/// A descriptor of a Local Response Normalization (LRN) operation.
2212typedef struct {
2213 /// The kind of primitive. Used for self-identifying the primitive
2214 /// descriptor. Must be #dnnl_lrn.
2215 dnnl_primitive_kind_t primitive_kind;
2216 /// The kind of propagation. Possible values: #dnnl_forward_training,
2217 /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
2218 dnnl_prop_kind_t prop_kind;
2219 /// LRN algorithm. Possible values: #dnnl_lrn_within_channel and
2220 /// #dnnl_lrn_across_channels.
2221 dnnl_alg_kind_t alg_kind;
2222 /// Source and destination memory descriptor.
2223 dnnl_memory_desc_t data_desc;
2224 /// Source and destination gradient memory descriptor.
2225 dnnl_memory_desc_t diff_data_desc;
2226 /// The number of channels to sum over (for cross-channel LRN) or the side
2227 /// length of the square region to sum over (for within-channel LRN).
2228 dnnl_dim_t local_size;
2229 /// LRN alpha parameter.
2230 float lrn_alpha;
2231 /// LRN beta parameter.
2232 float lrn_beta;
2233 /// LRN k parameter.
2234 float lrn_k;
2235} dnnl_lrn_desc_t;
2236
2237/// @} dnnl_api_lrn
2238
2239/// @addtogroup dnnl_api_batch_normalization
2240/// @{
2241
2242/// A descriptor of a Batch Normalization operation.
2243typedef struct {
2244 /// The kind of primitive. Used for self-identifying the primitive
2245 /// descriptor. Must be #dnnl_batch_normalization.
2246 dnnl_primitive_kind_t primitive_kind;
2247 /// The kind of propagation. Possible values: #dnnl_forward_training,
2248 /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
2249 dnnl_prop_kind_t prop_kind;
2250 /// Source and destination memory descriptor.
2251 dnnl_memory_desc_t data_desc;
2252 /// Source and destination gradient memory descriptor.
2253 dnnl_memory_desc_t diff_data_desc;
2254 /// Scale and shift data and gradient memory descriptors.
2255 ///
2256 /// Scaleshift memory descriptor uses 2D #dnnl_nc format[2,Channels]. 1-st
2257 /// dimension contains gamma parameter, 2-nd dimension contains beta
2258 /// parameter.
2259 dnnl_memory_desc_t data_scaleshift_desc;
2260 dnnl_memory_desc_t diff_data_scaleshift_desc;
2261 /// Statistics memory descriptor.
2262 ///
2263 /// Statistics (mean or variance) descriptor use 1D #dnnl_x format[Channels].
2264 dnnl_memory_desc_t stat_desc;
2265 /// Batch normalization epsilon parameter.
2266 float batch_norm_epsilon;
2267 unsigned flags;
2268} dnnl_batch_normalization_desc_t;
2269
2270/// @} dnnl_api_batch_normalization
2271
2272/// @addtogroup dnnl_api_layer_normalization
2273/// @{
2274
2275/// A descriptor of a Layer Normalization operation.
2276typedef struct {
2277 /// The kind of primitive. Used for self-identifying the primitive
2278 /// descriptor. Must be #dnnl_layer_normalization.
2279 dnnl_primitive_kind_t primitive_kind;
2280 /// The kind of propagation. Possible values: #dnnl_forward_training,
2281 /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
2282 dnnl_prop_kind_t prop_kind;
2283 /// Source and destination memory descriptor.
2284 dnnl_memory_desc_t data_desc;
2285 /// Source and destination gradient memory descriptor.
2286 dnnl_memory_desc_t diff_data_desc;
2287 /// Scale and shift data and gradient memory descriptors.
2288 ///
2289 /// Scaleshift memory descriptor uses 2D #dnnl_ab
2290 /// format[2, normalized_dim] where 1-st dimension contains gamma parameter,
2291 /// 2-nd dimension contains beta parameter. Normalized_dim is equal to the
2292 /// last logical dimension of the data tensor across which normalization is
2293 /// performed.
2294 dnnl_memory_desc_t data_scaleshift_desc;
2295 dnnl_memory_desc_t diff_data_scaleshift_desc;
2296 /// Mean and variance data memory descriptors.
2297 ///
2298 /// Statistics (mean and variance) memory descriptor is the k-dimensional tensor
2299 /// where k is equal to data_tensor_ndims - 1 and may have any plain
2300 /// (stride[last_dim] == 1) user-provided format.
2301 dnnl_memory_desc_t stat_desc;
2302 /// Layer normalization epsilon parameter.
2303 float layer_norm_epsilon;
2304 unsigned flags;
2305} dnnl_layer_normalization_desc_t;
2306
2307/// @} dnnl_api_layer_normalization
2308
2309/// @addtogroup dnnl_api_layer_normalization_v2
2310/// @{
2311
2312/// A descriptor of a Layer Normalization operation.
2313typedef struct {
2314 /// The kind of primitive. Used for self-identifying the primitive
2315 /// descriptor. Must be #dnnl_layer_normalization_v2.
2316 dnnl_primitive_kind_t primitive_kind;
2317 /// The kind of propagation. Possible values: #dnnl_forward_training,
2318 /// #dnnl_forward_inference, #dnnl_backward, and #dnnl_backward_data.
2319 dnnl_prop_kind_t prop_kind;
2320 /// Source memory descriptor.
2321 dnnl_memory_desc_t src_desc;
2322 /// Source gradient memory descriptor.
2323 dnnl_memory_desc_t diff_src_desc;
2324 /// Scale and shift data and gradient memory descriptors.
2325 ///
2326 /// Scaleshift memory descriptor uses 2D #dnnl_ab
2327 /// format[2, normalized_dim] where 1-st dimension contains gamma parameter,
2328 /// 2-nd dimension contains beta parameter. Normalized_dim is equal to the
2329 /// last logical dimension of the data tensor across which normalization is
2330 /// performed.
2331 dnnl_memory_desc_t data_scaleshift_desc;
2332 dnnl_memory_desc_t diff_data_scaleshift_desc;
2333 /// Mean and variance data memory descriptors.
2334 ///
2335 /// Statistics (mean and variance) memory descriptor is the k-dimensional tensor
2336 /// where k is equal to data_tensor_ndims - 1 and may have any plain
2337 /// (stride[last_dim] == 1) user-provided format.
2338 dnnl_memory_desc_t stat_desc;
2339 /// Layer normalization epsilon parameter.
2340 float layer_norm_epsilon;
2341 unsigned flags;
2342 /// Destination memory descriptor.
2343 dnnl_memory_desc_t dst_desc;
2344 /// Destination gradient memory descriptor.
2345 dnnl_memory_desc_t diff_dst_desc;
2346} dnnl_layer_normalization_v2_desc_t;
2347
2348/// @} dnnl_api_layer_normalization_v2
2349
2350/// @addtogroup dnnl_api_inner_product
2351/// @{
2352
2353/// A descriptor of an inner product operation.
2354typedef struct {
2355 /// The kind of primitive. Used for self-identifying the primitive
2356 /// descriptor. Must be #dnnl_inner_product.
2357 dnnl_primitive_kind_t primitive_kind;
2358 /// The kind of propagation. Possible values: #dnnl_forward_training,
2359 /// #dnnl_forward_inference, #dnnl_backward_data,
2360 /// #dnnl_backward_weights, and #dnnl_backward_bias.
2361 dnnl_prop_kind_t prop_kind;
2362 /// Source memory descriptor.
2363 dnnl_memory_desc_t src_desc;
2364 /// Source gradient memory descriptor.
2365 dnnl_memory_desc_t diff_src_desc;
2366 /// Weights memory descriptor.
2367 dnnl_memory_desc_t weights_desc;
2368 /// Weights gradient memory descriptor.
2369 dnnl_memory_desc_t diff_weights_desc;
2370 /// Bias memory descriptor.
2371 dnnl_memory_desc_t bias_desc;
2372 /// Bias gradient memory descriptor.
2373 dnnl_memory_desc_t diff_bias_desc;
2374 /// Destination memory descriptor.
2375 dnnl_memory_desc_t dst_desc;
2376 /// Destination gradient memory descriptor.
2377 dnnl_memory_desc_t diff_dst_desc;
2378 /// The accumulator data type. Initialized automatically.
2379 dnnl_data_type_t accum_data_type;
2380} dnnl_inner_product_desc_t;
2381
2382/// @} dnnl_api_inner_product
2383
2384/// @addtogroup dnnl_api_rnn
2385/// @{
2386
2387/// Flags for RNN cell.
2388typedef enum {
2389 /// Undefined RNN flags
2390 dnnl_rnn_flags_undef = 0x0
2391} dnnl_rnn_flags_t;
2392
2393/// A direction of RNN primitive execution.
2394typedef enum {
2395 /// Unidirectional execution of RNN primitive from left to right.
2396 dnnl_unidirectional_left2right,
2397 /// Unidirectional execution of RNN primitive from right to left.
2398 dnnl_unidirectional_right2left,
2399 /// Bidirectional execution of RNN primitive with concatenation of the
2400 /// results.
2401 dnnl_bidirectional_concat,
2402 /// Bidirectional execution of RNN primitive with summation of the
2403 /// results.
2404 dnnl_bidirectional_sum,
2405 /// Alias for #dnnl_unidirectional_left2right.
2406 dnnl_unidirectional = dnnl_unidirectional_left2right,
2407} dnnl_rnn_direction_t;
2408
2409/// A descriptor for an RNN operation.
2410typedef struct {
2411 /// The kind of primitive. Used for self-identifying the primitive
2412 /// descriptor. Must be #dnnl_rnn.
2413 dnnl_primitive_kind_t primitive_kind;
2414 /// The kind of propagation. Possible values: #dnnl_forward_training,
2415 /// #dnnl_forward_inference, and #dnnl_backward.
2416 dnnl_prop_kind_t prop_kind;
2417 /// RNN cell kind. Must be one of #dnnl_vanilla_rnn,
2418 /// #dnnl_vanilla_lstm, #dnnl_vanilla_gru, or #dnnl_lbr_gru.
2419 dnnl_alg_kind_t cell_kind;
2420 /// The direction of RNN primitive execution.
2421 dnnl_rnn_direction_t direction;
2422 /// Source layer memory descriptor.
2423 dnnl_memory_desc_t src_layer_desc;
2424 /// Source iteration memory descriptor for hidden state.
2425 dnnl_memory_desc_t src_iter_desc;
2426 /// Source iteration memory descriptor for cell state.
2427 dnnl_memory_desc_t src_iter_c_desc;
2428 /// Weights layer memory descriptor.
2429 dnnl_memory_desc_t weights_layer_desc;
2430 /// Weights iteration memory descriptor.
2431 dnnl_memory_desc_t weights_iter_desc;
2432 /// Bias memory descriptor.
2433 dnnl_memory_desc_t bias_desc;
2434 /// Destination layer memory descriptor.
2435 dnnl_memory_desc_t dst_layer_desc;
2436 /// Destination iter memory descriptor for hidden state.
2437 dnnl_memory_desc_t dst_iter_desc;
2438 /// Destination iter memory descriptor for cell state.
2439 dnnl_memory_desc_t dst_iter_c_desc;
2440 /// Weights peephole memory descriptor.
2441 /// This memory descriptor is equal to zero memory descriptor in case of
2442 /// non-peephole LSTMs and other non-LSTM RNNs.
2443 dnnl_memory_desc_t weights_peephole_desc;
2444 /// Weights projection memory descriptor.
2445 /// This memory descriptor is equal to zero memory descriptor in case of
2446 /// non-projection LSTMs and other non-LSTM RNNs.
2447 dnnl_memory_desc_t weights_projection_desc;
2448
2449 /// Source gradient layer memory descriptor.
2450 dnnl_memory_desc_t diff_src_layer_desc;
2451 /// Source gradient iter memory descriptor for hidden state.
2452 dnnl_memory_desc_t diff_src_iter_desc;
2453 /// Source gradient iter memory descriptor for cell state.
2454 dnnl_memory_desc_t diff_src_iter_c_desc;
2455 /// Weights gradient layer memory descriptor.
2456 dnnl_memory_desc_t diff_weights_layer_desc;
2457 /// Weights gradient iter memory descriptor.
2458 dnnl_memory_desc_t diff_weights_iter_desc;
2459 /// Bias gradient memory descriptor.
2460 dnnl_memory_desc_t diff_bias_desc;
2461 /// Destination gradient layer memory descriptor.
2462 dnnl_memory_desc_t diff_dst_layer_desc;
2463 /// Destination gradient iteration memory descriptor for hidden state.
2464 dnnl_memory_desc_t diff_dst_iter_desc;
2465 /// Destination gradient iteration memory descriptor for cell state.
2466 dnnl_memory_desc_t diff_dst_iter_c_desc;
2467 /// Weights gradient peephole memory descriptor.
2468 /// This memory descriptor is equal to zero memory descriptor in case of
2469 /// non-peephole LSTMs and other non-LSTM RNNs.
2470 dnnl_memory_desc_t diff_weights_peephole_desc;
2471 /// Weights gradient projection memory descriptor.
2472 /// This memory descriptor is equal to zero memory descriptor in case of
2473 /// non-projection LSTMs and other non-LSTM RNNs.
2474 dnnl_memory_desc_t diff_weights_projection_desc;
2475
2476 /// RNN cell flags
2477 unsigned int flags;
2478 /// Activation function used for vanilla_rnn cell kind.
2479 /// Must be either #dnnl_eltwise_relu or #dnnl_eltwise_tanh.
2480 dnnl_alg_kind_t activation_kind;
2481 float alpha;
2482 float beta;
2483
2484} dnnl_rnn_desc_t;
2485
2486/// @} dnnl_api_rnn
2487
2488/// @addtogroup dnnl_api_binary
2489/// @{
2490
2491/// A descriptor of a binary operation.
2492typedef struct {
2493 /// The kind of primitive. Used for self-identifying the primitive
2494 /// descriptor. Must be #dnnl_binary.
2495 dnnl_primitive_kind_t primitive_kind;
2496 /// The kind of the binary algorithm. Possible values:
2497 /// #dnnl_binary_add, #dnnl_binary_mul, #dnnl_binary_max, #dnnl_binary_min,
2498 /// #dnnl_binary_div and #dnnl_binary_sub.
2499 dnnl_alg_kind_t alg_kind;
2500 /// Source memory descriptors.
2501 dnnl_memory_desc_t src_desc[2];
2502 /// Destination memory descriptor.
2503 dnnl_memory_desc_t dst_desc;
2504} dnnl_binary_desc_t;
2505
2506/// @} dnnl_api_binary
2507
2508/// @addtogroup dnnl_api_matmul
2509/// @{
2510
2511/// A descriptor of a matrix multiplication operation.
2512///
2513/// 2D case:
2514/// dst[m, n] = src[m, k] * weights[k, n] + bias[m, n]
2515///
2516/// 3D case:
2517/// dst[mb, m, n] = src[mb, m, k] * weights[mb, k, n] + bias[mb, m, n]
2518typedef struct {
2519 /// The kind of primitive. Used for self-identifying the primitive
2520 /// descriptor. Must be #dnnl_matmul.
2521 dnnl_primitive_kind_t primitive_kind;
2522 /// Source memory descriptor.
2523 dnnl_memory_desc_t src_desc;
2524 /// Weights memory descriptor.
2525 dnnl_memory_desc_t weights_desc;
2526 /// Bias memory descriptor.
2527 dnnl_memory_desc_t bias_desc;
2528 /// Destination memory descriptor.
2529 dnnl_memory_desc_t dst_desc;
2530 /// The accumulator data type. Initialized automatically.
2531 dnnl_data_type_t accum_data_type;
2532} dnnl_matmul_desc_t;
2533
2534/// @} dnnl_api_matmul
2535
2536/// @addtogroup dnnl_api_resampling
2537/// @{
2538
2539/// A descriptor of resampling operation.
2540typedef struct {
2541 /// The kind of primitive. Used for self-identifying the primitive
2542 /// descriptor. Must be #dnnl_resampling.
2543 dnnl_primitive_kind_t primitive_kind;
2544 /// The kind of propagation. Possible values: #dnnl_forward_training,
2545 /// #dnnl_forward_inference, #dnnl_backward_data,
2546 dnnl_prop_kind_t prop_kind;
2547 /// The kind of the resampling algorithm. Possible values:
2548 /// #dnnl_resampling_nearest, #dnnl_resampling_linear.
2549 dnnl_alg_kind_t alg_kind;
2550 /// Source memory descriptor.
2551 dnnl_memory_desc_t src_desc;
2552 /// Source gradient memory descriptor.
2553 dnnl_memory_desc_t diff_src_desc;
2554 /// Destination memory descriptor.
2555 dnnl_memory_desc_t dst_desc;
2556 /// Destination gradient memory descriptor.
2557 dnnl_memory_desc_t diff_dst_desc;
2558 /// Resampling factor in each spatial dimension.
2559 float factors[DNNL_MAX_NDIMS];
2560} dnnl_resampling_desc_t;
2561
2562/// @} dnnl_api_resampling
2563
2564/// @addtogroup dnnl_api_reduction
2565/// @{
2566
2567/// A descriptor of reduction operation.
2568typedef struct {
2569 /// The kind of primitive. Used for self-identifying the primitive
2570 /// descriptor. Must be #dnnl_reduction.
2571 dnnl_primitive_kind_t primitive_kind;
2572 /// The kind of reduction algorithm. Possible values:
2573 /// #dnnl_reduction_max, #dnnl_reduction_min, #dnnl_reduction_sum,
2574 /// #dnnl_reduction_mul, #dnnl_reduction_mean, #dnnl_reduction_norm_lp_max,
2575 /// #dnnl_reduction_norm_lp_sum, #dnnl_reduction_norm_lp_power_p_max,
2576 /// #dnnl_reduction_norm_lp_power_p_sum.
2577 dnnl_alg_kind_t alg_kind;
2578 /// Source memory descriptor.
2579 dnnl_memory_desc_t src_desc;
2580 /// Destination memory descriptor.
2581 dnnl_memory_desc_t dst_desc;
2582 /// Algorithm specific parameters.
2583 /// Accordance table:
2584 /// #dnnl_reduction_max: @p p and @p eps are ignored
2585 /// #dnnl_reduction_min: @p p and @p eps are ignored
2586 /// #dnnl_reduction_norm_lp_max: @p p -- power, @p eps -- epsilon
2587 /// #dnnl_reduction_norm_lp_sum: @p p -- power, @p eps -- epsilon
2588 /// #dnnl_reduction_norm_lp_power_p_max: @p p -- power, @p eps -- epsilon
2589 /// #dnnl_reduction_norm_lp_power_p_sum: @p p -- power, @p eps -- epsilon
2590 /// #dnnl_reduction_sum: @p p and @p eps are ignored
2591 /// #dnnl_reduction_mul: @p p and @p eps are ignored
2592 /// #dnnl_reduction_mean: @p p and @p eps are ignored
2593 float p, eps;
2594} dnnl_reduction_desc_t;
2595
2596/// @} dnnl_api_reduction
2597
2598/// @} dnnl_api_primitives
2599
2600/// @addtogroup dnnl_api_engine
2601/// @{
2602
2603/// @brief Kinds of engines.
2604typedef enum {
2605 /// An unspecified engine.
2606 dnnl_any_engine,
2607 /// CPU engine.
2608 dnnl_cpu,
2609 /// GPU engine.
2610 dnnl_gpu,
2611} dnnl_engine_kind_t;
2612
2613/// @struct dnnl_engine
2614/// @brief An opaque structure to describe an engine.
2615struct dnnl_engine;
2616/// @brief An engine handle.
2617typedef struct dnnl_engine *dnnl_engine_t;
2618#if 0
2619// FIXME: looks like this never happens
2620/// @brief A constant engine handle.
2621typedef const struct dnnl_engine *const_dnnl_engine_t;
2622#endif
2623
2624/// @} dnnl_api_engine
2625
2626/// @addtogroup dnnl_api_primitives
2627/// @{
2628/// @addtogroup dnnl_api_primitives_common
2629/// @{
2630
2631/// @struct dnnl_primitive_desc_iterator
2632/// @brief An opaque structure to describe a primitive descriptor iterator.
2633struct dnnl_primitive_desc_iterator;
2634
2635/// @brief A primitive descriptor iterator handle.
2636typedef struct dnnl_primitive_desc_iterator *dnnl_primitive_desc_iterator_t;
2637
2638/// @brief A constant primitive descriptor iterator handle.
2639typedef const struct dnnl_primitive_desc_iterator
2640 *const_dnnl_primitive_desc_iterator_t;
2641
2642/// @struct dnnl_primitive_desc
2643/// @brief An opaque structure to describe a primitive descriptor.
2644struct dnnl_primitive_desc;
2645
2646/// @brief A primitive descriptor handle.
2647typedef struct dnnl_primitive_desc *dnnl_primitive_desc_t;
2648
2649/// @brief A constant primitive descriptor handle.
2650typedef const struct dnnl_primitive_desc *const_dnnl_primitive_desc_t;
2651
2652/// @} dnnl_api_primitives_common
2653
2654/// @addtogroup dnnl_api_attributes
2655/// @{
2656
2657/// Floating-point math mode
2658typedef enum {
2659 /// Default behavior, no downconversions allowed
2660 dnnl_fpmath_mode_strict,
2661 /// Implicit f32->bf16 conversions allowed
2662 dnnl_fpmath_mode_bf16,
2663 /// Implicit f32->f16 conversions allowed
2664 dnnl_fpmath_mode_f16,
2665 /// Implicit f32->f16 or f32->bf16 conversions allowed
2666 dnnl_fpmath_mode_any,
2667 /// Implicit f32->tf32 conversions allowed
2668 dnnl_fpmath_mode_tf32,
2669} dnnl_fpmath_mode_t;
2670
2671/// Scratchpad mode
2672typedef enum {
2673 /// The library manages the scratchpad allocation according to the policy
2674 /// specified by the `DNNL_ENABLE_CONCURRENT_EXEC`
2675 /// [build option](@ref dev_guide_build_options) (default).
2676 ///
2677 /// When `DNNL_ENABLE_CONCURRENT_EXEC=OFF` (default), the library
2678 /// scratchpad is common to all primitives to reduce the memory footprint.
2679 /// This configuration comes with limited thread-safety properties, namely
2680 /// primitives can be created and executed in parallel but cannot migrate
2681 /// between threads (in other words, each primitive should be executed in
2682 /// the same thread it was created in).
2683 ///
2684 /// When `DNNL_ENABLE_CONCURRENT_EXEC=ON`, the library scratchpad is
2685 /// private to each primitive. The memory footprint is larger than when
2686 /// using `DNNL_ENABLE_CONCURRENT_EXEC=OFF` but different primitives can be
2687 /// created and run concurrently (the same primitive cannot be run
2688 /// concurrently from two different threads though).
2689 dnnl_scratchpad_mode_library,
2690 /// The user manages the scratchpad allocation by querying and providing
2691 /// the scratchpad memory to primitives. This mode is thread-safe as long
2692 /// as the scratchpad buffers are not used concurrently by two primitive
2693 /// executions.
2694 dnnl_scratchpad_mode_user,
2695} dnnl_scratchpad_mode_t;
2696
2697/// @struct dnnl_primitive_attr
2698/// @brief An opaque structure for primitive descriptor attributes.
2699///
2700/// Attributes may contain:
2701/// - output scales (to scale the result prior to storing it to the memory)
2702struct dnnl_primitive_attr;
2703
2704/// @brief A primitive descriptor attributes handle that controls primitive
2705/// behavior.
2706typedef struct dnnl_primitive_attr *dnnl_primitive_attr_t;
2707
2708/// @brief A constant primitive descriptor attributes handle.
2709typedef const struct dnnl_primitive_attr *const_dnnl_primitive_attr_t;
2710
2711/// @struct dnnl_post_ops
2712/// @brief An opaque structure for a chain of post operations.
2713///
2714/// dnnl_post_ops can be used to perform some (trivial) operations like
2715/// accumulation or eltwise after certain primitives like convolution.
2716///
2717/// Post operations might be combined together, making a chain of post
2718/// operations. For instance one can configure convolution followed by
2719/// accumulation followed by eltwise. This might be especially beneficial
2720/// for residual learning blocks.
2721///
2722/// @warning
2723/// Of course not all combinations are supported, so the user should handle
2724/// errors accordingly.
2725///
2726/// Supported post operations:
2727/// - accumulation (base primitive: convolution)
2728/// - eltwise (base primitive: convolution)
2729struct dnnl_post_ops;
2730
2731/// @brief A post operation chain handle.
2732typedef struct dnnl_post_ops *dnnl_post_ops_t;
2733
2734/// @brief A constant post operation chain handle.
2735typedef const struct dnnl_post_ops *const_dnnl_post_ops_t;
2736
2737/// @} dnnl_api_attributes
2738
2739/// @addtogroup dnnl_api_primitives_common
2740/// @{
2741
2742/// @struct dnnl_primitive
2743/// An opaque structure to describe a primitive.
2744struct dnnl_primitive;
2745/// A primitive handle.
2746typedef struct dnnl_primitive *dnnl_primitive_t;
2747/// A constant primitive handle.
2748typedef const struct dnnl_primitive *const_dnnl_primitive_t;
2749
2750/// Source argument #0.
2751#define DNNL_ARG_SRC_0 1
2752/// A special mnemonic for source argument for primitives that have a
2753/// single source. An alias for #DNNL_ARG_SRC_0.
2754#define DNNL_ARG_SRC DNNL_ARG_SRC_0
2755/// A special mnemonic for RNN input vector. An alias for
2756/// #DNNL_ARG_SRC_0.
2757#define DNNL_ARG_SRC_LAYER DNNL_ARG_SRC_0
2758/// A special mnemonic for reorder source argument. An alias for
2759/// #DNNL_ARG_SRC_0.
2760#define DNNL_ARG_FROM DNNL_ARG_SRC_0
2761
2762/// Source argument #1.
2763#define DNNL_ARG_SRC_1 2
2764/// A special mnemonic for RNN input recurrent hidden state vector. An alias
2765/// for #DNNL_ARG_SRC_1.
2766#define DNNL_ARG_SRC_ITER DNNL_ARG_SRC_1
2767
2768/// Source argument #2.
2769#define DNNL_ARG_SRC_2 3
2770/// A special mnemonic for RNN input recurrent cell state vector. An alias for
2771/// #DNNL_ARG_SRC_2.
2772#define DNNL_ARG_SRC_ITER_C DNNL_ARG_SRC_2
2773
2774/// Source argument #3.
2775#define DNNL_ARG_SRC_3 4
2776/// A special mnemonic for RNN input recurrent cell attention vector. An alias for
2777/// #DNNL_ARG_SRC_3.
2778#define DNNL_ARG_AUGRU_ATTENTION DNNL_ARG_SRC_3
2779
2780/// Destination argument #0.
2781#define DNNL_ARG_DST_0 17
2782/// A special mnemonic for destination argument for primitives that have a
2783/// single destination. An alias for #DNNL_ARG_DST_0.
2784#define DNNL_ARG_DST DNNL_ARG_DST_0
2785/// A special mnemonic for reorder destination argument. An alias for
2786/// #DNNL_ARG_DST_0.
2787#define DNNL_ARG_TO DNNL_ARG_DST_0
2788/// A special mnemonic for RNN output vector. An alias for #DNNL_ARG_DST_0.
2789#define DNNL_ARG_DST_LAYER DNNL_ARG_DST_0
2790
2791/// Destination argument #1.
2792#define DNNL_ARG_DST_1 18
2793/// A special mnemonic for RNN input recurrent hidden state vector. An
2794/// alias for #DNNL_ARG_DST_1.
2795#define DNNL_ARG_DST_ITER DNNL_ARG_DST_1
2796
2797/// Destination argument #2.
2798#define DNNL_ARG_DST_2 19
2799/// A special mnemonic for LSTM output recurrent cell state vector. An
2800/// alias for #DNNL_ARG_DST_2.
2801#define DNNL_ARG_DST_ITER_C DNNL_ARG_DST_2
2802
2803/// Weights argument #0.
2804#define DNNL_ARG_WEIGHTS_0 33
2805/// A special mnemonic for primitives that have a single weights
2806/// argument. Alias for #DNNL_ARG_WEIGHTS_0.
2807#define DNNL_ARG_WEIGHTS DNNL_ARG_WEIGHTS_0
2808/// A special mnemonic for scale and shift argument of normalization
2809/// primitives. Alias for #DNNL_ARG_WEIGHTS_0.
2810#define DNNL_ARG_SCALE_SHIFT DNNL_ARG_WEIGHTS_0
2811/// A special mnemonic for RNN weights applied to the layer input. An
2812/// alias for #DNNL_ARG_WEIGHTS_0.
2813#define DNNL_ARG_WEIGHTS_LAYER DNNL_ARG_WEIGHTS_0
2814
2815/// Weights argument #1.
2816#define DNNL_ARG_WEIGHTS_1 34
2817/// A special mnemonic for RNN weights applied to the recurrent input.
2818/// An alias for #DNNL_ARG_WEIGHTS_1.
2819#define DNNL_ARG_WEIGHTS_ITER DNNL_ARG_WEIGHTS_1
2820
2821/// Weights argument #2.
2822#define DNNL_ARG_WEIGHTS_2 35
2823/// A special mnemonic for RNN weights applied to the peephole weights.
2824/// An alias for #DNNL_ARG_WEIGHTS_2.
2825#define DNNL_ARG_WEIGHTS_PEEPHOLE DNNL_ARG_WEIGHTS_2
2826
2827/// Weights argument #3.
2828#define DNNL_ARG_WEIGHTS_3 36
2829/// A special mnemonic for RNN weights applied to the projection weights.
2830/// An alias for #DNNL_ARG_WEIGHTS_3.
2831#define DNNL_ARG_WEIGHTS_PROJECTION DNNL_ARG_WEIGHTS_3
2832
2833/// Bias tensor argument.
2834#define DNNL_ARG_BIAS 41
2835
2836/// Mean values tensor argument.
2837#define DNNL_ARG_MEAN 49
2838/// Variance values tensor argument.
2839#define DNNL_ARG_VARIANCE 50
2840
2841/// A special mnemonic for scale argument of normalization primitives.
2842#define DNNL_ARG_SCALE 51
2843/// A special mnemonic for shift argument of normalization primitives.
2844#define DNNL_ARG_SHIFT 52
2845
2846/// Workspace tensor argument. Workspace is used to pass information
2847/// from forward propagation to backward propagation computations.
2848#define DNNL_ARG_WORKSPACE 64
2849/// Scratchpad (temporary storage) tensor argument.
2850#define DNNL_ARG_SCRATCHPAD 80
2851
2852/// Gradient (diff) of the source argument #0.
2853#define DNNL_ARG_DIFF_SRC_0 129
2854/// A special mnemonic for primitives that have a single diff source argument.
2855/// An alias for #DNNL_ARG_DIFF_SRC_0.
2856#define DNNL_ARG_DIFF_SRC DNNL_ARG_DIFF_SRC_0
2857/// A special mnemonic for gradient (diff) of RNN input vector. An alias for
2858/// #DNNL_ARG_DIFF_SRC_0.
2859#define DNNL_ARG_DIFF_SRC_LAYER DNNL_ARG_DIFF_SRC_0
2860
2861/// Gradient (diff) of the source argument #1.
2862#define DNNL_ARG_DIFF_SRC_1 130
2863/// A special mnemonic for gradient (diff) of RNN input recurrent hidden state
2864/// vector. An alias for #DNNL_ARG_DIFF_SRC_1.
2865#define DNNL_ARG_DIFF_SRC_ITER DNNL_ARG_DIFF_SRC_1
2866
2867/// Gradient (diff) of the source argument #2.
2868#define DNNL_ARG_DIFF_SRC_2 131
2869/// A special mnemonic for gradient (diff) of RNN input recurrent cell state
2870/// vector. An alias for #DNNL_ARG_DIFF_SRC_1.
2871#define DNNL_ARG_DIFF_SRC_ITER_C DNNL_ARG_DIFF_SRC_2
2872
2873/// Gradient (diff) of the source argument #3.
2874#define DNNL_ARG_DIFF_SRC_3 132
2875/// A special mnemonic for gradient (diff) of RNN input recurrent cell attention
2876/// vector. An alias for #DNNL_ARG_DIFF_SRC_3.
2877#define DNNL_ARG_DIFF_AUGRU_ATTENTION DNNL_ARG_DIFF_SRC_3
2878
2879/// Gradient (diff) of the destination argument #0.
2880#define DNNL_ARG_DIFF_DST_0 145
2881/// A special mnemonic for primitives that have a single diff destination
2882/// argument. An alias for #DNNL_ARG_DIFF_DST_0.
2883#define DNNL_ARG_DIFF_DST DNNL_ARG_DIFF_DST_0
2884/// A special mnemonic for gradient (diff) of RNN output vector. An alias for
2885/// #DNNL_ARG_DIFF_DST_0.
2886#define DNNL_ARG_DIFF_DST_LAYER DNNL_ARG_DIFF_DST_0
2887
2888/// Gradient (diff) of the destination argument #1.
2889#define DNNL_ARG_DIFF_DST_1 146
2890/// A special mnemonic for gradient (diff) of RNN input recurrent hidden state
2891/// vector. An alias for #DNNL_ARG_DIFF_DST_1.
2892#define DNNL_ARG_DIFF_DST_ITER DNNL_ARG_DIFF_DST_1
2893
2894/// Gradient (diff) of the destination argument #2.
2895#define DNNL_ARG_DIFF_DST_2 147
2896/// A special mnemonic for gradient (diff) of RNN input recurrent cell state
2897/// vector. An alias for #DNNL_ARG_DIFF_DST_2.
2898#define DNNL_ARG_DIFF_DST_ITER_C DNNL_ARG_DIFF_DST_2
2899
2900/// Gradient (diff) of the weights argument #0.
2901#define DNNL_ARG_DIFF_WEIGHTS_0 161
2902/// A special mnemonic for primitives that have a single diff weights
2903/// argument. Alias for #DNNL_ARG_DIFF_WEIGHTS_0.
2904#define DNNL_ARG_DIFF_WEIGHTS DNNL_ARG_DIFF_WEIGHTS_0
2905/// A special mnemonic for diff of scale and shift argument of normalization
2906/// primitives. Alias for #DNNL_ARG_DIFF_WEIGHTS_0.
2907#define DNNL_ARG_DIFF_SCALE_SHIFT DNNL_ARG_DIFF_WEIGHTS_0
2908/// A special mnemonic for diff of RNN weights applied to the layer input. An
2909/// alias for #DNNL_ARG_DIFF_WEIGHTS_0.
2910#define DNNL_ARG_DIFF_WEIGHTS_LAYER DNNL_ARG_DIFF_WEIGHTS_0
2911
2912/// Gradient (diff) of the weights argument #1.
2913#define DNNL_ARG_DIFF_WEIGHTS_1 162
2914/// A special mnemonic for diff of RNN weights applied to the recurrent input.
2915/// An alias for #DNNL_ARG_DIFF_WEIGHTS_1.
2916#define DNNL_ARG_DIFF_WEIGHTS_ITER DNNL_ARG_DIFF_WEIGHTS_1
2917
2918/// Gradient (diff) of the weights argument #2.
2919#define DNNL_ARG_DIFF_WEIGHTS_2 163
2920/// A special mnemonic for diff of RNN weights applied to the peephole weights.
2921/// An alias for #DNNL_ARG_DIFF_WEIGHTS_2.
2922#define DNNL_ARG_DIFF_WEIGHTS_PEEPHOLE DNNL_ARG_DIFF_WEIGHTS_2
2923
2924/// Gradient (diff) of the weights argument #3.
2925#define DNNL_ARG_DIFF_WEIGHTS_3 164
2926/// A special mnemonic for diff of RNN weights applied to the projection
2927/// weights. An alias for #DNNL_ARG_DIFF_WEIGHTS_3.
2928#define DNNL_ARG_DIFF_WEIGHTS_PROJECTION DNNL_ARG_DIFF_WEIGHTS_3
2929
2930/// Gradient (diff) of the bias tensor argument.
2931#define DNNL_ARG_DIFF_BIAS 169
2932
2933/// A special mnemonic for scale argument of normalization primitives.
2934#define DNNL_ARG_DIFF_SCALE 255
2935/// A special mnemonic for shift argument of normalization primitives.
2936#define DNNL_ARG_DIFF_SHIFT 256
2937
2938/// Output scaling factors provided at execution time.
2939#define DNNL_ARG_ATTR_OUTPUT_SCALES 513
2940
2941/// Starting index for source arguments for primitives that take a variable
2942/// number of source arguments.
2943#define DNNL_ARG_MULTIPLE_SRC 1024
2944/// Starting index for destination arguments for primitives that produce a
2945/// variable number of destination arguments.
2946#define DNNL_ARG_MULTIPLE_DST 2048
2947
2948/// Zero points provided at execution time.
2949#define DNNL_ARG_ATTR_ZERO_POINTS 4096
2950
2951/// Arguments for fused depthwise convolution.
2952/// See @ref dev_guide_attributes_post_ops_depthwise_fusion
2953#define DNNL_ARG_ATTR_POST_OP_DW 8192
2954
2955/// Starting point for a binary post operation.
2956#define DNNL_ARG_ATTR_MULTIPLE_POST_OP_BASE 16384
2957
2958/// Arguments for a binary post operation. Up to 32 arguments are supported.
2959/// See @ref dev_guide_attributes_post_ops_binary_fusion
2960#define DNNL_ARG_ATTR_MULTIPLE_POST_OP(idx) \
2961 (DNNL_ARG_ATTR_MULTIPLE_POST_OP_BASE * ((idx) + 1))
2962
2963/// Input scaling factors provided at execution time.
2964#define DNNL_ARG_ATTR_INPUT_SCALES 1048576
2965
2966/// A structure that contains an index and a memory object, and is used to pass
2967/// arguments to dnnl_primitive_execute().
2968typedef struct {
2969 int arg; ///< An argument index, e.g. DNNL_ARG_SRC
2970 dnnl_memory_t memory; ///< Input/output memory
2971} dnnl_exec_arg_t;
2972
2973/// @} dnnl_api_primitives_common
2974
2975/// @addtogroup dnnl_api_primitives_common
2976/// @{
2977
2978/// Primitive descriptor query specification
2979///
2980/// For generic function dnnl_primitive_desc_query(), the type of result must
2981/// agree with the queried argument. The correspondence table:
2982///
2983/// Query kind | Type of query result
2984/// --------------------------------|-----------------------------
2985/// dnnl_query_*_engine | #dnnl_engine_t *
2986/// #dnnl_query_primitive_kind | #dnnl_primitive_kind_t *
2987/// dnnl_query_*_s32 | int *
2988/// dnnl_query_*_s64 | #dnnl_dim_t * (same as int64_t *)
2989/// dnnl_query_*_f64 | double *
2990/// dnnl_query_*_str | const char **
2991/// #dnnl_query_op_d | #const_dnnl_op_desc_t *
2992/// dnnl_query_*_md | const #dnnl_memory_desc_t **
2993/// dnnl_query_*_\<op\>_d | const dnnl_\<op\>_desc_t **
2994/// dnnl_query_*_pd | #const_dnnl_primitive_desc_t *
2995/// dnnl_query_cache_blob_id | const uint8_t **
2996///
2997/// @note
2998/// Rule of thumb: all opaque types and structures are returned by
2999/// reference. All numbers are returned by value.
3000///
3001/// @warning
3002/// All returned references point to constant objects and are valid only
3003/// during the lifetime of the queried primitive descriptor. Returned objects
3004/// must not be destroyed by the user. If you need to keep the object longer
3005/// than the lifetime of the queried primitive descriptor, use
3006/// dnnl_primitive_desc_clone() to make a copy.
3007typedef enum {
3008 dnnl_query_undef = 0, ///< no query
3009
3010 dnnl_query_engine, ///< execution engine
3011 dnnl_query_primitive_kind, ///< primitive kind
3012
3013 dnnl_query_num_of_inputs_s32, ///< number of inputs expected
3014 dnnl_query_num_of_outputs_s32, ///< number of outputs expected
3015
3016 dnnl_query_time_estimate_f64, ///< runtime estimation (seconds)
3017 dnnl_query_memory_consumption_s64, ///< memory consumption -- extra
3018 /// (scratch) memory, additional to
3019 /// all inputs and outputs memory
3020 /// (bytes)
3021
3022 dnnl_query_scratchpad_engine, ///< scratchpad engine -- engine to be used
3023 /// for creating scratchpad memory
3024
3025 dnnl_query_impl_info_str, ///< implementation name
3026
3027 dnnl_query_reorder_src_engine, ///< source engine
3028 dnnl_query_reorder_dst_engine, ///< destination engine
3029
3030 dnnl_query_prop_kind, ///< propagation kind
3031
3032 dnnl_query_cache_blob_id_size_s64, ///< size of cache blob ID in bytes
3033 dnnl_query_cache_blob_id, ///< cache blob ID (pointer to array)
3034
3035 // memory and op descriptor section
3036 dnnl_query_some_d = 64, ///< stub
3037 dnnl_query_op_d, ///< op descriptor
3038 dnnl_query_convolution_d, ///< convolution descriptor
3039 dnnl_query_deconvolution_d, ///< deconvolution descriptor
3040 dnnl_query_shuffle_d, ///< shuffle descriptor
3041 dnnl_query_eltwise_d, ///< eltwise descriptor
3042 dnnl_query_softmax_d, ///< softmax descriptor
3043 dnnl_query_pooling_d, ///< pooling descriptor
3044 dnnl_query_lrn_d, ///< lrn descriptor
3045 dnnl_query_batch_normalization_d, ///< batch normalization descriptor
3046 dnnl_query_layer_normalization_d, ///< layer normalization descriptor
3047 dnnl_query_inner_product_d, ///< inner product descriptor
3048 dnnl_query_rnn_d, ///< rnn descriptor
3049 dnnl_query_gemm_d, ///< GEMM descriptor (internal)
3050 dnnl_query_binary_d, ///< binary descriptor
3051 dnnl_query_logsoftmax_d, ///< logsoftmax descriptor
3052 dnnl_query_matmul_d, ///< matrix multiplication (matmul) descriptor
3053 dnnl_query_resampling_d, ///< resampling descriptor
3054 dnnl_query_pooling_v2_d, ///< pooling version 2 descriptor
3055 dnnl_query_reduction_d, ///< reduction descriptor
3056 dnnl_query_prelu_d, ///< prelu descriptor
3057 dnnl_query_softmax_v2_d, ///< softmax version 2 descriptor
3058 dnnl_query_layer_normalization_v2_d, ///< layer normalization version 2 descriptor
3059
3060 // memory descriptor section
3061 dnnl_query_some_md = 128, ///< stub
3062 dnnl_query_src_md, ///< source memory desc
3063 dnnl_query_diff_src_md, ///< source gradient memory desc
3064 dnnl_query_weights_md, ///< weights memory descriptor desc
3065 dnnl_query_diff_weights_md, ///< weights grad. memory desc
3066 dnnl_query_dst_md, ///< destination memory desc
3067 dnnl_query_diff_dst_md, ///< destination grad. memory desc
3068 dnnl_query_workspace_md, ///< workspace memory desc
3069 dnnl_query_scratchpad_md, ///< scratchpad memory desc
3070 dnnl_query_exec_arg_md = 255, ///< memory desc of an execute argument
3071
3072 // Max value to prevent UB for internal use only dnnl_query_t
3073 dnnl_query_max = 0x7fff,
3074} dnnl_query_t;
3075
3076/// @} dnnl_api_primitives_common
3077
3078/// @} dnnl_api_primitives
3079
3080/// @addtogroup dnnl_api_stream
3081/// @{
3082
3083/// @brief Stream flags.
3084typedef enum {
3085 // In-order execution.
3086 dnnl_stream_in_order = 0x1U,
3087 /// Out-of-order execution.
3088 dnnl_stream_out_of_order = 0x2U,
3089 /// Default stream configuration.
3090 dnnl_stream_default_flags = dnnl_stream_in_order,
3091} dnnl_stream_flags_t;
3092
3093/// @struct dnnl_stream
3094/// An opaque structure to describe an execution stream.
3095struct dnnl_stream;
3096/// An execution stream handle.
3097typedef struct dnnl_stream *dnnl_stream_t;
3098/// A constant execution stream handle.
3099typedef const struct dnnl_stream *const_dnnl_stream_t;
3100
3101/// @} dnnl_api_stream
3102
3103/// @addtogroup dnnl_api_service
3104/// @{
3105
3106/// No runtime (disabled)
3107#define DNNL_RUNTIME_NONE 0u
3108
3109/// Sequential runtime (CPU only)
3110#define DNNL_RUNTIME_SEQ 1u
3111
3112/// OpenMP runtime (CPU only)
3113#define DNNL_RUNTIME_OMP 2u
3114
3115/// TBB runtime (CPU only)
3116#define DNNL_RUNTIME_TBB 4u
3117
3118/// Threadpool runtime (CPU only)
3119#define DNNL_RUNTIME_THREADPOOL 8u
3120
3121/// OpenCL runtime
3122#define DNNL_RUNTIME_OCL 256u
3123
3124/// SYCL runtime
3125#define DNNL_RUNTIME_SYCL 512u
3126
3127/// DPC++ runtime
3128#define DNNL_RUNTIME_DPCPP DNNL_RUNTIME_SYCL
3129
3130/// Structure containing version information as per [Semantic
3131/// Versioning](https://semver.org)
3132typedef struct {
3133 int major; ///< Major version
3134 int minor; ///< Minor version
3135 int patch; ///< Patch version
3136 const char *hash; ///< Git hash of the sources (may be absent)
3137 unsigned cpu_runtime; ///< CPU runtime
3138 unsigned gpu_runtime; ///< GPU runtime
3139} dnnl_version_t;
3140
3141/// Disable profiling completely
3142#define DNNL_JIT_PROFILE_NONE 0u
3143
3144/// Enable VTune Amplifier integration
3145#define DNNL_JIT_PROFILE_VTUNE 1u
3146
3147/// Enable Linux perf integration via perfmap files
3148#define DNNL_JIT_PROFILE_LINUX_PERFMAP 2u
3149
3150/// Enable Linux perf integration via jitdump files
3151#define DNNL_JIT_PROFILE_LINUX_JITDUMP 4u
3152
3153/// Instruct Linux perf integration via jitdump files to use TSC. @ref
3154/// DNNL_JIT_PROFILE_LINUX_JITDUMP must be set too for this to take effect.
3155#define DNNL_JIT_PROFILE_LINUX_JITDUMP_USE_TSC 8u
3156
3157/// Enable Linux perf integration (both jitdump and perfmap)
3158#define DNNL_JIT_PROFILE_LINUX_PERF \
3159 (DNNL_JIT_PROFILE_LINUX_JITDUMP | DNNL_JIT_PROFILE_LINUX_PERFMAP)
3160
3161/// CPU instruction set flags
3162typedef enum {
3163 /// Any ISA (excepting those listed as initial support)
3164 dnnl_cpu_isa_all = 0x0,
3165
3166 /// Intel Streaming SIMD Extensions 4.1 (Intel SSE4.1)
3167 dnnl_cpu_isa_sse41 = 0x1,
3168
3169 /// Intel Advanced Vector Extensions (Intel AVX)
3170 dnnl_cpu_isa_avx = 0x3,
3171
3172 /// Intel Advanced Vector Extensions 2 (Intel AVX2)
3173 dnnl_cpu_isa_avx2 = 0x7,
3174
3175 /// (deprecated) Intel Advanced Vector Extensions 512 (Intel AVX-512) subset
3176 /// for Intel Xeon Phi processors x200 Series.
3177 dnnl_cpu_isa_avx512_mic = 0xf,
3178
3179 /// (deprecated) Intel AVX-512 subset
3180 /// for Intel Xeon Phi processors 7235, 7285, 7295 Series.
3181 dnnl_cpu_isa_avx512_mic_4ops = 0x1f,
3182
3183 /// Intel AVX-512 subset for Intel Xeon Scalable processor family
3184 /// and Intel Core processor family.
3185 dnnl_cpu_isa_avx512_core = 0x27,
3186
3187 /// Intel AVX-512 and Intel Deep Learning Boost (Intel DL Boost) support
3188 /// for Intel Xeon Scalable processor family
3189 /// and Intel Core processor family.
3190 dnnl_cpu_isa_avx512_core_vnni = 0x67,
3191
3192 /// Intel AVX-512, Intel DL Boost and bfloat16 support
3193 /// for Intel Xeon Scalable processor family
3194 /// and Intel Core processor family.
3195 dnnl_cpu_isa_avx512_core_bf16 = 0xe7,
3196
3197 /// Intel AVX-512 with float16, Intel DL Boost and bfloat16 support
3198 /// for Intel Xeon Scalable processor family
3199 /// and Intel Core processor family.
3200 dnnl_cpu_isa_avx512_core_fp16 = 0x1e7,
3201
3202 /// Intel AVX-512 with float16, Intel DL Boost and bfloat16 support and
3203 /// Intel AMX with 8-bit integer and bfloat16 support
3204 dnnl_cpu_isa_avx512_core_amx = 0x3e7,
3205
3206 /// Intel AVX2 and Intel Deep Learning Boost (Intel DL Boost) support
3207 dnnl_cpu_isa_avx2_vnni = 0x407,
3208
3209} dnnl_cpu_isa_t;
3210
3211/// CPU ISA hints flags
3212typedef enum {
3213 /// No hints (use default features)
3214 dnnl_cpu_isa_no_hints = 0x0,
3215
3216 /// Prefer to exclusively use Ymm registers for computations
3217 dnnl_cpu_isa_prefer_ymm = 0x1,
3218} dnnl_cpu_isa_hints_t;
3219
3220/// @} dnnl_api_service
3221
3222/// @} dnnl_api
3223
3224#ifdef __cplusplus
3225}
3226#endif
3227
3228#endif /* ONEAPI_DNNL_TYPES_H */
3229