1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | /// @example getting_started.cpp |
18 | /// @copybrief getting_started_cpp |
19 | /// > Annotated version: @ref getting_started_cpp |
20 | |
21 | #include <cmath> |
22 | #include <numeric> |
23 | #include <stdexcept> |
24 | #include <vector> |
25 | |
26 | #include "oneapi/dnnl/dnnl.hpp" |
27 | #include "oneapi/dnnl/dnnl_debug.h" |
28 | |
29 | #include "example_utils.hpp" |
30 | |
31 | using namespace dnnl; |
32 | // [Prologue] |
33 | |
34 | /// @page getting_started_cpp Getting started |
35 | /// |
36 | /// This C++ API example demonstrates the basics of the oneDNN programming model. |
37 | /// |
38 | /// > Example code: @ref getting_started.cpp |
39 | /// |
40 | /// This C++ API example demonstrates the basics of the oneDNN programming model: |
41 | /// - How to create oneDNN memory objects. |
42 | /// - How to get data from the user's buffer into a oneDNN memory object. |
43 | /// - How a tensor's logical dimensions and memory object formats relate. |
44 | /// - How to create oneDNN primitives. |
45 | /// - How to execute the primitives. |
46 | /// |
47 | /// The example uses the ReLU operation and comprises the following steps: |
48 | /// 1. Creating @ref getting_started_cpp_sub1 to execute a primitive. |
49 | /// 2. Performing @ref getting_started_cpp_sub2. |
50 | /// 3. @ref getting_started_cpp_sub3 (using different flavors). |
51 | /// 4. @ref getting_started_cpp_sub4. |
52 | /// 5. @ref getting_started_cpp_sub5. |
53 | /// 6. @ref getting_started_cpp_sub6 (checking that the resulting image does |
54 | /// not contain negative values). |
55 | /// |
56 | /// These steps are implemented in the @ref getting_started_cpp_tutorial, which |
57 | /// in turn is called from @ref getting_started_cpp_main (which is also |
58 | /// responsible for error handling). |
59 | /// |
60 | /// @section getting_started_cpp_headers Public headers |
61 | /// |
62 | /// To start using oneDNN we must first include the @ref dnnl.hpp |
63 | /// header file in the program. We also include @ref dnnl_debug.h in |
64 | /// example_utils.hpp, which contains some debugging facilities like returning |
65 | /// a string representation for common oneDNN C types. |
66 | |
67 | // [Prologue] |
68 | |
69 | /// @page getting_started_cpp |
70 | /// @section getting_started_cpp_tutorial getting_started_tutorial() function |
71 | /// |
// Builds an engine and stream, wraps user-prepared NHWC image data in oneDNN
// memory objects, runs a forward-inference ReLU primitive on it, and copies
// the result back for validation.
void getting_started_tutorial(engine::kind engine_kind) {
    /// @page getting_started_cpp
    /// @subsection getting_started_cpp_sub1 Engine and stream
    ///
    /// All oneDNN primitives and memory objects are attached to a
    /// particular @ref dnnl::engine, which is an abstraction of a
    /// computational device (see also @ref dev_guide_basic_concepts). The
    /// primitives are created and optimized for the device they are attached
    /// to and the memory objects refer to memory residing on the
    /// corresponding device. In particular, that means neither memory objects
    /// nor primitives that were created for one engine can be used on
    /// another.
    ///
    /// To create an engine, we should specify the @ref dnnl::engine::kind
    /// and the index of the device of the given kind.
    ///
    /// @snippet getting_started.cpp Initialize engine
    // [Initialize engine]
    engine eng(engine_kind, 0);
    // [Initialize engine]

    /// In addition to an engine, all primitives require a @ref dnnl::stream
    /// for the execution. The stream encapsulates an execution context and is
    /// tied to a particular engine.
    ///
    /// The creation is pretty straightforward:
    /// @snippet getting_started.cpp Initialize stream
    // [Initialize stream]
    stream engine_stream(eng);
    // [Initialize stream]

    /// In simple cases, when a program works with one device only (e.g.
    /// only on CPU), an engine and a stream can be created once and used
    /// throughout the program. Some frameworks create singleton objects that
    /// hold oneDNN engine and stream and use them throughout the code.

    /// @subsection getting_started_cpp_sub2 Data preparation (code outside of oneDNN)
    ///
    /// Now that the preparation work is done, let's create some data to work
    /// with. We will create a 4D tensor in NHWC format, which is quite
    /// popular in many frameworks.
    ///
    /// Note that even though we work with one image only, the image tensor
    /// is still 4D. The extra dimension (here N) corresponds to the
    /// batch, and, in case of a single image, is equal to 1. It is pretty
    /// typical to have the batch dimension even when working with a single
    /// image.
    ///
    /// In oneDNN, all CNN primitives assume that tensors have the batch
    /// dimension, which is always the first logical dimension (see also @ref
    /// dev_guide_conventions).
    ///
    /// @snippet getting_started.cpp Create user's data
    // [Create user's data]
    const int N = 1, H = 13, W = 13, C = 3;

    // Compute physical strides for each dimension (innermost dim is C,
    // i.e. the data is laid out as NHWC)
    const int stride_N = H * W * C;
    const int stride_H = W * C;
    const int stride_W = C;
    const int stride_C = 1;

    // An auxiliary function that maps logical index to the physical offset
    auto offset = [=](int n, int h, int w, int c) {
        return n * stride_N + h * stride_H + w * stride_W + c * stride_C;
    };

    // The image size
    const int image_size = N * H * W * C;

    // Allocate a buffer for the image
    std::vector<float> image(image_size);

    // Initialize the image with some values (mix of negative and positive,
    // so the ReLU below has something to clip)
    for (int n = 0; n < N; ++n)
        for (int h = 0; h < H; ++h)
            for (int w = 0; w < W; ++w)
                for (int c = 0; c < C; ++c) {
                    int off = offset(
                            n, h, w, c); // Get the physical offset of a pixel
                    image[off] = -std::cos(off / 10.f);
                }
    // [Create user's data]
    /// @subsection getting_started_cpp_sub3 Wrapping data into a oneDNN memory object
    ///
    /// Now, having the image ready, let's wrap it in a @ref dnnl::memory
    /// object to be able to pass the data to oneDNN primitives.
    ///
    /// Creating @ref dnnl::memory comprises two steps:
    /// 1. Initializing the @ref dnnl::memory::desc struct (also referred to
    ///    as a memory descriptor), which only describes the tensor data and
    ///    doesn't contain the data itself. Memory descriptors are used to
    ///    create @ref dnnl::memory objects and to initialize primitive
    ///    descriptors (shown later in the example).
    /// 2. Creating the @ref dnnl::memory object itself (also referred to as
    ///    a memory object), based on the memory descriptor initialized in
    ///    step 1, an engine, and, optionally, a handle to data. The
    ///    memory object is used when a primitive is executed.
    ///
    /// Thanks to the
    /// [list initialization](https://en.cppreference.com/w/cpp/language/list_initialization)
    /// introduced in C++11, it is possible to combine these two steps whenever
    /// a memory descriptor is not used anywhere else but in creating a @ref
    /// dnnl::memory object.
    ///
    /// However, for the sake of demonstration, we will show both steps
    /// explicitly.

    /// @subsubsection getting_started_cpp_sub31 Memory descriptor
    ///
    /// To initialize the @ref dnnl::memory::desc, we need to pass:
    /// 1. The tensor's dimensions, **the semantic order** of which is
    ///    defined by **the primitive** that will use this memory
    ///    (descriptor).
    ///
    ///    @warning
    ///        Memory descriptors and objects are not aware of any meaning of
    ///        the data they describe or contain.
    /// 2. The data type for the tensor (@ref dnnl::memory::data_type).
    /// 3. The memory format tag (@ref dnnl::memory::format_tag) that
    ///    describes how the data is going to be laid out in the device's
    ///    memory. The memory format is required for the primitive to
    ///    correctly handle the data.
    ///
    /// The code:
    /// @snippet getting_started.cpp Init src_md
    // [Init src_md]
    auto src_md = memory::desc(
            {N, C, H, W}, // logical dims, the order is defined by a primitive
            memory::data_type::f32, // tensor's data type
            memory::format_tag::nhwc // memory format, NHWC in this case
    );
    // [Init src_md]

    /// The first thing to notice here is that we pass dimensions as `{N, C,
    /// H, W}` while it might seem more natural to pass `{N, H, W, C}`, which
    /// better corresponds to the user's code. This is because oneDNN
    /// CNN primitives like ReLU always expect tensors in the following form:
    ///
    /// | Spatial dim | Tensor dimensions
    /// | :--         | :--
    /// | 0D          | \f$N \times C\f$
    /// | 1D          | \f$N \times C \times W\f$
    /// | 2D          | \f$N \times C \times H \times W\f$
    /// | 3D          | \f$N \times C \times D \times H \times W\f$
    ///
    /// where:
    /// - \f$N\f$ is a batch dimension (discussed above),
    /// - \f$C\f$ is channel (aka feature maps) dimension, and
    /// - \f$D\f$, \f$H\f$, and \f$W\f$ are spatial dimensions.
    ///
    /// Now that the logical order of dimension is defined, we need to specify
    /// the memory format (the third parameter), which describes how logical
    /// indices map to the offset in memory. This is the place where the user's
    /// format NHWC comes into play. oneDNN has different @ref
    /// dnnl::memory::format_tag values that cover the most popular memory
    /// formats like NCHW, NHWC, CHWN, and some others.
    ///
    /// The memory descriptor for the image is called `src_md`. The `src` part
    /// comes from the fact that the image will be a source for the ReLU
    /// primitive (that is, we formulate memory names from the primitive
    /// perspective; hence we will use `dst` to name the output memory). The
    /// `md` is an initialism for Memory Descriptor.

    /// @paragraph getting_started_cpp_sub311 Alternative way to create a memory descriptor
    ///
    /// Before we continue with memory creation, let us show the alternative
    /// way to create the same memory descriptor: instead of using the
    /// @ref dnnl::memory::format_tag, we can directly specify the strides
    /// of each tensor dimension:
    /// @snippet getting_started.cpp Init alt_src_md
    // [Init alt_src_md]
    auto alt_src_md = memory::desc(
            {N, C, H, W}, // logical dims, the order is defined by a primitive
            memory::data_type::f32, // tensor's data type
            {stride_N, stride_C, stride_H, stride_W} // the strides
    );

    // Sanity check: the memory descriptors should be the same
    if (src_md != alt_src_md)
        throw std::logic_error("Memory descriptor initialization mismatch." );
    // [Init alt_src_md]

    /// Just as before, the tensor's dimensions come in the `N, C, H, W` order
    /// as required by CNN primitives. To define the physical memory format,
    /// the strides are passed as the third parameter. Note that the order of
    /// the strides corresponds to the order of the tensor's dimensions.
    ///
    /// @warning
    ///     Using the wrong order might lead to incorrect results or even a
    ///     crash.

    /// @subsubsection getting_started_cpp_sub32 Creating a memory object
    ///
    /// Having a memory descriptor and an engine prepared, let's create
    /// input and output memory objects for a ReLU primitive.
    /// @snippet getting_started.cpp Create memory objects
    // [Create memory objects]
    // src_mem contains a copy of image after write_to_dnnl_memory function
    auto src_mem = memory(src_md, eng);
    write_to_dnnl_memory(image.data(), src_mem);

    // For dst_mem the library allocates buffer
    auto dst_mem = memory(src_md, eng);
    // [Create memory objects]

    /// Both memory objects are created with the constructor that instructs
    /// the library to allocate the underlying buffer. The user's data is then
    /// copied into `src_mem` with the `write_to_dnnl_memory()` helper, which
    /// works for CPU as well as non-CPU engines. (Alternatively, the
    /// @ref dnnl::memory::memory(const dnnl::memory::desc &, const dnnl::engine &, void *)
    /// constructor accepts a user buffer pointer as its last argument.)
    ///
    /// The key differences between a library-allocated and a user-provided
    /// buffer are:
    /// 1. The library will own the memory for `dst_mem` and will deallocate
    ///    it when `dst_mem` is destroyed. That means the memory buffer can
    ///    be used only while `dst_mem` is alive.
    /// 2. Library-allocated buffers have good alignment, which typically
    ///    results in better performance.
    ///
    /// @note
    ///     Memory allocated outside of the library and passed to oneDNN
    ///     should have good alignment for better performance.
    ///
    /// In the subsequent section we will show how to read the results back
    /// from the `dst_mem` memory object.
    /// @subsection getting_started_cpp_sub4 Creating a ReLU primitive
    ///
    /// Let's now create a ReLU primitive.
    ///
    /// The library implements ReLU primitive as a particular algorithm of a
    /// more general @ref dev_guide_eltwise primitive, which applies a specified
    /// function to each and every element of the source tensor.
    ///
    /// Just as in the case of @ref dnnl::memory, a user should always go
    /// through (at least) two creation steps (which however, can be sometimes
    /// combined thanks to C++11):
    /// 1. Create an operation primitive descriptor (here @ref
    ///    dnnl::eltwise_forward::primitive_desc) that defines operation
    ///    parameters and is a **lightweight** descriptor of the actual
    ///    algorithm that **implements** the given operation.
    ///    The user can query different characteristics of the chosen
    ///    implementation such as memory consumptions and some others that will
    ///    be covered in the next topic (@ref memory_format_propagation_cpp).
    /// 2. Create a primitive (here @ref dnnl::eltwise_forward) that can be
    ///    executed on memory objects to compute the operation.
    ///
    /// oneDNN separates these two steps to enable the user to inspect details
    /// of a primitive implementation prior to creating the primitive. This may
    /// be expensive, because, for example, oneDNN generates the optimized
    /// computational code on the fly.
    ///
    /// @note
    ///     Primitive creation might be a very expensive operation, so consider
    ///     creating primitive objects once and executing them multiple times.
    ///
    /// The code:
    /// @snippet getting_started.cpp Create a ReLU primitive
    // [Create a ReLU primitive]
    // ReLU primitive descriptor, which corresponds to a particular
    // implementation in the library
    auto relu_pd = eltwise_forward::primitive_desc(
            eng, // an engine the primitive will be created for
            prop_kind::forward_inference, algorithm::eltwise_relu,
            src_md, // source memory descriptor for an operation to work on
            src_md, // destination memory descriptor for an operation to work on
            0.f, // alpha parameter means negative slope in case of ReLU
            0.f // beta parameter is ignored in case of ReLU
    );

    // ReLU primitive
    auto relu = eltwise_forward(relu_pd); // !!! this can take quite some time
    // [Create a ReLU primitive]

    /// A note about variable names. Similar to the `_md` suffix used for
    /// memory descriptors, we use `_pd` for the primitive descriptors, and no
    /// suffix for primitives themselves.
    ///
    /// It is worth mentioning that we specified the exact tensor and its
    /// memory format when we were initializing the `relu_pd`. That means
    /// `relu` primitive would perform computations with memory objects that
    /// correspond to this description. This is the one and only one way of
    /// creating non-compute-intensive primitives like @ref dev_guide_eltwise,
    /// @ref dev_guide_batch_normalization, and others.
    ///
    /// Compute-intensive primitives (like @ref dev_guide_convolution) have an
    /// ability to define the appropriate memory format on their own. This is
    /// one of the key features of the library and will be discussed in detail
    /// in the next topic: @ref memory_format_propagation_cpp.

    /// @subsection getting_started_cpp_sub5 Executing the ReLU primitive
    ///
    /// Finally, let's execute the primitive and wait for its completion.
    ///
    /// The input and output memory objects are passed to the `execute()`
    /// method using a <tag, memory> map. Each tag specifies what kind of
    /// tensor each memory object represents. All @ref dev_guide_eltwise
    /// primitives require the map to have two elements: a source memory
    /// object (input) and a destination memory (output).
    ///
    /// A primitive is executed in a stream (the first parameter of the
    /// `execute()` method). Depending on a stream kind, an execution might be
    /// blocking or non-blocking. This means that we need to call @ref
    /// dnnl::stream::wait before accessing the results.
    ///
    /// @snippet getting_started.cpp Execute ReLU primitive
    // [Execute ReLU primitive]
    // Execute ReLU (out-of-place)
    relu.execute(engine_stream, // The execution stream
            {
                    // A map with all inputs and outputs
                    {DNNL_ARG_SRC, src_mem}, // Source tag and memory obj
                    {DNNL_ARG_DST, dst_mem}, // Destination tag and memory obj
            });

    // Wait for the stream to complete the execution
    engine_stream.wait();
    // [Execute ReLU primitive]

    /// The @ref dev_guide_eltwise is one of the primitives that support
    /// in-place operations, meaning that the source and destination memory can
    /// be the same. To perform in-place transformation, the user must pass the
    /// same memory object for both the `DNNL_ARG_SRC` and
    /// `DNNL_ARG_DST` tags:
    /// @snippet getting_started.cpp Execute ReLU primitive in-place
    // [Execute ReLU primitive in-place]
    // Execute ReLU (in-place)
    // relu.execute(engine_stream, {
    //         {DNNL_ARG_SRC, src_mem},
    //         {DNNL_ARG_DST, src_mem},
    //         });
    // [Execute ReLU primitive in-place]

    /// @page getting_started_cpp
    /// @subsection getting_started_cpp_sub6 Obtaining the result and validation
    ///
    /// Now that we have the computed result, let's validate that it is
    /// actually correct. The result is stored in the `dst_mem` memory object,
    /// whose raw handle is engine specific, so the example copies the data
    /// back into a plain `std::vector<float>` with the
    /// `read_from_dnnl_memory()` helper as shown below.
    ///
    /// @warning
    ///     The @ref dnnl::memory::get_data_handle() returns a raw handle
    ///     to the buffer, the type of which is engine specific. For the CPU
    ///     engine the buffer is always a pointer to `void`, which can safely
    ///     be used. However, for engines other than CPU the handle might be
    ///     runtime-specific type, such as `cl_mem` in case of GPU/OpenCL.
    ///
    /// @snippet getting_started.cpp Check the results
    // [Check the results]
    // Copy the result out of `dst_mem` into a plain float buffer.
    // This is safe since we created `dst_mem` as f32 tensor with known
    // memory format.
    std::vector<float> relu_image(image_size);
    read_from_dnnl_memory(relu_image.data(), dst_mem);
    /*
    // Check the results
    for (int n = 0; n < N; ++n)
        for (int h = 0; h < H; ++h)
            for (int w = 0; w < W; ++w)
                for (int c = 0; c < C; ++c) {
                    int off = offset(
                            n, h, w, c); // get the physical offset of a pixel
                    float expected = image[off] < 0
                            ? 0.f
                            : image[off]; // expected value
                    if (relu_image[off] != expected) {
                        std::cout << "At index(" << n << ", " << c << ", " << h
                                  << ", " << w << ") expect " << expected
                                  << " but got " << relu_image[off]
                                  << std::endl;
                        throw std::logic_error("Accuracy check failed.");
                    }
                }
    // [Check the results]
    */
}
452 | |
453 | /// @page getting_started_cpp |
454 | /// |
455 | /// @section getting_started_cpp_main main() function |
456 | /// |
457 | /// We now just call everything we prepared earlier. |
458 | /// |
459 | /// Because we are using the oneDNN C++ API, we use exceptions to handle errors |
460 | /// (see @ref dev_guide_c_and_cpp_apis). |
461 | /// The oneDNN C++ API throws exceptions of type @ref dnnl::error, |
462 | /// which contains the error status (of type @ref dnnl_status_t) and a |
463 | /// human-readable error message accessible through regular `what()` method. |
464 | /// @page getting_started_cpp |
465 | /// @snippet getting_started.cpp Main |
466 | // [Main] |
467 | int main(int argc, char **argv) { |
468 | int exit_code = 0; |
469 | |
470 | engine::kind engine_kind = parse_engine_kind(argc, argv); |
471 | try { |
472 | getting_started_tutorial(engine_kind); |
473 | } catch (dnnl::error &e) { |
474 | std::cout << "oneDNN error caught: " << std::endl |
475 | << "\tStatus: " << dnnl_status2str(e.status) << std::endl |
476 | << "\tMessage: " << e.what() << std::endl; |
477 | exit_code = 1; |
478 | } catch (std::string &e) { |
479 | std::cout << "Error in the example: " << e << "." << std::endl; |
480 | exit_code = 2; |
481 | } |
482 | |
483 | std::cout << "Example " << (exit_code ? "failed" : "passed" ) << " on " |
484 | << engine_kind2str_upper(engine_kind) << "." << std::endl; |
485 | return exit_code; |
486 | } |
487 | // [Main] |
488 | |
489 | /// @page getting_started_cpp |
490 | /// |
491 | /// <b></b> |
492 | /// |
/// Upon compiling and running the example, the output should be just:
494 | /// |
495 | /// ~~~ |
496 | /// Example passed. |
497 | /// ~~~ |
498 | /// |
499 | /// Users are encouraged to experiment with the code to familiarize themselves |
500 | /// with the concepts. In particular, one of the changes that might be of |
501 | /// interest is to spoil some of the library calls to check how error handling |
502 | /// happens. For instance, if we replace |
503 | /// |
504 | /// ~~~cpp |
505 | /// relu.execute(engine_stream, { |
506 | /// {DNNL_ARG_SRC, src_mem}, |
507 | /// {DNNL_ARG_DST, dst_mem}, |
508 | /// }); |
509 | /// ~~~ |
510 | /// |
511 | /// with |
512 | /// |
513 | /// ~~~cpp |
514 | /// relu.execute(engine_stream, { |
515 | /// {DNNL_ARG_SRC, src_mem}, |
516 | /// // {DNNL_ARG_DST, dst_mem}, // Oops, forgot about this one |
517 | /// }); |
518 | /// ~~~ |
519 | /// |
520 | /// we should get the following output: |
521 | /// |
522 | /// ~~~ |
523 | /// oneDNN error caught: |
524 | /// Status: invalid_arguments |
525 | /// Message: could not execute a primitive |
526 | /// Example failed. |
527 | /// ~~~ |
528 | |