1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17/// @example getting_started.cpp
18/// @copybrief getting_started_cpp
19/// > Annotated version: @ref getting_started_cpp
20
#include <cmath>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <vector>

#include "oneapi/dnnl/dnnl.hpp"
#include "oneapi/dnnl/dnnl_debug.h"

#include "example_utils.hpp"
30
31using namespace dnnl;
32// [Prologue]
33
34/// @page getting_started_cpp Getting started
35///
36/// This C++ API example demonstrates the basics of the oneDNN programming model.
37///
38/// > Example code: @ref getting_started.cpp
39///
40/// This C++ API example demonstrates the basics of the oneDNN programming model:
41/// - How to create oneDNN memory objects.
42/// - How to get data from the user's buffer into a oneDNN memory object.
43/// - How a tensor's logical dimensions and memory object formats relate.
44/// - How to create oneDNN primitives.
45/// - How to execute the primitives.
46///
47/// The example uses the ReLU operation and comprises the following steps:
48/// 1. Creating @ref getting_started_cpp_sub1 to execute a primitive.
49/// 2. Performing @ref getting_started_cpp_sub2.
50/// 3. @ref getting_started_cpp_sub3 (using different flavors).
51/// 4. @ref getting_started_cpp_sub4.
52/// 5. @ref getting_started_cpp_sub5.
53/// 6. @ref getting_started_cpp_sub6 (checking that the resulting image does
54/// not contain negative values).
55///
56/// These steps are implemented in the @ref getting_started_cpp_tutorial, which
57/// in turn is called from @ref getting_started_cpp_main (which is also
58/// responsible for error handling).
59///
60/// @section getting_started_cpp_headers Public headers
61///
62/// To start using oneDNN we must first include the @ref dnnl.hpp
63/// header file in the program. We also include @ref dnnl_debug.h in
64/// example_utils.hpp, which contains some debugging facilities like returning
65/// a string representation for common oneDNN C types.
66
67// [Prologue]
68
69/// @page getting_started_cpp
70/// @section getting_started_cpp_tutorial getting_started_tutorial() function
71///
void getting_started_tutorial(engine::kind engine_kind) {
    /// @page getting_started_cpp
    /// @subsection getting_started_cpp_sub1 Engine and stream
    ///
    /// All oneDNN primitives and memory objects are attached to a
    /// particular @ref dnnl::engine, which is an abstraction of a
    /// computational device (see also @ref dev_guide_basic_concepts). The
    /// primitives are created and optimized for the device they are attached
    /// to and the memory objects refer to memory residing on the
    /// corresponding device. In particular, that means neither memory objects
    /// nor primitives that were created for one engine can be used on
    /// another.
    ///
    /// To create an engine, we should specify the @ref dnnl::engine::kind
    /// and the index of the device of the given kind.
    ///
    /// @snippet getting_started.cpp Initialize engine
    // [Initialize engine]
    engine eng(engine_kind, 0);
    // [Initialize engine]

    /// In addition to an engine, all primitives require a @ref dnnl::stream
    /// for the execution. The stream encapsulates an execution context and is
    /// tied to a particular engine.
    ///
    /// The creation is pretty straightforward:
    /// @snippet getting_started.cpp Initialize stream
    // [Initialize stream]
    stream engine_stream(eng);
    // [Initialize stream]

    /// In the simple cases, when a program works with one device only (e.g.
    /// only on CPU), an engine and a stream can be created once and used
    /// throughout the program. Some frameworks create singleton objects that
    /// hold oneDNN engine and stream and use them throughout the code.

    /// @subsection getting_started_cpp_sub2 Data preparation (code outside of oneDNN)
    ///
    /// Now that the preparation work is done, let's create some data to work
    /// with. We will create a 4D tensor in NHWC format, which is quite
    /// popular in many frameworks.
    ///
    /// Note that even though we work with one image only, the image tensor
    /// is still 4D. The extra dimension (here N) corresponds to the
    /// batch, and, in case of a single image, is equal to 1. It is pretty
    /// typical to have the batch dimension even when working with a single
    /// image.
    ///
    /// In oneDNN, all CNN primitives assume that tensors have the batch
    /// dimension, which is always the first logical dimension (see also @ref
    /// dev_guide_conventions).
    ///
    /// @snippet getting_started.cpp Create user's data
    // [Create user's data]
    const int N = 1, H = 13, W = 13, C = 3;

    // Compute physical strides for each dimension
    const int stride_N = H * W * C;
    const int stride_H = W * C;
    const int stride_W = C;
    const int stride_C = 1;

    // An auxiliary function that maps logical index to the physical offset
    auto offset = [=](int n, int h, int w, int c) {
        return n * stride_N + h * stride_H + w * stride_W + c * stride_C;
    };

    // The image size
    const int image_size = N * H * W * C;

    // Allocate a buffer for the image
    std::vector<float> image(image_size);

    // Initialize the image with some values (a cosine wave over the flat
    // offset, negated so that the image contains negative entries for ReLU
    // to clamp)
    for (int n = 0; n < N; ++n)
        for (int h = 0; h < H; ++h)
            for (int w = 0; w < W; ++w)
                for (int c = 0; c < C; ++c) {
                    int off = offset(
                            n, h, w, c); // Get the physical offset of a pixel
                    image[off] = -std::cos(off / 10.f);
                }
    // [Create user's data]
    /// @subsection getting_started_cpp_sub3 Wrapping data into a oneDNN memory object
    ///
    /// Now, having the image ready, let's wrap it in a @ref dnnl::memory
    /// object to be able to pass the data to oneDNN primitives.
    ///
    /// Creating @ref dnnl::memory comprises two steps:
    /// 1. Initializing the @ref dnnl::memory::desc struct (also referred to
    ///    as a memory descriptor), which only describes the tensor data and
    ///    doesn't contain the data itself. Memory descriptors are used to
    ///    create @ref dnnl::memory objects and to initialize primitive
    ///    descriptors (shown later in the example).
    /// 2. Creating the @ref dnnl::memory object itself (also referred to as
    ///    a memory object), based on the memory descriptor initialized in
    ///    step 1, an engine, and, optionally, a handle to data. The
    ///    memory object is used when a primitive is executed.
    ///
    /// Thanks to the
    /// [list initialization](https://en.cppreference.com/w/cpp/language/list_initialization)
    /// introduced in C++11, it is possible to combine these two steps whenever
    /// a memory descriptor is not used anywhere else but in creating a @ref
    /// dnnl::memory object.
    ///
    /// However, for the sake of demonstration, we will show both steps
    /// explicitly.

    /// @subsubsection getting_started_cpp_sub31 Memory descriptor
    ///
    /// To initialize the @ref dnnl::memory::desc, we need to pass:
    /// 1. The tensor's dimensions, **the semantic order** of which is
    ///    defined by **the primitive** that will use this memory
    ///    (descriptor).
    ///
    ///    @warning
    ///        Memory descriptors and objects are not aware of any meaning of
    ///        the data they describe or contain.
    /// 2. The data type for the tensor (@ref dnnl::memory::data_type).
    /// 3. The memory format tag (@ref dnnl::memory::format_tag) that
    ///    describes how the data is going to be laid out in the device's
    ///    memory. The memory format is required for the primitive to
    ///    correctly handle the data.
    ///
    /// The code:
    /// @snippet getting_started.cpp Init src_md
    // [Init src_md]
    auto src_md = memory::desc(
            {N, C, H, W}, // logical dims, the order is defined by a primitive
            memory::data_type::f32, // tensor's data type
            memory::format_tag::nhwc // memory format, NHWC in this case
    );
    // [Init src_md]

    /// The first thing to notice here is that we pass dimensions as `{N, C,
    /// H, W}` while it might seem more natural to pass `{N, H, W, C}`, which
    /// better corresponds to the user's code. This is because oneDNN
    /// CNN primitives like ReLU always expect tensors in the following form:
    ///
    /// | Spatial dim | Tensor dimensions
    /// | :--         | :--
    /// | 0D          | \f$N \times C\f$
    /// | 1D          | \f$N \times C \times W\f$
    /// | 2D          | \f$N \times C \times H \times W\f$
    /// | 3D          | \f$N \times C \times D \times H \times W\f$
    ///
    /// where:
    /// - \f$N\f$ is a batch dimension (discussed above),
    /// - \f$C\f$ is channel (aka feature maps) dimension, and
    /// - \f$D\f$, \f$H\f$, and \f$W\f$ are spatial dimensions.
    ///
    /// Now that the logical order of dimension is defined, we need to specify
    /// the memory format (the third parameter), which describes how logical
    /// indices map to the offset in memory. This is the place where the user's
    /// format NHWC comes into play. oneDNN has different @ref
    /// dnnl::memory::format_tag values that cover the most popular memory
    /// formats like NCHW, NHWC, CHWN, and some others.
    ///
    /// The memory descriptor for the image is called `src_md`. The `src` part
    /// comes from the fact that the image will be a source for the ReLU
    /// primitive (that is, we formulate memory names from the primitive
    /// perspective; hence we will use `dst` to name the output memory). The
    /// `md` is an initialism for Memory Descriptor.

    /// @paragraph getting_started_cpp_sub311 Alternative way to create a memory descriptor
    ///
    /// Before we continue with memory creation, let us show the alternative
    /// way to create the same memory descriptor: instead of using the
    /// @ref dnnl::memory::format_tag, we can directly specify the strides
    /// of each tensor dimension:
    /// @snippet getting_started.cpp Init alt_src_md
    // [Init alt_src_md]
    auto alt_src_md = memory::desc(
            {N, C, H, W}, // logical dims, the order is defined by a primitive
            memory::data_type::f32, // tensor's data type
            {stride_N, stride_C, stride_H, stride_W} // the strides
    );

    // Sanity check: the memory descriptors should be the same
    if (src_md != alt_src_md)
        throw std::logic_error("Memory descriptor initialization mismatch.");
    // [Init alt_src_md]

    /// Just as before, the tensor's dimensions come in the `N, C, H, W` order
    /// as required by CNN primitives. To define the physical memory format,
    /// the strides are passed as the third parameter. Note that the order of
    /// the strides corresponds to the order of the tensor's dimensions.
    ///
    /// @warning
    ///     Using the wrong order might lead to incorrect results or even a
    ///     crash.

    /// @subsubsection getting_started_cpp_sub32 Creating a memory object
    ///
    /// Having a memory descriptor and an engine prepared, let's create
    /// input and output memory objects for a ReLU primitive.
    /// @snippet getting_started.cpp Create memory objects
    // [Create memory objects]
    // src_mem contains a copy of image after write_to_dnnl_memory function
    auto src_mem = memory(src_md, eng);
    write_to_dnnl_memory(image.data(), src_mem);

    // For dst_mem the library allocates buffer
    auto dst_mem = memory(src_md, eng);
    // [Create memory objects]

    /// For both memory objects we use the constructor that instructs the
    /// library to allocate the underlying buffer, and then copy the user's
    /// data into `src_mem` with the `write_to_dnnl_memory()` helper from
    /// example_utils.hpp, which works for any engine kind.
    ///
    /// Alternatively, for data residing in host memory, the
    /// @ref dnnl::memory::memory(const dnnl::memory::desc &, const dnnl::engine &, void *)
    /// constructor that takes a buffer pointer as its last argument could be
    /// used to adopt the user's buffer directly.
    ///
    /// The key differences between a library-allocated buffer and a
    /// user-provided one are:
    /// 1. The library will own the memory for `dst_mem` and will deallocate
    ///    it when `dst_mem` is destroyed. That means the memory buffer can
    ///    be used only while `dst_mem` is alive.
    /// 2. Library-allocated buffers have good alignment, which typically
    ///    results in better performance.
    ///
    /// @note
    ///     Memory allocated outside of the library and passed to oneDNN
    ///     should have good alignment for better performance.
    ///
    /// In the subsequent section we will show how to get the buffer (pointer)
    /// from the `dst_mem` memory object.
    /// @subsection getting_started_cpp_sub4 Creating a ReLU primitive
    ///
    /// Let's now create a ReLU primitive.
    ///
    /// The library implements ReLU primitive as a particular algorithm of a
    /// more general @ref dev_guide_eltwise primitive, which applies a specified
    /// function to each and every element of the source tensor.
    ///
    /// Just as in the case of @ref dnnl::memory, a user should always go
    /// through (at least) two creation steps (which however, can be sometimes
    /// combined thanks to C++11):
    /// 1. Create an operation primitive descriptor (here @ref
    ///    dnnl::eltwise_forward::primitive_desc) that defines operation
    ///    parameters and is a **lightweight** descriptor of the actual
    ///    algorithm that **implements** the given operation.
    ///    The user can query different characteristics of the chosen
    ///    implementation such as memory consumptions and some others that will
    ///    be covered in the next topic (@ref memory_format_propagation_cpp).
    /// 2. Create a primitive (here @ref dnnl::eltwise_forward) that can be
    ///    executed on memory objects to compute the operation.
    ///
    /// oneDNN separates these two steps to enable the user to inspect details
    /// of a primitive implementation prior to creating the primitive. This may
    /// be expensive, because, for example, oneDNN generates the optimized
    /// computational code on the fly.
    ///
    /// @note
    ///     Primitive creation might be a very expensive operation, so consider
    ///     creating primitive objects once and executing them multiple times.
    ///
    /// The code:
    /// @snippet getting_started.cpp Create a ReLU primitive
    // [Create a ReLU primitive]
    // ReLU primitive descriptor, which corresponds to a particular
    // implementation in the library
    auto relu_pd = eltwise_forward::primitive_desc(
            eng, // an engine the primitive will be created for
            prop_kind::forward_inference, algorithm::eltwise_relu,
            src_md, // source memory descriptor for an operation to work on
            src_md, // destination memory descriptor for an operation to work on
            0.f, // alpha parameter means negative slope in case of ReLU
            0.f // beta parameter is ignored in case of ReLU
    );

    // ReLU primitive
    auto relu = eltwise_forward(relu_pd); // !!! this can take quite some time
    // [Create a ReLU primitive]

    /// A note about variable names. Similar to the `_md` suffix used for
    /// memory descriptors, we use `_pd` for the primitive descriptors, and
    /// no suffix for primitives themselves.
    ///
    /// It is worth mentioning that we specified the exact tensor and its
    /// memory format when we were initializing the `relu_pd`. That means
    /// `relu` primitive would perform computations with memory objects that
    /// correspond to this description. This is the one and only one way of
    /// creating non-compute-intensive primitives like @ref dev_guide_eltwise,
    /// @ref dev_guide_batch_normalization, and others.
    ///
    /// Compute-intensive primitives (like @ref dev_guide_convolution) have an
    /// ability to define the appropriate memory format on their own. This is
    /// one of the key features of the library and will be discussed in detail
    /// in the next topic: @ref memory_format_propagation_cpp.

    /// @subsection getting_started_cpp_sub5 Executing the ReLU primitive
    ///
    /// Finally, let's execute the primitive and wait for its completion.
    ///
    /// The input and output memory objects are passed to the `execute()`
    /// method using a <tag, memory> map. Each tag specifies what kind of
    /// tensor each memory object represents. All @ref dev_guide_eltwise
    /// primitives require the map to have two elements: a source memory
    /// object (input) and a destination memory (output).
    ///
    /// A primitive is executed in a stream (the first parameter of the
    /// `execute()` method). Depending on a stream kind, an execution might be
    /// blocking or non-blocking. This means that we need to call @ref
    /// dnnl::stream::wait before accessing the results.
    ///
    /// @snippet getting_started.cpp Execute ReLU primitive
    // [Execute ReLU primitive]
    // Execute ReLU (out-of-place)
    relu.execute(engine_stream, // The execution stream
            {
                    // A map with all inputs and outputs
                    {DNNL_ARG_SRC, src_mem}, // Source tag and memory obj
                    {DNNL_ARG_DST, dst_mem}, // Destination tag and memory obj
            });

    // Wait for the stream to complete the execution
    engine_stream.wait();
    // [Execute ReLU primitive]

    /// The @ref dev_guide_eltwise is one of the primitives that support
    /// in-place operations, meaning that the source and destination memory can
    /// be the same. To perform in-place transformation, the user must pass the
    /// same memory object for both the `DNNL_ARG_SRC` and
    /// `DNNL_ARG_DST` tags:
    /// @snippet getting_started.cpp Execute ReLU primitive in-place
    // [Execute ReLU primitive in-place]
    // Execute ReLU (in-place)
    // relu.execute(engine_stream, {
    //          {DNNL_ARG_SRC, src_mem},
    //          {DNNL_ARG_DST, src_mem},
    //         });
    // [Execute ReLU primitive in-place]

    /// @page getting_started_cpp
    /// @subsection getting_started_cpp_sub6 Obtaining the result and validation
    ///
    /// Now that we have the computed result, let's validate that it is
    /// actually correct. The result is stored in the `dst_mem` memory object.
    /// So we need to obtain the C++ pointer to a buffer with data via @ref
    /// dnnl::memory::get_data_handle() and cast it to the proper data type
    /// as shown below.
    ///
    /// @warning
    ///     The @ref dnnl::memory::get_data_handle() returns a raw handle
    ///     to the buffer, the type of which is engine specific. For the CPU
    ///     engine the buffer is always a pointer to `void`, which can safely
    ///     be used. However, for engines other than CPU the handle might be
    ///     runtime-specific type, such as `cl_mem` in case of GPU/OpenCL.
    ///
    /// @snippet getting_started.cpp Check the results
    // [Check the results]
    // Obtain a buffer for the `dst_mem` and cast it to `float *`.
    // This is safe since we created `dst_mem` as f32 tensor with known
    // memory format.
    std::vector<float> relu_image(image_size);
    read_from_dnnl_memory(relu_image.data(), dst_mem);
    /*
    // Check the results
    for (int n = 0; n < N; ++n)
        for (int h = 0; h < H; ++h)
            for (int w = 0; w < W; ++w)
                for (int c = 0; c < C; ++c) {
                    int off = offset(
                            n, h, w, c); // get the physical offset of a pixel
                    float expected = image[off] < 0
                            ? 0.f
                            : image[off]; // expected value
                    if (relu_image[off] != expected) {
                        std::cout << "At index(" << n << ", " << c << ", " << h
                                  << ", " << w << ") expect " << expected
                                  << " but got " << relu_image[off]
                                  << std::endl;
                        throw std::logic_error("Accuracy check failed.");
                    }
                }
    // [Check the results]
    */
}
452
453/// @page getting_started_cpp
454///
455/// @section getting_started_cpp_main main() function
456///
457/// We now just call everything we prepared earlier.
458///
459/// Because we are using the oneDNN C++ API, we use exceptions to handle errors
460/// (see @ref dev_guide_c_and_cpp_apis).
461/// The oneDNN C++ API throws exceptions of type @ref dnnl::error,
462/// which contains the error status (of type @ref dnnl_status_t) and a
463/// human-readable error message accessible through regular `what()` method.
464/// @page getting_started_cpp
465/// @snippet getting_started.cpp Main
466// [Main]
467int main(int argc, char **argv) {
468 int exit_code = 0;
469
470 engine::kind engine_kind = parse_engine_kind(argc, argv);
471 try {
472 getting_started_tutorial(engine_kind);
473 } catch (dnnl::error &e) {
474 std::cout << "oneDNN error caught: " << std::endl
475 << "\tStatus: " << dnnl_status2str(e.status) << std::endl
476 << "\tMessage: " << e.what() << std::endl;
477 exit_code = 1;
478 } catch (std::string &e) {
479 std::cout << "Error in the example: " << e << "." << std::endl;
480 exit_code = 2;
481 }
482
483 std::cout << "Example " << (exit_code ? "failed" : "passed") << " on "
484 << engine_kind2str_upper(engine_kind) << "." << std::endl;
485 return exit_code;
486}
487// [Main]
488
489/// @page getting_started_cpp
490///
491/// <b></b>
492///
/// Upon compiling and running the example, the output should be just:
494///
495/// ~~~
496/// Example passed.
497/// ~~~
498///
499/// Users are encouraged to experiment with the code to familiarize themselves
500/// with the concepts. In particular, one of the changes that might be of
501/// interest is to spoil some of the library calls to check how error handling
502/// happens. For instance, if we replace
503///
504/// ~~~cpp
505/// relu.execute(engine_stream, {
506/// {DNNL_ARG_SRC, src_mem},
507/// {DNNL_ARG_DST, dst_mem},
508/// });
509/// ~~~
510///
511/// with
512///
513/// ~~~cpp
514/// relu.execute(engine_stream, {
515/// {DNNL_ARG_SRC, src_mem},
516/// // {DNNL_ARG_DST, dst_mem}, // Oops, forgot about this one
517/// });
518/// ~~~
519///
520/// we should get the following output:
521///
522/// ~~~
523/// oneDNN error caught:
524/// Status: invalid_arguments
525/// Message: could not execute a primitive
526/// Example failed.
527/// ~~~
528