/*******************************************************************************
* Copyright 2020-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

/// @example convolution.cpp
/// > Annotated version: @ref convolution_example_cpp
///
/// @page convolution_example_cpp_short
///
/// This C++ API example demonstrates how to create and execute a
/// [Convolution](@ref dev_guide_convolution) primitive in forward propagation
/// mode in two configurations - with and without groups.
///
/// Key optimizations included in this example:
/// - Creation of optimized memory format from the primitive descriptor;
/// - Primitive attributes with fused post-ops.
///
/// @page convolution_example_cpp Convolution Primitive Example
/// @copydetails convolution_example_cpp_short
///
/// @include convolution.cpp

#include <algorithm>
#include <cmath>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

#include "example_utils.hpp"
#include "oneapi/dnnl/dnnl.hpp"

using namespace dnnl;

using tag = memory::format_tag;
using dt = memory::data_type;

void convolution_example(dnnl::engine::kind engine_kind) {

    // Create execution dnnl::engine.
    dnnl::engine engine(engine_kind, 0);

    // Create dnnl::stream.
    dnnl::stream engine_stream(engine);

    // Tensor dimensions.
    const memory::dim N = 3, // batch size
            IC = 32, // input channels
            IH = 13, // input height
            IW = 13, // input width
            OC = 64, // output channels
            KH = 3, // weights height
            KW = 3, // weights width
            PH_L = 1, // height padding: left
            PH_R = 1, // height padding: right
            PW_L = 1, // width padding: left
            PW_R = 1, // width padding: right
            SH = 4, // height-wise stride
            SW = 4, // width-wise stride
            OH = (IH - KH + PH_L + PH_R) / SH + 1, // output height
            OW = (IW - KW + PW_L + PW_R) / SW + 1; // output width
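
    // With the values above (IH = IW = 13, KH = KW = 3, padding of 1 on each
    // side, stride 4), the output spatial size works out to
    // OH = OW = (13 - 3 + 1 + 1) / 4 + 1 = 4.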

    // Source (src), weights, bias, and destination (dst) tensor dimensions.
    memory::dims src_dims = {N, IC, IH, IW};
    memory::dims weights_dims = {OC, IC, KH, KW};
    memory::dims bias_dims = {OC};
    memory::dims dst_dims = {N, OC, OH, OW};

    // Strides, padding dimensions.
    memory::dims strides_dims = {SH, SW};
    memory::dims padding_dims_l = {PH_L, PW_L};
    memory::dims padding_dims_r = {PH_R, PW_R};

    // Allocate buffers.
    std::vector<float> src_data(product(src_dims));
    std::vector<float> weights_data(product(weights_dims));
    std::vector<float> bias_data(OC);
    std::vector<float> dst_data(product(dst_dims));

    // Initialize src, weights, and bias tensors.
    std::generate(src_data.begin(), src_data.end(), []() {
        static int i = 0;
        return std::cos(i++ / 10.f);
    });
    std::generate(weights_data.begin(), weights_data.end(), []() {
        static int i = 0;
        return std::sin(i++ * 2.f);
    });
    std::generate(bias_data.begin(), bias_data.end(), []() {
        static int i = 0;
        return std::tanh(float(i++));
    });

    // Create memory objects for tensor data (src, weights, dst). In this
    // example, NCHW layout is assumed for src and dst, and OIHW for weights.
    auto user_src_mem = memory({src_dims, dt::f32, tag::nchw}, engine);
    auto user_weights_mem = memory({weights_dims, dt::f32, tag::oihw}, engine);
    auto user_dst_mem = memory({dst_dims, dt::f32, tag::nchw}, engine);

    // Create memory descriptors with format_tag::any for the primitive. This
    // enables the convolution primitive to choose memory layouts for an
    // optimized primitive implementation, and these layouts may differ from
    // the ones provided by the user.
    auto conv_src_md = memory::desc(src_dims, dt::f32, tag::any);
    auto conv_weights_md = memory::desc(weights_dims, dt::f32, tag::any);
    auto conv_dst_md = memory::desc(dst_dims, dt::f32, tag::any);
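
    // Note that descriptors created with format_tag::any are placeholders: the
    // concrete layouts are fixed only when the primitive descriptor is created
    // below, which is why no memory objects are allocated from them directly.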

    // Create memory descriptor and memory object for input bias.
    auto user_bias_md = memory::desc(bias_dims, dt::f32, tag::a);
    auto user_bias_mem = memory(user_bias_md, engine);

    // Write data to the memory objects' handles.
    write_to_dnnl_memory(src_data.data(), user_src_mem);
    write_to_dnnl_memory(weights_data.data(), user_weights_mem);
    write_to_dnnl_memory(bias_data.data(), user_bias_mem);

    // Create primitive post-ops (ReLU).
    const float alpha = 0.f;
    const float beta = 0.f;
    post_ops conv_ops;
    conv_ops.append_eltwise(algorithm::eltwise_relu, alpha, beta);
    primitive_attr conv_attr;
    conv_attr.set_post_ops(conv_ops);
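
    // With alpha = 0, eltwise_relu computes f(x) = max(x, 0), so the fused
    // primitive produces dst = ReLU(conv(src, weights) + bias) in a single
    // pass over the data, avoiding an extra memory round-trip for the
    // activation.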

    // Create primitive descriptor.
    auto conv_pd = convolution_forward::primitive_desc(engine,
            prop_kind::forward_training, algorithm::convolution_direct,
            conv_src_md, conv_weights_md, user_bias_md, conv_dst_md,
            strides_dims, padding_dims_l, padding_dims_r, conv_attr);
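
    // Illustrative aside (a sketch, not required by the example): the name of
    // the implementation selected by the library can be queried from the
    // primitive descriptor, e.g. to check whether an optimized (blocked
    // layout) kernel was picked for this shape and engine.
    std::cout << "Convolution implementation: " << conv_pd.impl_info_str()
              << std::endl;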

    // For now, assume that the src, weights, and dst memory layouts generated
    // by the primitive and the ones provided by the user are identical.
    auto conv_src_mem = user_src_mem;
    auto conv_weights_mem = user_weights_mem;
    auto conv_dst_mem = user_dst_mem;

    // Reorder the data in case the src and weights memory layouts generated by
    // the primitive and the ones provided by the user are different. In this
    // case, we create additional memory objects with internal buffers that
    // will contain the reordered data. The data in dst will be reordered after
    // the convolution computation has finished.
    if (conv_pd.src_desc() != user_src_mem.get_desc()) {
        conv_src_mem = memory(conv_pd.src_desc(), engine);
        reorder(user_src_mem, conv_src_mem)
                .execute(engine_stream, user_src_mem, conv_src_mem);
    }

    if (conv_pd.weights_desc() != user_weights_mem.get_desc()) {
        conv_weights_mem = memory(conv_pd.weights_desc(), engine);
        reorder(user_weights_mem, conv_weights_mem)
                .execute(engine_stream, user_weights_mem, conv_weights_mem);
    }

    if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
        conv_dst_mem = memory(conv_pd.dst_desc(), engine);
    }

    // Create the primitive.
    auto conv_prim = convolution_forward(conv_pd);

    // Primitive arguments.
    std::unordered_map<int, memory> conv_args;
    conv_args.insert({DNNL_ARG_SRC, conv_src_mem});
    conv_args.insert({DNNL_ARG_WEIGHTS, conv_weights_mem});
    conv_args.insert({DNNL_ARG_BIAS, user_bias_mem});
    conv_args.insert({DNNL_ARG_DST, conv_dst_mem});
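
    // Each DNNL_ARG_* key tells the primitive which role the corresponding
    // memory object plays at execution time. The bias is passed as-is in the
    // user's plain layout because the primitive descriptor was created with
    // the user-provided bias memory descriptor, so no bias reorder is needed.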

    // Primitive execution: convolution with ReLU.
    conv_prim.execute(engine_stream, conv_args);

    // Reorder the data in case the dst memory descriptor generated by the
    // primitive and the one provided by the user are different.
    if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
        reorder(conv_dst_mem, user_dst_mem)
                .execute(engine_stream, conv_dst_mem, user_dst_mem);
    } else
        user_dst_mem = conv_dst_mem;

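    // Note that stream execution may be asynchronous (in particular on GPU
    // engines), so the results must not be read back before the stream has
    // been waited on.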
    // Wait for the computation to finish.
    engine_stream.wait();

    // Read data from memory object's handle.
    read_from_dnnl_memory(dst_data.data(), user_dst_mem);
}

void depthwise_convolution_example(dnnl::engine::kind engine_kind) {

    // Create execution dnnl::engine.
    dnnl::engine engine(engine_kind, 0);

    // Create dnnl::stream.
    dnnl::stream engine_stream(engine);

    // Tensor dimensions.
    const memory::dim N = 3, // batch size
            G = 32, // channel groups
            IC = 32, // input channels
            IH = 13, // input height
            IW = 13, // input width
            OC = 32, // output channels
            KH = 3, // weights height
            KW = 3, // weights width
            PH_L = 1, // height padding: left
            PH_R = 1, // height padding: right
            PW_L = 1, // width padding: left
            PW_R = 1, // width padding: right
            SH = 4, // height-wise stride
            SW = 4, // width-wise stride
            OH = (IH - KH + PH_L + PH_R) / SH + 1, // output height
            OW = (IW - KW + PW_L + PW_R) / SW + 1; // output width

    // Source (src), weights, bias, and destination (dst) tensor dimensions.
    memory::dims src_dims = {N, IC, IH, IW};
    memory::dims weights_dims = {G, OC / G, IC / G, KH, KW};
    memory::dims bias_dims = {OC};
    memory::dims dst_dims = {N, OC, OH, OW};
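
    // With G = IC = OC = 32, the weights shape is {32, 1, 1, 3, 3}: each group
    // convolves a single input channel into a single output channel, i.e. a
    // depthwise convolution.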

    // Strides, padding dimensions.
    memory::dims strides_dims = {SH, SW};
    memory::dims padding_dims_l = {PH_L, PW_L};
    memory::dims padding_dims_r = {PH_R, PW_R};

    // Allocate buffers.
    std::vector<float> src_data(product(src_dims));
    std::vector<float> weights_data(product(weights_dims));
    std::vector<float> bias_data(OC);
    std::vector<float> dst_data(product(dst_dims));

    // Initialize src, weights, and bias tensors.
    std::generate(src_data.begin(), src_data.end(), []() {
        static int i = 0;
        return std::cos(i++ / 10.f);
    });
    std::generate(weights_data.begin(), weights_data.end(), []() {
        static int i = 0;
        return std::sin(i++ * 2.f);
    });
    std::generate(bias_data.begin(), bias_data.end(), []() {
        static int i = 0;
        return std::tanh(float(i++));
    });

    // Create memory objects for tensor data (src, weights, dst). In this
    // example, NCHW layout is assumed for src and dst, and GOIHW for weights.
    auto user_src_mem = memory({src_dims, dt::f32, tag::nchw}, engine);
    auto user_weights_mem = memory({weights_dims, dt::f32, tag::goihw}, engine);
    auto user_dst_mem = memory({dst_dims, dt::f32, tag::nchw}, engine);

    // Create memory descriptors with format_tag::any for the primitive. This
    // enables the convolution primitive to choose memory layouts for an
    // optimized primitive implementation, and these layouts may differ from
    // the ones provided by the user.
    auto conv_src_md = memory::desc(src_dims, dt::f32, tag::any);
    auto conv_weights_md = memory::desc(weights_dims, dt::f32, tag::any);
    auto conv_dst_md = memory::desc(dst_dims, dt::f32, tag::any);

    // Create memory descriptor and memory object for input bias.
    auto user_bias_md = memory::desc(bias_dims, dt::f32, tag::a);
    auto user_bias_mem = memory(user_bias_md, engine);

    // Write data to the memory objects' handles.
    write_to_dnnl_memory(src_data.data(), user_src_mem);
    write_to_dnnl_memory(weights_data.data(), user_weights_mem);
    write_to_dnnl_memory(bias_data.data(), user_bias_mem);

    // Create primitive post-ops (ReLU).
    const float alpha = 0.f;
    const float beta = 0.f;
    post_ops conv_ops;
    conv_ops.append_eltwise(algorithm::eltwise_relu, alpha, beta);
    primitive_attr conv_attr;
    conv_attr.set_post_ops(conv_ops);

    // Create primitive descriptor.
    auto conv_pd = convolution_forward::primitive_desc(engine,
            prop_kind::forward_training, algorithm::convolution_direct,
            conv_src_md, conv_weights_md, user_bias_md, conv_dst_md,
            strides_dims, padding_dims_l, padding_dims_r, conv_attr);

    // For now, assume that the src, weights, and dst memory layouts generated
    // by the primitive and the ones provided by the user are identical.
    auto conv_src_mem = user_src_mem;
    auto conv_weights_mem = user_weights_mem;
    auto conv_dst_mem = user_dst_mem;

    // Reorder the data in case the src and weights memory layouts generated by
    // the primitive and the ones provided by the user are different. In this
    // case, we create additional memory objects with internal buffers that
    // will contain the reordered data. The data in dst will be reordered after
    // the convolution computation has finished.
    if (conv_pd.src_desc() != user_src_mem.get_desc()) {
        conv_src_mem = memory(conv_pd.src_desc(), engine);
        reorder(user_src_mem, conv_src_mem)
                .execute(engine_stream, user_src_mem, conv_src_mem);
    }

    if (conv_pd.weights_desc() != user_weights_mem.get_desc()) {
        conv_weights_mem = memory(conv_pd.weights_desc(), engine);
        reorder(user_weights_mem, conv_weights_mem)
                .execute(engine_stream, user_weights_mem, conv_weights_mem);
    }

    if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
        conv_dst_mem = memory(conv_pd.dst_desc(), engine);
    }

    // Create the primitive.
    auto conv_prim = convolution_forward(conv_pd);

    // Primitive arguments.
    std::unordered_map<int, memory> conv_args;
    conv_args.insert({DNNL_ARG_SRC, conv_src_mem});
    conv_args.insert({DNNL_ARG_WEIGHTS, conv_weights_mem});
    conv_args.insert({DNNL_ARG_BIAS, user_bias_mem});
    conv_args.insert({DNNL_ARG_DST, conv_dst_mem});

    // Primitive execution: convolution with ReLU.
    conv_prim.execute(engine_stream, conv_args);

    // Reorder the data in case the dst memory descriptor generated by the
    // primitive and the one provided by the user are different.
    if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
        reorder(conv_dst_mem, user_dst_mem)
                .execute(engine_stream, conv_dst_mem, user_dst_mem);
    } else
        user_dst_mem = conv_dst_mem;

    // Wait for the computation to finish.
    engine_stream.wait();

    // Read data from memory object's handle.
    read_from_dnnl_memory(dst_data.data(), user_dst_mem);
}

int main(int argc, char **argv) {
    auto exit_code = handle_example_errors(
            convolution_example, parse_engine_kind(argc, argv));
    if (exit_code != 0) return exit_code;

    return handle_example_errors(
            depthwise_convolution_example, parse_engine_kind(argc, argv));
}