1 | /******************************************************************************* |
2 | * Copyright 2016-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | /// @example cnn_inference_f32.cpp |
18 | /// @copybrief cnn_inference_f32_cpp |
19 | /// > Annotated version: @ref cnn_inference_f32_cpp |
20 | |
21 | /// @page cnn_inference_f32_cpp CNN f32 inference example |
22 | /// This C++ API example demonstrates how to build an AlexNet neural |
23 | /// network topology for forward-pass inference. |
24 | /// |
25 | /// > Example code: @ref cnn_inference_f32.cpp |
26 | /// |
27 | /// Some key take-aways include: |
28 | /// |
29 | /// * How tensors are implemented and submitted to primitives. |
30 | /// * How primitives are created. |
31 | /// * How primitives are sequentially submitted to the network, where the output |
32 | /// from primitives is passed as input to the next primitive. The latter |
33 | /// specifies a dependency between the primitive input and output data. |
34 | /// * Specific 'inference-only' configurations. |
35 | /// * Limiting the number of reorders performed that are detrimental |
36 | /// to performance. |
37 | /// |
38 | /// The example implements the AlexNet layers |
39 | /// as numbered primitives (for example, conv1, pool1, conv2). |
40 | |
41 | #include <assert.h> |
42 | |
43 | #include <chrono> |
44 | #include <vector> |
45 | #include <unordered_map> |
46 | |
47 | #include "oneapi/dnnl/dnnl.hpp" |
48 | |
49 | #include "example_utils.hpp" |
50 | |
51 | using namespace dnnl; |
52 | |
53 | void simple_net(engine::kind engine_kind, int times = 100) { |
54 | using tag = memory::format_tag; |
55 | using dt = memory::data_type; |
56 | |
57 | /// Initialize an engine and stream. The last parameter in the call represents |
58 | /// the index of the engine. |
59 | /// @snippet cnn_inference_f32.cpp Initialize engine and stream |
60 | //[Initialize engine and stream] |
61 | engine eng(engine_kind, 0); |
62 | stream s(eng); |
63 | //[Initialize engine and stream] |
64 | |
65 | /// Create a vector for the primitives and a vector to hold memory |
66 | /// that will be used as arguments. |
67 | /// @snippet cnn_inference_f32.cpp Create network |
68 | //[Create network] |
69 | std::vector<primitive> net; |
70 | std::vector<std::unordered_map<int, memory>> net_args; |
71 | //[Create network] |
72 | |
73 | const memory::dim batch = 1; |
74 | |
75 | // AlexNet: conv1 |
76 | // {batch, 3, 227, 227} (x) {96, 3, 11, 11} -> {batch, 96, 55, 55} |
77 | // strides: {4, 4} |
78 | memory::dims conv1_src_tz = {batch, 3, 227, 227}; |
79 | memory::dims conv1_weights_tz = {96, 3, 11, 11}; |
80 | memory::dims conv1_bias_tz = {96}; |
81 | memory::dims conv1_dst_tz = {batch, 96, 55, 55}; |
82 | memory::dims conv1_strides = {4, 4}; |
83 | memory::dims conv1_padding = {0, 0}; |
84 | |
85 | /// Allocate buffers for input and output data, weights, and bias. |
86 | /// @snippet cnn_inference_f32.cpp Allocate buffers |
87 | //[Allocate buffers] |
88 | std::vector<float> user_src(batch * 3 * 227 * 227); |
89 | std::vector<float> user_dst(batch * 1000); |
90 | std::vector<float> conv1_weights(product(conv1_weights_tz)); |
91 | std::vector<float> conv1_bias(product(conv1_bias_tz)); |
92 | //[Allocate buffers] |
93 | |
94 | /// Create memory that describes data layout in the buffers. This example uses |
95 | /// tag::nchw (batch-channels-height-width) for input data and tag::oihw |
96 | /// for weights. |
97 | /// @snippet cnn_inference_f32.cpp Create user memory |
98 | //[Create user memory] |
99 | auto user_src_memory = memory({{conv1_src_tz}, dt::f32, tag::nchw}, eng); |
100 | write_to_dnnl_memory(user_src.data(), user_src_memory); |
101 | auto user_weights_memory |
102 | = memory({{conv1_weights_tz}, dt::f32, tag::oihw}, eng); |
103 | write_to_dnnl_memory(conv1_weights.data(), user_weights_memory); |
104 | auto conv1_user_bias_memory |
105 | = memory({{conv1_bias_tz}, dt::f32, tag::x}, eng); |
106 | write_to_dnnl_memory(conv1_bias.data(), conv1_user_bias_memory); |
107 | //[Create user memory] |
108 | |
109 | /// Create memory descriptors with layout tag::any. The `any` format enables |
110 | /// the convolution primitive to choose the data format that will result in |
111 | /// best performance based on its input parameters (convolution kernel |
112 | /// sizes, strides, padding, and so on). If the resulting format is different |
113 | /// from `nchw`, the user data must be transformed to the format required for |
114 | /// the convolution (as explained below). |
115 | /// @snippet cnn_inference_f32.cpp Create convolution memory descriptors |
116 | //[Create convolution memory descriptors] |
117 | auto conv1_src_md = memory::desc({conv1_src_tz}, dt::f32, tag::any); |
118 | auto conv1_bias_md = memory::desc({conv1_bias_tz}, dt::f32, tag::any); |
119 | auto conv1_weights_md = memory::desc({conv1_weights_tz}, dt::f32, tag::any); |
120 | auto conv1_dst_md = memory::desc({conv1_dst_tz}, dt::f32, tag::any); |
121 | //[Create convolution memory descriptors] |
122 | |
123 | /// Create a convolution primitive descriptor by specifying engine, |
124 | /// propagation kind, [convolution algorithm](@ref dev_guide_convolution), |
125 | /// shapes of input, weights, bias, output, convolution strides, padding, |
126 | /// and kind of padding. |
127 | /// Propagation kind is set to prop_kind::forward_inference to optimize for |
128 | /// inference execution and omit computations that are necessary only for |
129 | /// backward propagation. |
130 | /// Once created, it has specific formats instead of the `any` format. |
131 | /// @snippet cnn_inference_f32.cpp Create convolution primitive descriptor |
132 | //[Create convolution primitive descriptor] |
133 | auto conv1_prim_desc = convolution_forward::primitive_desc(eng, |
134 | prop_kind::forward_inference, algorithm::convolution_direct, |
135 | conv1_src_md, conv1_weights_md, conv1_bias_md, conv1_dst_md, |
136 | conv1_strides, conv1_padding, conv1_padding); |
137 | //[Create convolution primitive descriptor] |
138 | |
    /// Check whether the data and weights formats required by the convolution
    /// differ from the user format. If they differ, change the layout using a
    /// reorder primitive.
142 | /// @snippet cnn_inference_f32.cpp Reorder data and weights |
143 | //[Reorder data and weights] |
144 | auto conv1_src_memory = user_src_memory; |
145 | if (conv1_prim_desc.src_desc() != user_src_memory.get_desc()) { |
146 | conv1_src_memory = memory(conv1_prim_desc.src_desc(), eng); |
147 | net.push_back(reorder(user_src_memory, conv1_src_memory)); |
148 | net_args.push_back({{DNNL_ARG_FROM, user_src_memory}, |
149 | {DNNL_ARG_TO, conv1_src_memory}}); |
150 | } |
151 | |
152 | auto conv1_weights_memory = user_weights_memory; |
153 | if (conv1_prim_desc.weights_desc() != user_weights_memory.get_desc()) { |
154 | conv1_weights_memory = memory(conv1_prim_desc.weights_desc(), eng); |
155 | reorder(user_weights_memory, conv1_weights_memory) |
156 | .execute(s, user_weights_memory, conv1_weights_memory); |
157 | } |
158 | //[Reorder data and weights] |
159 | |
160 | /// Create a memory primitive for output. |
161 | /// @snippet cnn_inference_f32.cpp Create memory for output |
162 | //[Create memory for output] |
163 | auto conv1_dst_memory = memory(conv1_prim_desc.dst_desc(), eng); |
164 | //[Create memory for output] |
165 | |
166 | /// Create a convolution primitive and add it to the net. |
    /// @snippet cnn_inference_f32.cpp Create convolution primitive
168 | //[Create convolution primitive] |
169 | net.push_back(convolution_forward(conv1_prim_desc)); |
170 | net_args.push_back({{DNNL_ARG_SRC, conv1_src_memory}, |
171 | {DNNL_ARG_WEIGHTS, conv1_weights_memory}, |
172 | {DNNL_ARG_BIAS, conv1_user_bias_memory}, |
173 | {DNNL_ARG_DST, conv1_dst_memory}}); |
174 | //[Create convolution primitive] |
175 | |
176 | // AlexNet: relu1 |
177 | // {batch, 96, 55, 55} -> {batch, 96, 55, 55} |
178 | const float negative1_slope = 0.0f; |
179 | |
180 | /// Create the relu primitive. For better performance, keep the input data |
181 | /// format for ReLU (as well as for other operation primitives until another |
182 | /// convolution or inner product is encountered) the same as the one chosen |
183 | /// for convolution. Also note that ReLU is done in-place by using conv1 memory. |
184 | /// @snippet cnn_inference_f32.cpp Create relu primitive |
185 | //[Create relu primitive] |
186 | auto relu1_prim_desc |
187 | = eltwise_forward::primitive_desc(eng, prop_kind::forward_inference, |
188 | algorithm::eltwise_relu, conv1_dst_memory.get_desc(), |
189 | conv1_dst_memory.get_desc(), negative1_slope); |
190 | |
191 | net.push_back(eltwise_forward(relu1_prim_desc)); |
192 | net_args.push_back({{DNNL_ARG_SRC, conv1_dst_memory}, |
193 | {DNNL_ARG_DST, conv1_dst_memory}}); |
194 | //[Create relu primitive] |
195 | |
196 | // AlexNet: lrn1 |
197 | // {batch, 96, 55, 55} -> {batch, 96, 55, 55} |
198 | // local size: 5 |
199 | // alpha1: 0.0001 |
200 | // beta1: 0.75 |
201 | const memory::dim local1_size = 5; |
202 | const float alpha1 = 0.0001f; |
203 | const float beta1 = 0.75f; |
204 | const float k1 = 1.0f; |
205 | |
206 | // create lrn primitive and add it to net |
207 | auto lrn1_prim_desc = lrn_forward::primitive_desc(eng, |
208 | prop_kind::forward_inference, algorithm::lrn_across_channels, |
209 | conv1_dst_memory.get_desc(), conv1_dst_memory.get_desc(), |
210 | local1_size, alpha1, beta1, k1); |
211 | auto lrn1_dst_memory = memory(lrn1_prim_desc.dst_desc(), eng); |
212 | |
213 | net.push_back(lrn_forward(lrn1_prim_desc)); |
214 | net_args.push_back({{DNNL_ARG_SRC, conv1_dst_memory}, |
215 | {DNNL_ARG_DST, lrn1_dst_memory}}); |
216 | |
217 | // AlexNet: pool1 |
218 | // {batch, 96, 55, 55} -> {batch, 96, 27, 27} |
219 | // kernel: {3, 3} |
220 | // strides: {2, 2} |
221 | memory::dims pool1_dst_tz = {batch, 96, 27, 27}; |
222 | memory::dims pool1_kernel = {3, 3}; |
223 | memory::dims pool1_strides = {2, 2}; |
224 | memory::dims pool_dilation = {0, 0}; |
225 | memory::dims pool_padding = {0, 0}; |
226 | |
227 | auto pool1_dst_md = memory::desc({pool1_dst_tz}, dt::f32, tag::any); |
228 | |
229 | /// For training execution, pooling requires a private workspace memory |
230 | /// to perform the backward pass. However, pooling should not use 'workspace' |
231 | /// for inference, because this is detrimental to performance. |
232 | /// @snippet cnn_inference_f32.cpp Create pooling primitive |
233 | /// |
234 | /// The example continues to create more layers according |
235 | /// to the AlexNet topology. |
236 | //[Create pooling primitive] |
237 | auto pool1_pd = pooling_forward::primitive_desc(eng, |
238 | prop_kind::forward_inference, algorithm::pooling_max, |
239 | lrn1_dst_memory.get_desc(), pool1_dst_md, pool1_strides, |
240 | pool1_kernel, pool_dilation, pool_padding, pool_padding); |
241 | auto pool1_dst_memory = memory(pool1_pd.dst_desc(), eng); |
242 | |
243 | net.push_back(pooling_forward(pool1_pd)); |
244 | net_args.push_back({{DNNL_ARG_SRC, lrn1_dst_memory}, |
245 | {DNNL_ARG_DST, pool1_dst_memory}}); |
246 | //[Create pooling primitive] |
247 | |
248 | // AlexNet: conv2 |
249 | // {batch, 96, 27, 27} (x) {2, 128, 48, 5, 5} -> {batch, 256, 27, 27} |
250 | // strides: {1, 1} |
251 | memory::dims conv2_src_tz = {batch, 96, 27, 27}; |
252 | memory::dims conv2_weights_tz = {2, 128, 48, 5, 5}; |
253 | memory::dims conv2_bias_tz = {256}; |
254 | memory::dims conv2_dst_tz = {batch, 256, 27, 27}; |
255 | memory::dims conv2_strides = {1, 1}; |
256 | memory::dims conv2_padding = {2, 2}; |
257 | |
258 | std::vector<float> conv2_weights(product(conv2_weights_tz)); |
259 | std::vector<float> conv2_bias(product(conv2_bias_tz)); |
260 | |
261 | // create memory for user data |
262 | auto conv2_user_weights_memory |
263 | = memory({{conv2_weights_tz}, dt::f32, tag::goihw}, eng); |
264 | write_to_dnnl_memory(conv2_weights.data(), conv2_user_weights_memory); |
265 | auto conv2_user_bias_memory |
266 | = memory({{conv2_bias_tz}, dt::f32, tag::x}, eng); |
267 | write_to_dnnl_memory(conv2_bias.data(), conv2_user_bias_memory); |
268 | |
269 | // create memory descriptors for convolution data w/ no specified format |
270 | auto conv2_src_md = memory::desc({conv2_src_tz}, dt::f32, tag::any); |
271 | auto conv2_bias_md = memory::desc({conv2_bias_tz}, dt::f32, tag::any); |
272 | auto conv2_weights_md = memory::desc({conv2_weights_tz}, dt::f32, tag::any); |
273 | auto conv2_dst_md = memory::desc({conv2_dst_tz}, dt::f32, tag::any); |
274 | |
275 | // create a convolution |
276 | auto conv2_prim_desc = convolution_forward::primitive_desc(eng, |
277 | prop_kind::forward_inference, algorithm::convolution_direct, |
278 | conv2_src_md, conv2_weights_md, conv2_bias_md, conv2_dst_md, |
279 | conv2_strides, conv2_padding, conv2_padding); |
280 | |
281 | auto conv2_src_memory = pool1_dst_memory; |
282 | if (conv2_prim_desc.src_desc() != conv2_src_memory.get_desc()) { |
283 | conv2_src_memory = memory(conv2_prim_desc.src_desc(), eng); |
284 | net.push_back(reorder(pool1_dst_memory, conv2_src_memory)); |
285 | net_args.push_back({{DNNL_ARG_FROM, pool1_dst_memory}, |
286 | {DNNL_ARG_TO, conv2_src_memory}}); |
287 | } |
288 | |
289 | auto conv2_weights_memory = conv2_user_weights_memory; |
290 | if (conv2_prim_desc.weights_desc() |
291 | != conv2_user_weights_memory.get_desc()) { |
292 | conv2_weights_memory = memory(conv2_prim_desc.weights_desc(), eng); |
293 | reorder(conv2_user_weights_memory, conv2_weights_memory) |
294 | .execute(s, conv2_user_weights_memory, conv2_weights_memory); |
295 | } |
296 | |
297 | auto conv2_dst_memory = memory(conv2_prim_desc.dst_desc(), eng); |
298 | |
299 | // create convolution primitive and add it to net |
300 | net.push_back(convolution_forward(conv2_prim_desc)); |
301 | net_args.push_back({{DNNL_ARG_SRC, conv2_src_memory}, |
302 | {DNNL_ARG_WEIGHTS, conv2_weights_memory}, |
303 | {DNNL_ARG_BIAS, conv2_user_bias_memory}, |
304 | {DNNL_ARG_DST, conv2_dst_memory}}); |
305 | |
306 | // AlexNet: relu2 |
307 | // {batch, 256, 27, 27} -> {batch, 256, 27, 27} |
308 | const float negative2_slope = 0.0f; |
309 | |
310 | // create relu primitive and add it to net |
311 | auto relu2_prim_desc |
312 | = eltwise_forward::primitive_desc(eng, prop_kind::forward_inference, |
313 | algorithm::eltwise_relu, conv2_dst_memory.get_desc(), |
314 | conv2_dst_memory.get_desc(), negative2_slope); |
315 | |
316 | net.push_back(eltwise_forward(relu2_prim_desc)); |
317 | net_args.push_back({{DNNL_ARG_SRC, conv2_dst_memory}, |
318 | {DNNL_ARG_DST, conv2_dst_memory}}); |
319 | |
320 | // AlexNet: lrn2 |
321 | // {batch, 256, 27, 27} -> {batch, 256, 27, 27} |
322 | // local size: 5 |
323 | // alpha2: 0.0001 |
324 | // beta2: 0.75 |
325 | const memory::dim local2_size = 5; |
326 | const float alpha2 = 0.0001f; |
327 | const float beta2 = 0.75f; |
328 | const float k2 = 1.0f; |
329 | |
330 | // create lrn primitive and add it to net |
331 | auto lrn2_prim_desc |
332 | = lrn_forward::primitive_desc(eng, prop_kind::forward_inference, |
333 | algorithm::lrn_across_channels, conv2_prim_desc.dst_desc(), |
334 | conv2_prim_desc.dst_desc(), local2_size, alpha2, beta2, k2); |
335 | auto lrn2_dst_memory = memory(lrn2_prim_desc.dst_desc(), eng); |
336 | |
337 | net.push_back(lrn_forward(lrn2_prim_desc)); |
338 | net_args.push_back({{DNNL_ARG_SRC, conv2_dst_memory}, |
339 | {DNNL_ARG_DST, lrn2_dst_memory}}); |
340 | |
341 | // AlexNet: pool2 |
342 | // {batch, 256, 27, 27} -> {batch, 256, 13, 13} |
343 | // kernel: {3, 3} |
344 | // strides: {2, 2} |
345 | memory::dims pool2_dst_tz = {batch, 256, 13, 13}; |
346 | memory::dims pool2_kernel = {3, 3}; |
347 | memory::dims pool2_strides = {2, 2}; |
348 | memory::dims pool2_dilation = {0, 0}; |
349 | memory::dims pool2_padding = {0, 0}; |
350 | |
351 | auto pool2_dst_md = memory::desc({pool2_dst_tz}, dt::f32, tag::any); |
352 | |
353 | // create a pooling |
354 | auto pool2_pd = pooling_forward::primitive_desc(eng, |
355 | prop_kind::forward_inference, algorithm::pooling_max, |
356 | lrn2_dst_memory.get_desc(), pool2_dst_md, pool2_strides, |
357 | pool2_kernel, pool2_dilation, pool2_padding, pool2_padding); |
358 | auto pool2_dst_memory = memory(pool2_pd.dst_desc(), eng); |
359 | |
    // create pooling primitive and add it to net
361 | net.push_back(pooling_forward(pool2_pd)); |
362 | net_args.push_back({{DNNL_ARG_SRC, lrn2_dst_memory}, |
363 | {DNNL_ARG_DST, pool2_dst_memory}}); |
364 | |
365 | // AlexNet: conv3 |
366 | // {batch, 256, 13, 13} (x) {384, 256, 3, 3}; -> {batch, 384, 13, 13}; |
367 | // strides: {1, 1} |
368 | memory::dims conv3_src_tz = {batch, 256, 13, 13}; |
369 | memory::dims conv3_weights_tz = {384, 256, 3, 3}; |
370 | memory::dims conv3_bias_tz = {384}; |
371 | memory::dims conv3_dst_tz = {batch, 384, 13, 13}; |
372 | memory::dims conv3_strides = {1, 1}; |
373 | memory::dims conv3_padding = {1, 1}; |
374 | |
375 | std::vector<float> conv3_weights(product(conv3_weights_tz)); |
376 | std::vector<float> conv3_bias(product(conv3_bias_tz)); |
377 | |
378 | // create memory for user data |
379 | auto conv3_user_weights_memory |
380 | = memory({{conv3_weights_tz}, dt::f32, tag::oihw}, eng); |
381 | write_to_dnnl_memory(conv3_weights.data(), conv3_user_weights_memory); |
382 | auto conv3_user_bias_memory |
383 | = memory({{conv3_bias_tz}, dt::f32, tag::x}, eng); |
384 | write_to_dnnl_memory(conv3_bias.data(), conv3_user_bias_memory); |
385 | |
386 | // create memory descriptors for convolution data w/ no specified format |
387 | auto conv3_src_md = memory::desc({conv3_src_tz}, dt::f32, tag::any); |
388 | auto conv3_bias_md = memory::desc({conv3_bias_tz}, dt::f32, tag::any); |
389 | auto conv3_weights_md = memory::desc({conv3_weights_tz}, dt::f32, tag::any); |
390 | auto conv3_dst_md = memory::desc({conv3_dst_tz}, dt::f32, tag::any); |
391 | |
392 | // create a convolution |
393 | auto conv3_prim_desc = convolution_forward::primitive_desc(eng, |
394 | prop_kind::forward_inference, algorithm::convolution_direct, |
395 | conv3_src_md, conv3_weights_md, conv3_bias_md, conv3_dst_md, |
396 | conv3_strides, conv3_padding, conv3_padding); |
397 | |
398 | auto conv3_src_memory = pool2_dst_memory; |
399 | if (conv3_prim_desc.src_desc() != conv3_src_memory.get_desc()) { |
400 | conv3_src_memory = memory(conv3_prim_desc.src_desc(), eng); |
401 | net.push_back(reorder(pool2_dst_memory, conv3_src_memory)); |
402 | net_args.push_back({{DNNL_ARG_FROM, pool2_dst_memory}, |
403 | {DNNL_ARG_TO, conv3_src_memory}}); |
404 | } |
405 | |
406 | auto conv3_weights_memory = conv3_user_weights_memory; |
407 | if (conv3_prim_desc.weights_desc() |
408 | != conv3_user_weights_memory.get_desc()) { |
409 | conv3_weights_memory = memory(conv3_prim_desc.weights_desc(), eng); |
410 | reorder(conv3_user_weights_memory, conv3_weights_memory) |
411 | .execute(s, conv3_user_weights_memory, conv3_weights_memory); |
412 | } |
413 | |
414 | auto conv3_dst_memory = memory(conv3_prim_desc.dst_desc(), eng); |
415 | |
416 | // create convolution primitive and add it to net |
417 | net.push_back(convolution_forward(conv3_prim_desc)); |
418 | net_args.push_back({{DNNL_ARG_SRC, conv3_src_memory}, |
419 | {DNNL_ARG_WEIGHTS, conv3_weights_memory}, |
420 | {DNNL_ARG_BIAS, conv3_user_bias_memory}, |
421 | {DNNL_ARG_DST, conv3_dst_memory}}); |
422 | |
423 | // AlexNet: relu3 |
424 | // {batch, 384, 13, 13} -> {batch, 384, 13, 13} |
425 | const float negative3_slope = 0.0f; |
426 | |
427 | // create relu primitive and add it to net |
428 | auto relu3_prim_desc |
429 | = eltwise_forward::primitive_desc(eng, prop_kind::forward_inference, |
430 | algorithm::eltwise_relu, conv3_dst_memory.get_desc(), |
431 | conv3_dst_memory.get_desc(), negative3_slope); |
432 | |
433 | net.push_back(eltwise_forward(relu3_prim_desc)); |
434 | net_args.push_back({{DNNL_ARG_SRC, conv3_dst_memory}, |
435 | {DNNL_ARG_DST, conv3_dst_memory}}); |
436 | |
437 | // AlexNet: conv4 |
438 | // {batch, 384, 13, 13} (x) {2, 192, 192, 3, 3}; -> |
439 | // {batch, 384, 13, 13}; |
440 | // strides: {1, 1} |
441 | memory::dims conv4_src_tz = {batch, 384, 13, 13}; |
442 | memory::dims conv4_weights_tz = {2, 192, 192, 3, 3}; |
443 | memory::dims conv4_bias_tz = {384}; |
444 | memory::dims conv4_dst_tz = {batch, 384, 13, 13}; |
445 | memory::dims conv4_strides = {1, 1}; |
446 | memory::dims conv4_padding = {1, 1}; |
447 | |
448 | std::vector<float> conv4_weights(product(conv4_weights_tz)); |
449 | std::vector<float> conv4_bias(product(conv4_bias_tz)); |
450 | |
451 | // create memory for user data |
452 | auto conv4_user_weights_memory |
453 | = memory({{conv4_weights_tz}, dt::f32, tag::goihw}, eng); |
454 | write_to_dnnl_memory(conv4_weights.data(), conv4_user_weights_memory); |
455 | auto conv4_user_bias_memory |
456 | = memory({{conv4_bias_tz}, dt::f32, tag::x}, eng); |
457 | write_to_dnnl_memory(conv4_bias.data(), conv4_user_bias_memory); |
458 | |
459 | // create memory descriptors for convolution data w/ no specified format |
460 | auto conv4_src_md = memory::desc({conv4_src_tz}, dt::f32, tag::any); |
461 | auto conv4_bias_md = memory::desc({conv4_bias_tz}, dt::f32, tag::any); |
462 | auto conv4_weights_md = memory::desc({conv4_weights_tz}, dt::f32, tag::any); |
463 | auto conv4_dst_md = memory::desc({conv4_dst_tz}, dt::f32, tag::any); |
464 | |
465 | // create a convolution |
466 | auto conv4_prim_desc = convolution_forward::primitive_desc(eng, |
467 | prop_kind::forward_inference, algorithm::convolution_direct, |
468 | conv4_src_md, conv4_weights_md, conv4_bias_md, conv4_dst_md, |
469 | conv4_strides, conv4_padding, conv4_padding); |
470 | |
471 | auto conv4_src_memory = conv3_dst_memory; |
472 | if (conv4_prim_desc.src_desc() != conv4_src_memory.get_desc()) { |
473 | conv4_src_memory = memory(conv4_prim_desc.src_desc(), eng); |
474 | net.push_back(reorder(conv3_dst_memory, conv4_src_memory)); |
475 | net_args.push_back({{DNNL_ARG_FROM, conv3_dst_memory}, |
476 | {DNNL_ARG_TO, conv4_src_memory}}); |
477 | } |
478 | |
479 | auto conv4_weights_memory = conv4_user_weights_memory; |
480 | if (conv4_prim_desc.weights_desc() |
481 | != conv4_user_weights_memory.get_desc()) { |
482 | conv4_weights_memory = memory(conv4_prim_desc.weights_desc(), eng); |
483 | reorder(conv4_user_weights_memory, conv4_weights_memory) |
484 | .execute(s, conv4_user_weights_memory, conv4_weights_memory); |
485 | } |
486 | |
487 | auto conv4_dst_memory = memory(conv4_prim_desc.dst_desc(), eng); |
488 | |
489 | // create convolution primitive and add it to net |
490 | net.push_back(convolution_forward(conv4_prim_desc)); |
491 | net_args.push_back({{DNNL_ARG_SRC, conv4_src_memory}, |
492 | {DNNL_ARG_WEIGHTS, conv4_weights_memory}, |
493 | {DNNL_ARG_BIAS, conv4_user_bias_memory}, |
494 | {DNNL_ARG_DST, conv4_dst_memory}}); |
495 | |
496 | // AlexNet: relu4 |
497 | // {batch, 384, 13, 13} -> {batch, 384, 13, 13} |
498 | const float negative4_slope = 0.0f; |
499 | |
500 | // create relu primitive and add it to net |
501 | auto relu4_prim_desc |
502 | = eltwise_forward::primitive_desc(eng, prop_kind::forward_inference, |
503 | algorithm::eltwise_relu, conv4_dst_memory.get_desc(), |
504 | conv4_dst_memory.get_desc(), negative4_slope); |
505 | |
506 | net.push_back(eltwise_forward(relu4_prim_desc)); |
507 | net_args.push_back({{DNNL_ARG_SRC, conv4_dst_memory}, |
508 | {DNNL_ARG_DST, conv4_dst_memory}}); |
509 | |
510 | // AlexNet: conv5 |
511 | // {batch, 384, 13, 13} (x) {2, 128, 192, 3, 3}; -> {batch, 256, 13, 13}; |
512 | // strides: {1, 1} |
513 | memory::dims conv5_src_tz = {batch, 384, 13, 13}; |
514 | memory::dims conv5_weights_tz = {2, 128, 192, 3, 3}; |
515 | memory::dims conv5_bias_tz = {256}; |
516 | memory::dims conv5_dst_tz = {batch, 256, 13, 13}; |
517 | memory::dims conv5_strides = {1, 1}; |
518 | memory::dims conv5_padding = {1, 1}; |
519 | |
520 | std::vector<float> conv5_weights(product(conv5_weights_tz)); |
521 | std::vector<float> conv5_bias(product(conv5_bias_tz)); |
522 | |
523 | // create memory for user data |
524 | auto conv5_user_weights_memory |
525 | = memory({{conv5_weights_tz}, dt::f32, tag::goihw}, eng); |
526 | write_to_dnnl_memory(conv5_weights.data(), conv5_user_weights_memory); |
527 | auto conv5_user_bias_memory |
528 | = memory({{conv5_bias_tz}, dt::f32, tag::x}, eng); |
529 | write_to_dnnl_memory(conv5_bias.data(), conv5_user_bias_memory); |
530 | |
531 | // create memory descriptors for convolution data w/ no specified format |
532 | auto conv5_src_md = memory::desc({conv5_src_tz}, dt::f32, tag::any); |
533 | auto conv5_weights_md = memory::desc({conv5_weights_tz}, dt::f32, tag::any); |
534 | auto conv5_bias_md = memory::desc({conv5_bias_tz}, dt::f32, tag::any); |
535 | auto conv5_dst_md = memory::desc({conv5_dst_tz}, dt::f32, tag::any); |
536 | |
537 | // create a convolution |
538 | auto conv5_prim_desc = convolution_forward::primitive_desc(eng, |
539 | prop_kind::forward_inference, algorithm::convolution_direct, |
540 | conv5_src_md, conv5_weights_md, conv5_bias_md, conv5_dst_md, |
541 | conv5_strides, conv5_padding, conv5_padding); |
542 | |
543 | auto conv5_src_memory = conv4_dst_memory; |
544 | if (conv5_prim_desc.src_desc() != conv5_src_memory.get_desc()) { |
545 | conv5_src_memory = memory(conv5_prim_desc.src_desc(), eng); |
546 | net.push_back(reorder(conv4_dst_memory, conv5_src_memory)); |
547 | net_args.push_back({{DNNL_ARG_FROM, conv4_dst_memory}, |
548 | {DNNL_ARG_TO, conv5_src_memory}}); |
549 | } |
550 | |
551 | auto conv5_weights_memory = conv5_user_weights_memory; |
552 | if (conv5_prim_desc.weights_desc() |
553 | != conv5_user_weights_memory.get_desc()) { |
554 | conv5_weights_memory = memory(conv5_prim_desc.weights_desc(), eng); |
555 | reorder(conv5_user_weights_memory, conv5_weights_memory) |
556 | .execute(s, conv5_user_weights_memory, conv5_weights_memory); |
557 | } |
558 | |
559 | auto conv5_dst_memory = memory(conv5_prim_desc.dst_desc(), eng); |
560 | |
561 | // create convolution primitive and add it to net |
562 | net.push_back(convolution_forward(conv5_prim_desc)); |
563 | net_args.push_back({{DNNL_ARG_SRC, conv5_src_memory}, |
564 | {DNNL_ARG_WEIGHTS, conv5_weights_memory}, |
565 | {DNNL_ARG_BIAS, conv5_user_bias_memory}, |
566 | {DNNL_ARG_DST, conv5_dst_memory}}); |
567 | |
568 | // AlexNet: relu5 |
569 | // {batch, 256, 13, 13} -> {batch, 256, 13, 13} |
570 | const float negative5_slope = 0.0f; |
571 | |
572 | // create relu primitive and add it to net |
573 | auto relu5_prim_desc |
574 | = eltwise_forward::primitive_desc(eng, prop_kind::forward_inference, |
575 | algorithm::eltwise_relu, conv5_dst_memory.get_desc(), |
576 | conv5_dst_memory.get_desc(), negative5_slope); |
577 | |
578 | net.push_back(eltwise_forward(relu5_prim_desc)); |
579 | net_args.push_back({{DNNL_ARG_SRC, conv5_dst_memory}, |
580 | {DNNL_ARG_DST, conv5_dst_memory}}); |
581 | |
582 | // AlexNet: pool5 |
583 | // {batch, 256, 13, 13} -> {batch, 256, 6, 6} |
584 | // kernel: {3, 3} |
585 | // strides: {2, 2} |
586 | memory::dims pool5_dst_tz = {batch, 256, 6, 6}; |
587 | memory::dims pool5_kernel = {3, 3}; |
588 | memory::dims pool5_strides = {2, 2}; |
589 | memory::dims pool5_dilation = {0, 0}; |
590 | memory::dims pool5_padding = {0, 0}; |
591 | |
592 | std::vector<float> pool5_dst(product(pool5_dst_tz)); |
593 | |
594 | auto pool5_dst_md = memory::desc({pool5_dst_tz}, dt::f32, tag::any); |
595 | |
596 | // create a pooling |
597 | auto pool5_pd = pooling_forward::primitive_desc(eng, |
598 | prop_kind::forward_inference, algorithm::pooling_max, |
599 | conv5_dst_memory.get_desc(), pool5_dst_md, pool5_strides, |
600 | pool5_kernel, pool5_dilation, pool5_padding, pool5_padding); |
601 | |
602 | auto pool5_dst_memory = memory(pool5_pd.dst_desc(), eng); |
603 | |
    // create pooling primitive and add it to net
605 | net.push_back(pooling_forward(pool5_pd)); |
606 | net_args.push_back({{DNNL_ARG_SRC, conv5_dst_memory}, |
607 | {DNNL_ARG_DST, pool5_dst_memory}}); |
608 | |
609 | // fc6 inner product {batch, 256, 6, 6} (x) {4096, 256, 6, 6}-> {batch, |
610 | // 4096} |
611 | memory::dims fc6_src_tz = {batch, 256, 6, 6}; |
612 | memory::dims fc6_weights_tz = {4096, 256, 6, 6}; |
613 | memory::dims fc6_bias_tz = {4096}; |
614 | memory::dims fc6_dst_tz = {batch, 4096}; |
615 | |
616 | std::vector<float> fc6_weights(product(fc6_weights_tz)); |
617 | std::vector<float> fc6_bias(product(fc6_bias_tz)); |
618 | |
619 | // create memory for user data |
620 | auto fc6_user_weights_memory |
621 | = memory({{fc6_weights_tz}, dt::f32, tag::oihw}, eng); |
622 | write_to_dnnl_memory(fc6_weights.data(), fc6_user_weights_memory); |
623 | auto fc6_user_bias_memory = memory({{fc6_bias_tz}, dt::f32, tag::x}, eng); |
624 | write_to_dnnl_memory(fc6_bias.data(), fc6_user_bias_memory); |
625 | |
626 | // create memory descriptors for convolution data w/ no specified format |
627 | auto fc6_src_md = memory::desc({fc6_src_tz}, dt::f32, tag::any); |
628 | auto fc6_bias_md = memory::desc({fc6_bias_tz}, dt::f32, tag::any); |
629 | auto fc6_weights_md = memory::desc({fc6_weights_tz}, dt::f32, tag::any); |
630 | auto fc6_dst_md = memory::desc({fc6_dst_tz}, dt::f32, tag::any); |
631 | |
    // create an inner_product
633 | auto fc6_prim_desc = inner_product_forward::primitive_desc(eng, |
634 | prop_kind::forward_inference, fc6_src_md, fc6_weights_md, |
635 | fc6_bias_md, fc6_dst_md); |
636 | |
637 | auto fc6_src_memory = pool5_dst_memory; |
638 | if (fc6_prim_desc.src_desc() != fc6_src_memory.get_desc()) { |
639 | fc6_src_memory = memory(fc6_prim_desc.src_desc(), eng); |
640 | net.push_back(reorder(pool5_dst_memory, fc6_src_memory)); |
641 | net_args.push_back({{DNNL_ARG_FROM, pool5_dst_memory}, |
642 | {DNNL_ARG_TO, fc6_src_memory}}); |
643 | } |
644 | |
645 | auto fc6_weights_memory = fc6_user_weights_memory; |
646 | if (fc6_prim_desc.weights_desc() != fc6_user_weights_memory.get_desc()) { |
647 | fc6_weights_memory = memory(fc6_prim_desc.weights_desc(), eng); |
648 | reorder(fc6_user_weights_memory, fc6_weights_memory) |
649 | .execute(s, fc6_user_weights_memory, fc6_weights_memory); |
650 | } |
651 | |
652 | auto fc6_dst_memory = memory(fc6_prim_desc.dst_desc(), eng); |
653 | |
    // create inner product primitive and add it to net
655 | net.push_back(inner_product_forward(fc6_prim_desc)); |
656 | net_args.push_back({{DNNL_ARG_SRC, fc6_src_memory}, |
657 | {DNNL_ARG_WEIGHTS, fc6_weights_memory}, |
658 | {DNNL_ARG_BIAS, fc6_user_bias_memory}, |
659 | {DNNL_ARG_DST, fc6_dst_memory}}); |
660 | |
661 | // fc7 inner product {batch, 4096} (x) {4096, 4096}-> {batch, 4096} |
662 | memory::dims fc7_weights_tz = {4096, 4096}; |
663 | memory::dims fc7_bias_tz = {4096}; |
664 | memory::dims fc7_dst_tz = {batch, 4096}; |
665 | |
666 | std::vector<float> fc7_weights(product(fc7_weights_tz)); |
667 | std::vector<float> fc7_bias(product(fc7_bias_tz)); |
668 | |
669 | // create memory for user data |
670 | auto fc7_user_weights_memory |
671 | = memory({{fc7_weights_tz}, dt::f32, tag::nc}, eng); |
672 | write_to_dnnl_memory(fc7_weights.data(), fc7_user_weights_memory); |
673 | |
674 | auto fc7_user_bias_memory = memory({{fc7_bias_tz}, dt::f32, tag::x}, eng); |
675 | write_to_dnnl_memory(fc7_bias.data(), fc7_user_bias_memory); |
676 | |
    // create memory descriptors for the inner product data w/ no specified format
678 | auto fc7_bias_md = memory::desc({fc7_bias_tz}, dt::f32, tag::any); |
679 | auto fc7_weights_md = memory::desc({fc7_weights_tz}, dt::f32, tag::any); |
680 | auto fc7_dst_md = memory::desc({fc7_dst_tz}, dt::f32, tag::any); |
681 | |
    // create an inner product primitive descriptor
683 | auto fc7_prim_desc = inner_product_forward::primitive_desc(eng, |
684 | prop_kind::forward_inference, fc6_dst_memory.get_desc(), |
685 | fc7_weights_md, fc7_bias_md, fc7_dst_md); |
686 | |
687 | auto fc7_weights_memory = fc7_user_weights_memory; |
688 | if (fc7_prim_desc.weights_desc() != fc7_user_weights_memory.get_desc()) { |
689 | fc7_weights_memory = memory(fc7_prim_desc.weights_desc(), eng); |
690 | reorder(fc7_user_weights_memory, fc7_weights_memory) |
691 | .execute(s, fc7_user_weights_memory, fc7_weights_memory); |
692 | } |
693 | |
694 | auto fc7_dst_memory = memory(fc7_prim_desc.dst_desc(), eng); |
695 | |
    // create the fc7 inner product primitive and add it to net
697 | net.push_back(inner_product_forward(fc7_prim_desc)); |
698 | net_args.push_back({{DNNL_ARG_SRC, fc6_dst_memory}, |
699 | {DNNL_ARG_WEIGHTS, fc7_weights_memory}, |
700 | {DNNL_ARG_BIAS, fc7_user_bias_memory}, |
701 | {DNNL_ARG_DST, fc7_dst_memory}}); |
702 | |
703 | // fc8 inner product {batch, 4096} (x) {1000, 4096}-> {batch, 1000} |
704 | memory::dims fc8_weights_tz = {1000, 4096}; |
705 | memory::dims fc8_bias_tz = {1000}; |
706 | memory::dims fc8_dst_tz = {batch, 1000}; |
707 | |
708 | std::vector<float> fc8_weights(product(fc8_weights_tz)); |
709 | std::vector<float> fc8_bias(product(fc8_bias_tz)); |
710 | |
711 | // create memory for user data |
712 | auto fc8_user_weights_memory |
713 | = memory({{fc8_weights_tz}, dt::f32, tag::nc}, eng); |
714 | write_to_dnnl_memory(fc8_weights.data(), fc8_user_weights_memory); |
715 | auto fc8_user_bias_memory = memory({{fc8_bias_tz}, dt::f32, tag::x}, eng); |
716 | write_to_dnnl_memory(fc8_bias.data(), fc8_user_bias_memory); |
717 | auto user_dst_memory = memory({{fc8_dst_tz}, dt::f32, tag::nc}, eng); |
718 | write_to_dnnl_memory(user_dst.data(), user_dst_memory); |
719 | |
    // create memory descriptors for the inner product data w/ no specified format
721 | auto fc8_bias_md = memory::desc({fc8_bias_tz}, dt::f32, tag::any); |
722 | auto fc8_weights_md = memory::desc({fc8_weights_tz}, dt::f32, tag::any); |
723 | auto fc8_dst_md = memory::desc({fc8_dst_tz}, dt::f32, tag::any); |
724 | |
    // create an inner product primitive descriptor
726 | auto fc8_prim_desc = inner_product_forward::primitive_desc(eng, |
727 | prop_kind::forward_inference, fc7_dst_memory.get_desc(), |
728 | fc8_weights_md, fc8_bias_md, fc8_dst_md); |
729 | |
730 | auto fc8_weights_memory = fc8_user_weights_memory; |
731 | if (fc8_prim_desc.weights_desc() != fc8_user_weights_memory.get_desc()) { |
732 | fc8_weights_memory = memory(fc8_prim_desc.weights_desc(), eng); |
733 | reorder(fc8_user_weights_memory, fc8_weights_memory) |
734 | .execute(s, fc8_user_weights_memory, fc8_weights_memory); |
735 | } |
736 | |
737 | auto fc8_dst_memory = memory(fc8_prim_desc.dst_desc(), eng); |
738 | |
    // create the fc8 inner product primitive and add it to net
740 | net.push_back(inner_product_forward(fc8_prim_desc)); |
741 | net_args.push_back({{DNNL_ARG_SRC, fc7_dst_memory}, |
742 | {DNNL_ARG_WEIGHTS, fc8_weights_memory}, |
743 | {DNNL_ARG_BIAS, fc8_user_bias_memory}, |
744 | {DNNL_ARG_DST, fc8_dst_memory}}); |
745 | |
    // create reorder between internal and user data if it is needed and
    // add it to net after the fc8 inner product
748 | if (fc8_dst_memory != user_dst_memory) { |
749 | net.push_back(reorder(fc8_dst_memory, user_dst_memory)); |
750 | net_args.push_back({{DNNL_ARG_FROM, fc8_dst_memory}, |
751 | {DNNL_ARG_TO, user_dst_memory}}); |
752 | } |
753 | |
754 | /// @page cnn_inference_f32_cpp |
755 | /// Finally, execute the primitives. For this example, the net is executed |
756 | /// multiple times and each execution is timed individually. |
757 | /// @snippet cnn_inference_f32.cpp Execute model |
758 | //[Execute model] |
759 | for (int j = 0; j < times; ++j) { |
760 | assert(net.size() == net_args.size() && "something is missing" ); |
761 | for (size_t i = 0; i < net.size(); ++i) |
762 | net.at(i).execute(s, net_args.at(i)); |
763 | } |
764 | //[Execute model] |
765 | |
766 | s.wait(); |
767 | } |
768 | |
769 | void cnn_inference_f32(engine::kind engine_kind) { |
770 | auto begin = std::chrono::duration_cast<std::chrono::milliseconds>( |
771 | std::chrono::steady_clock::now().time_since_epoch()) |
772 | .count(); |
773 | int times = 100; |
774 | simple_net(engine_kind, times); |
775 | auto end = std::chrono::duration_cast<std::chrono::milliseconds>( |
776 | std::chrono::steady_clock::now().time_since_epoch()) |
777 | .count(); |
778 | std::cout << "Use time: " << (end - begin) / (times + 0.0) |
779 | << " ms per iteration." << std::endl; |
780 | } |
781 | |
782 | int main(int argc, char **argv) { |
783 | return handle_example_errors( |
784 | cnn_inference_f32, parse_engine_kind(argc, argv)); |
785 | } |
786 | |