/*******************************************************************************
* Copyright 2020-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

/// @example convolution.cpp
/// > Annotated version: @ref convolution_example_cpp
///
/// @page convolution_example_cpp_short
///
/// This C++ API example demonstrates how to create and execute a
/// [Convolution](@ref dev_guide_convolution) primitive in forward propagation
/// mode in two configurations - with and without groups.
///
/// Key optimizations included in this example:
/// - Creation of optimized memory format from the primitive descriptor;
/// - Primitive attributes with fused post-ops.
///
/// @page convolution_example_cpp Convolution Primitive Example
/// @copydetails convolution_example_cpp_short
///
/// @include convolution.cpp

#include <algorithm>
#include <cmath>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

#include "example_utils.hpp"
#include "oneapi/dnnl/dnnl.hpp"

using namespace dnnl;

using tag = memory::format_tag;
using dt = memory::data_type;

void convolution_example(dnnl::engine::kind engine_kind) {

    // Create execution dnnl::engine.
    dnnl::engine engine(engine_kind, 0);

    // Create dnnl::stream.
    dnnl::stream engine_stream(engine);

    // Tensor dimensions.
    const memory::dim N = 3, // batch size
            IC = 32, // input channels
            IH = 13, // input height
            IW = 13, // input width
            OC = 64, // output channels
            KH = 3, // weights height
            KW = 3, // weights width
            PH_L = 1, // height padding: left
            PH_R = 1, // height padding: right
            PW_L = 1, // width padding: left
            PW_R = 1, // width padding: right
            SH = 4, // height-wise stride
            SW = 4, // width-wise stride
            OH = (IH - KH + PH_L + PH_R) / SH + 1, // output height
            OW = (IW - KW + PW_L + PW_R) / SW + 1; // output width
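    // For the values above, OH = (13 - 3 + 1 + 1) / 4 + 1 = 4 and
    // OW = (13 - 3 + 1 + 1) / 4 + 1 = 4, so dst has dimensions {3, 64, 4, 4}.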

    // Source (src), weights, bias, and destination (dst) tensor dimensions.
    memory::dims src_dims = {N, IC, IH, IW};
    memory::dims weights_dims = {OC, IC, KH, KW};
    memory::dims bias_dims = {OC};
    memory::dims dst_dims = {N, OC, OH, OW};

    // Strides, padding dimensions.
    memory::dims strides_dims = {SH, SW};
    memory::dims padding_dims_l = {PH_L, PW_L};
    memory::dims padding_dims_r = {PH_R, PW_R};

    // Allocate buffers.
    std::vector<float> src_data(product(src_dims));
    std::vector<float> weights_data(product(weights_dims));
    std::vector<float> bias_data(OC);
    std::vector<float> dst_data(product(dst_dims));

    // Initialize src, weights, and bias tensors.
    std::generate(src_data.begin(), src_data.end(), []() {
        static int i = 0;
        return std::cos(i++ / 10.f);
    });
    std::generate(weights_data.begin(), weights_data.end(), []() {
        static int i = 0;
        return std::sin(i++ * 2.f);
    });
    std::generate(bias_data.begin(), bias_data.end(), []() {
        static int i = 0;
        return std::tanh(float(i++));
    });

    // Create memory objects for tensor data (src, weights, dst). In this
    // example, NCHW layout is assumed for src and dst, and OIHW for weights.
    auto user_src_mem = memory({src_dims, dt::f32, tag::nchw}, engine);
    auto user_weights_mem = memory({weights_dims, dt::f32, tag::oihw}, engine);
    auto user_dst_mem = memory({dst_dims, dt::f32, tag::nchw}, engine);

    // Create memory descriptors with format_tag::any for the primitive. This
    // enables the convolution primitive to choose memory layouts for an
    // optimized primitive implementation, and these layouts may differ from the
    // ones provided by the user.
    auto conv_src_md = memory::desc(src_dims, dt::f32, tag::any);
    auto conv_weights_md = memory::desc(weights_dims, dt::f32, tag::any);
    auto conv_dst_md = memory::desc(dst_dims, dt::f32, tag::any);

    // Create memory descriptor and memory object for input bias.
    auto user_bias_md = memory::desc(bias_dims, dt::f32, tag::a);
    auto user_bias_mem = memory(user_bias_md, engine);

    // Write data to the memory objects' handles.
    write_to_dnnl_memory(src_data.data(), user_src_mem);
    write_to_dnnl_memory(weights_data.data(), user_weights_mem);
    write_to_dnnl_memory(bias_data.data(), user_bias_mem);

    // Create primitive post-ops (ReLU).
    const float alpha = 0.f;
    const float beta = 0.f;
    post_ops conv_ops;
    conv_ops.append_eltwise(algorithm::eltwise_relu, alpha, beta);
    primitive_attr conv_attr;
    conv_attr.set_post_ops(conv_ops);
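    // With this attribute attached, the primitive computes
    //   dst = max(0, conv(src, weights) + bias)
    // in a single pass, avoiding a separate eltwise primitive and an extra
    // round trip through memory for the intermediate result.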

    // Create primitive descriptor.
    auto conv_pd = convolution_forward::primitive_desc(engine,
            prop_kind::forward_training, algorithm::convolution_direct,
            conv_src_md, conv_weights_md, user_bias_md, conv_dst_md,
            strides_dims, padding_dims_l, padding_dims_r, conv_attr);
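    // Optionally, the implementation selected for these shapes and attributes
    // can be inspected at this point, e.g. to confirm that an optimized kernel
    // was chosen (impl_info_str() is provided by dnnl::primitive_desc_base;
    // uncomment to print it):
    // std::cout << "Conv implementation: " << conv_pd.impl_info_str() << "\n";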

    // For now, assume that the src, weights, and dst memory layouts generated
    // by the primitive and the ones provided by the user are identical.
    auto conv_src_mem = user_src_mem;
    auto conv_weights_mem = user_weights_mem;
    auto conv_dst_mem = user_dst_mem;

    // Reorder the data in case the src and weights memory layouts generated by
    // the primitive and the ones provided by the user are different. In this
    // case, we create additional memory objects with internal buffers that will
    // contain the reordered data. The data in dst will be reordered after the
    // convolution computation has finalized.
    if (conv_pd.src_desc() != user_src_mem.get_desc()) {
        conv_src_mem = memory(conv_pd.src_desc(), engine);
        reorder(user_src_mem, conv_src_mem)
                .execute(engine_stream, user_src_mem, conv_src_mem);
    }

    if (conv_pd.weights_desc() != user_weights_mem.get_desc()) {
        conv_weights_mem = memory(conv_pd.weights_desc(), engine);
        reorder(user_weights_mem, conv_weights_mem)
                .execute(engine_stream, user_weights_mem, conv_weights_mem);
    }

    if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
        conv_dst_mem = memory(conv_pd.dst_desc(), engine);
    }
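    // Note that dst needs only a new buffer here, not a reorder: its contents
    // are produced by the convolution itself and are reordered back to the
    // user's layout after execution (see below).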

    // Create the primitive.
    auto conv_prim = convolution_forward(conv_pd);

    // Primitive arguments.
    std::unordered_map<int, memory> conv_args;
    conv_args.insert({DNNL_ARG_SRC, conv_src_mem});
    conv_args.insert({DNNL_ARG_WEIGHTS, conv_weights_mem});
    conv_args.insert({DNNL_ARG_BIAS, user_bias_mem});
    conv_args.insert({DNNL_ARG_DST, conv_dst_mem});

    // Primitive execution: convolution with ReLU.
    conv_prim.execute(engine_stream, conv_args);

    // Reorder the data in case the dst memory descriptor generated by the
    // primitive and the one provided by the user are different.
    if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
        reorder(conv_dst_mem, user_dst_mem)
                .execute(engine_stream, conv_dst_mem, user_dst_mem);
    } else
        user_dst_mem = conv_dst_mem;

    // Wait for the computation to finalize.
    engine_stream.wait();

    // Read data from memory object's handle.
    read_from_dnnl_memory(dst_data.data(), user_dst_mem);
}

void depthwise_convolution_example(dnnl::engine::kind engine_kind) {

    // Create execution dnnl::engine.
    dnnl::engine engine(engine_kind, 0);

    // Create dnnl::stream.
    dnnl::stream engine_stream(engine);

    // Tensor dimensions.
    const memory::dim N = 3, // batch size
            G = 32, // channel groups
            IC = 32, // input channels
            IH = 13, // input height
            IW = 13, // input width
            OC = 32, // output channels
            KH = 3, // weights height
            KW = 3, // weights width
            PH_L = 1, // height padding: left
            PH_R = 1, // height padding: right
            PW_L = 1, // width padding: left
            PW_R = 1, // width padding: right
            SH = 4, // height-wise stride
            SW = 4, // width-wise stride
            OH = (IH - KH + PH_L + PH_R) / SH + 1, // output height
            OW = (IW - KW + PW_L + PW_R) / SW + 1; // output width

    // Source (src), weights, bias, and destination (dst) tensor dimensions.
    memory::dims src_dims = {N, IC, IH, IW};
    memory::dims weights_dims = {G, OC / G, IC / G, KH, KW};
    memory::dims bias_dims = {OC};
    memory::dims dst_dims = {N, OC, OH, OW};
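    // With G == IC == OC == 32, weights_dims is {32, 1, 1, 3, 3}: each group
    // holds a single 3x3 filter applied to exactly one input channel, i.e. the
    // depthwise special case of a grouped convolution.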

    // Strides, padding dimensions.
    memory::dims strides_dims = {SH, SW};
    memory::dims padding_dims_l = {PH_L, PW_L};
    memory::dims padding_dims_r = {PH_R, PW_R};

    // Allocate buffers.
    std::vector<float> src_data(product(src_dims));
    std::vector<float> weights_data(product(weights_dims));
    std::vector<float> bias_data(OC);
    std::vector<float> dst_data(product(dst_dims));

    // Initialize src, weights, and bias tensors.
    std::generate(src_data.begin(), src_data.end(), []() {
        static int i = 0;
        return std::cos(i++ / 10.f);
    });
    std::generate(weights_data.begin(), weights_data.end(), []() {
        static int i = 0;
        return std::sin(i++ * 2.f);
    });
    std::generate(bias_data.begin(), bias_data.end(), []() {
        static int i = 0;
        return std::tanh(float(i++));
    });

    // Create memory objects for tensor data (src, weights, dst). In this
    // example, NCHW layout is assumed for src and dst, and GOIHW for weights.
    auto user_src_mem = memory({src_dims, dt::f32, tag::nchw}, engine);
    auto user_weights_mem = memory({weights_dims, dt::f32, tag::goihw}, engine);
    auto user_dst_mem = memory({dst_dims, dt::f32, tag::nchw}, engine);

    // Create memory descriptors with format_tag::any for the primitive. This
    // enables the convolution primitive to choose memory layouts for an
    // optimized primitive implementation, and these layouts may differ from the
    // ones provided by the user.
    auto conv_src_md = memory::desc(src_dims, dt::f32, tag::any);
    auto conv_weights_md = memory::desc(weights_dims, dt::f32, tag::any);
    auto conv_dst_md = memory::desc(dst_dims, dt::f32, tag::any);

    // Create memory descriptor and memory object for input bias.
    auto user_bias_md = memory::desc(bias_dims, dt::f32, tag::a);
    auto user_bias_mem = memory(user_bias_md, engine);

    // Write data to the memory objects' handles.
    write_to_dnnl_memory(src_data.data(), user_src_mem);
    write_to_dnnl_memory(weights_data.data(), user_weights_mem);
    write_to_dnnl_memory(bias_data.data(), user_bias_mem);

    // Create primitive post-ops (ReLU).
    const float alpha = 0.f;
    const float beta = 0.f;
    post_ops conv_ops;
    conv_ops.append_eltwise(algorithm::eltwise_relu, alpha, beta);
    primitive_attr conv_attr;
    conv_attr.set_post_ops(conv_ops);

    // Create primitive descriptor.
    auto conv_pd = convolution_forward::primitive_desc(engine,
            prop_kind::forward_training, algorithm::convolution_direct,
            conv_src_md, conv_weights_md, user_bias_md, conv_dst_md,
            strides_dims, padding_dims_l, padding_dims_r, conv_attr);

    // For now, assume that the src, weights, and dst memory layouts generated
    // by the primitive and the ones provided by the user are identical.
    auto conv_src_mem = user_src_mem;
    auto conv_weights_mem = user_weights_mem;
    auto conv_dst_mem = user_dst_mem;

    // Reorder the data in case the src and weights memory layouts generated by
    // the primitive and the ones provided by the user are different. In this
    // case, we create additional memory objects with internal buffers that will
    // contain the reordered data. The data in dst will be reordered after the
    // convolution computation has finalized.
    if (conv_pd.src_desc() != user_src_mem.get_desc()) {
        conv_src_mem = memory(conv_pd.src_desc(), engine);
        reorder(user_src_mem, conv_src_mem)
                .execute(engine_stream, user_src_mem, conv_src_mem);
    }

    if (conv_pd.weights_desc() != user_weights_mem.get_desc()) {
        conv_weights_mem = memory(conv_pd.weights_desc(), engine);
        reorder(user_weights_mem, conv_weights_mem)
                .execute(engine_stream, user_weights_mem, conv_weights_mem);
    }

    if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
        conv_dst_mem = memory(conv_pd.dst_desc(), engine);
    }

    // Create the primitive.
    auto conv_prim = convolution_forward(conv_pd);

    // Primitive arguments.
    std::unordered_map<int, memory> conv_args;
    conv_args.insert({DNNL_ARG_SRC, conv_src_mem});
    conv_args.insert({DNNL_ARG_WEIGHTS, conv_weights_mem});
    conv_args.insert({DNNL_ARG_BIAS, user_bias_mem});
    conv_args.insert({DNNL_ARG_DST, conv_dst_mem});

    // Primitive execution: convolution with ReLU.
    conv_prim.execute(engine_stream, conv_args);

    // Reorder the data in case the dst memory descriptor generated by the
    // primitive and the one provided by the user are different.
    if (conv_pd.dst_desc() != user_dst_mem.get_desc()) {
        reorder(conv_dst_mem, user_dst_mem)
                .execute(engine_stream, conv_dst_mem, user_dst_mem);
    } else
        user_dst_mem = conv_dst_mem;

    // Wait for the computation to finalize.
    engine_stream.wait();

    // Read data from memory object's handle.
    read_from_dnnl_memory(dst_data.data(), user_dst_mem);
}

int main(int argc, char **argv) {
    auto exit_code = handle_example_errors(
            convolution_example, parse_engine_kind(argc, argv));
    if (exit_code != 0) return exit_code;

    return handle_example_errors(
            depthwise_convolution_example, parse_engine_kind(argc, argv));
}