1/*******************************************************************************
2* Copyright 2019-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17/// @example cross_engine_reorder.c
18/// @copybrief cross_engine_reorder_c
19
20/// @page cross_engine_reorder_c Reorder between CPU and GPU engines
/// This C API example demonstrates the programming flow for reordering memory
/// between CPU and GPU engines.
23///
24/// @include cross_engine_reorder.c
25
26#include <stdio.h>
27#include <stdlib.h>
28
29#include "oneapi/dnnl/dnnl.h"
30
31#include "example_utils.h"
32
33size_t product(int n_dims, const dnnl_dim_t dims[]) {
34 size_t n_elems = 1;
35 for (int d = 0; d < n_dims; ++d) {
36 n_elems *= (size_t)dims[d];
37 }
38 return n_elems;
39}
40
41void fill(dnnl_memory_t mem, int n_dims, const dnnl_dim_t dims[]) {
42 const size_t n_elems = product(n_dims, dims);
43 float *array = (float *)malloc(n_elems * sizeof(float));
44 if (!array) COMPLAIN_EXAMPLE_ERROR_AND_EXIT("%s", "malloc returned NULL");
45
46 for (size_t e = 0; e < n_elems; ++e) {
47 array[e] = e % 7 ? 1.0f : -1.0f;
48 }
49
50 write_to_dnnl_memory(array, mem);
51 free(array);
52}
53
54int find_negative(dnnl_memory_t mem, int n_dims, const dnnl_dim_t dims[]) {
55 const size_t n_elems = product(n_dims, dims);
56 float *array = (float *)malloc(n_elems * sizeof(float));
57 if (!array) COMPLAIN_EXAMPLE_ERROR_AND_EXIT("%s", "malloc returned NULL");
58 read_from_dnnl_memory(array, mem);
59
60 int negs = 0;
61 for (size_t e = 0; e < n_elems; ++e) {
62 negs += array[e] < 0.0f;
63 }
64
65 free(array);
66 return negs;
67}
68
69void cross_engine_reorder() {
70 dnnl_engine_t engine_cpu, engine_gpu;
71 CHECK(dnnl_engine_create(&engine_cpu, validate_engine_kind(dnnl_cpu), 0));
72 CHECK(dnnl_engine_create(&engine_gpu, validate_engine_kind(dnnl_gpu), 0));
73
74 const dnnl_dims_t tz = {2, 16, 1, 1};
75
76 dnnl_memory_desc_t m_cpu_md, m_gpu_md;
77 CHECK(dnnl_memory_desc_create_with_tag(
78 &m_cpu_md, 4, tz, dnnl_f32, dnnl_nchw));
79 CHECK(dnnl_memory_desc_create_with_tag(
80 &m_gpu_md, 4, tz, dnnl_f32, dnnl_nchw));
81
82 dnnl_memory_t m_cpu, m_gpu;
83 CHECK(dnnl_memory_create(
84 &m_cpu, m_cpu_md, engine_cpu, DNNL_MEMORY_ALLOCATE));
85 CHECK(dnnl_memory_create(
86 &m_gpu, m_gpu_md, engine_gpu, DNNL_MEMORY_ALLOCATE));
87
88 fill(m_cpu, 4, tz);
89 if (find_negative(m_cpu, 4, tz) == 0)
90 COMPLAIN_EXAMPLE_ERROR_AND_EXIT(
91 "%s", "incorrect data fill, no negative values found");
92
93 /* reorder cpu -> gpu */
94 dnnl_primitive_desc_t r1_pd;
95 CHECK(dnnl_reorder_primitive_desc_create(
96 &r1_pd, m_cpu_md, engine_cpu, m_gpu_md, engine_gpu, NULL));
97 dnnl_primitive_t r1;
98 CHECK(dnnl_primitive_create(&r1, r1_pd));
99
100 /* relu gpu */
101 dnnl_primitive_desc_t relu_pd;
102 CHECK(dnnl_eltwise_forward_primitive_desc_create(&relu_pd, engine_gpu,
103 dnnl_forward, dnnl_eltwise_relu, m_gpu_md, m_gpu_md, 0.0f, 0.0f,
104 NULL));
105
106 dnnl_primitive_t relu;
107 CHECK(dnnl_primitive_create(&relu, relu_pd));
108
109 /* reorder gpu -> cpu */
110 dnnl_primitive_desc_t r2_pd;
111 CHECK(dnnl_reorder_primitive_desc_create(
112 &r2_pd, m_gpu_md, engine_gpu, m_cpu_md, engine_cpu, NULL));
113 dnnl_primitive_t r2;
114 CHECK(dnnl_primitive_create(&r2, r2_pd));
115
116 dnnl_stream_t stream_gpu;
117 CHECK(dnnl_stream_create(
118 &stream_gpu, engine_gpu, dnnl_stream_default_flags));
119
120 dnnl_exec_arg_t r1_args[] = {{DNNL_ARG_FROM, m_cpu}, {DNNL_ARG_TO, m_gpu}};
121 CHECK(dnnl_primitive_execute(r1, stream_gpu, 2, r1_args));
122
123 dnnl_exec_arg_t relu_args[]
124 = {{DNNL_ARG_SRC, m_gpu}, {DNNL_ARG_DST, m_gpu}};
125 CHECK(dnnl_primitive_execute(relu, stream_gpu, 2, relu_args));
126
127 dnnl_exec_arg_t r2_args[] = {{DNNL_ARG_FROM, m_gpu}, {DNNL_ARG_TO, m_cpu}};
128 CHECK(dnnl_primitive_execute(r2, stream_gpu, 2, r2_args));
129
130 CHECK(dnnl_stream_wait(stream_gpu));
131
132 if (find_negative(m_cpu, 4, tz) != 0)
133 COMPLAIN_EXAMPLE_ERROR_AND_EXIT(
134 "%s", "found negative values after ReLU applied");
135
136 /* clean up */
137 dnnl_primitive_desc_destroy(relu_pd);
138 dnnl_primitive_desc_destroy(r1_pd);
139 dnnl_primitive_desc_destroy(r2_pd);
140
141 dnnl_primitive_destroy(relu);
142 dnnl_primitive_destroy(r1);
143 dnnl_primitive_destroy(r2);
144 dnnl_memory_destroy(m_cpu);
145 dnnl_memory_destroy(m_gpu);
146 dnnl_memory_desc_destroy(m_cpu_md);
147 dnnl_memory_desc_destroy(m_gpu_md);
148
149 dnnl_stream_destroy(stream_gpu);
150
151 dnnl_engine_destroy(engine_cpu);
152 dnnl_engine_destroy(engine_gpu);
153}
154
/// Program entry point: runs the cross-engine reorder scenario and prints a
/// success message once it returns.
///
/// @returns 0 after cross_engine_reorder() has returned.
int main(void) {
    cross_engine_reorder();
    printf("Example passed on CPU/GPU.\n");
    return 0;
}
160