cross_engine_reorder.c source code [oneDNN/examples/cross_engine_reorder.c]

1	/*******************************************************************************
2	* Copyright 2019-2022 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	/// @example cross_engine_reorder.c
18	/// @copybrief cross_engine_reorder_c
19
20	/// @page cross_engine_reorder_c Reorder between CPU and GPU engines
21	/// This C API example demonstrates programming flow when reordering memory
22	/// between CPU and GPU engines.
23	///
24	/// @include cross_engine_reorder.c
25
26	#include <stdio.h>
27	#include <stdlib.h>
28
29	#include "oneapi/dnnl/dnnl.h"
30
31	#include "example_utils.h"
32
33	size_t product(int n_dims, const dnnl_dim_t dims[]) {
34	size_t n_elems = `1`;
35	for (int d = `0`; d < n_dims; ++d) {
36	n_elems *= (size_t)dims[d];
37	}
38	return n_elems;
39	}
40
41	void fill(dnnl_memory_t mem, int n_dims, const dnnl_dim_t dims[]) {
42	const size_t n_elems = product(n_dims, dims);
43	float array = (float* )malloc(n_elems sizeof(float));
44	if (!array) COMPLAIN_EXAMPLE_ERROR_AND_EXIT("%s", "malloc returned NULL");
45
46	for (size_t e = `0`; e < n_elems; ++e) {
47	array[e] = e % `7` ? `1.0f` : -`1.0f`;
48	}
49
50	write_to_dnnl_memory(array, mem);
51	free(array);
52	}
53
54	int find_negative(dnnl_memory_t mem, int n_dims, const dnnl_dim_t dims[]) {
55	const size_t n_elems = product(n_dims, dims);
56	float array = (float* )malloc(n_elems sizeof(float));
57	if (!array) COMPLAIN_EXAMPLE_ERROR_AND_EXIT("%s", "malloc returned NULL");
58	read_from_dnnl_memory(array, mem);
59
60	int negs = `0`;
61	for (size_t e = `0`; e < n_elems; ++e) {
62	negs += array[e] < `0.0f`;
63	}
64
65	free(array);
66	return negs;
67	}
68
69	void cross_engine_reorder() {
70	dnnl_engine_t engine_cpu, engine_gpu;
71	CHECK(dnnl_engine_create(&engine_cpu, validate_engine_kind(dnnl_cpu), `0`));
72	CHECK(dnnl_engine_create(&engine_gpu, validate_engine_kind(dnnl_gpu), `0`));
73
74	const dnnl_dims_t tz = {`2`, `16`, `1`, `1`};
75
76	dnnl_memory_desc_t m_cpu_md, m_gpu_md;
77	CHECK(dnnl_memory_desc_create_with_tag(
78	&m_cpu_md, `4`, tz, dnnl_f32, dnnl_nchw));
79	CHECK(dnnl_memory_desc_create_with_tag(
80	&m_gpu_md, `4`, tz, dnnl_f32, dnnl_nchw));
81
82	dnnl_memory_t m_cpu, m_gpu;
83	CHECK(dnnl_memory_create(
84	&m_cpu, m_cpu_md, engine_cpu, DNNL_MEMORY_ALLOCATE));
85	CHECK(dnnl_memory_create(
86	&m_gpu, m_gpu_md, engine_gpu, DNNL_MEMORY_ALLOCATE));
87
88	fill(m_cpu, `4`, tz);
89	if (find_negative(m_cpu, `4`, tz) == `0`)
90	COMPLAIN_EXAMPLE_ERROR_AND_EXIT(
91	"%s", "incorrect data fill, no negative values found");
92
93	/ reorder cpu -> gpu /
94	dnnl_primitive_desc_t r1_pd;
95	CHECK(dnnl_reorder_primitive_desc_create(
96	&r1_pd, m_cpu_md, engine_cpu, m_gpu_md, engine_gpu, NULL));
97	dnnl_primitive_t r1;
98	CHECK(dnnl_primitive_create(&r1, r1_pd));
99
100	/ relu gpu /
101	dnnl_primitive_desc_t relu_pd;
102	CHECK(dnnl_eltwise_forward_primitive_desc_create(&relu_pd, engine_gpu,
103	dnnl_forward, dnnl_eltwise_relu, m_gpu_md, m_gpu_md, `0.0f`, `0.0f`,
104	NULL));
105
106	dnnl_primitive_t relu;
107	CHECK(dnnl_primitive_create(&relu, relu_pd));
108
109	/ reorder gpu -> cpu /
110	dnnl_primitive_desc_t r2_pd;
111	CHECK(dnnl_reorder_primitive_desc_create(
112	&r2_pd, m_gpu_md, engine_gpu, m_cpu_md, engine_cpu, NULL));
113	dnnl_primitive_t r2;
114	CHECK(dnnl_primitive_create(&r2, r2_pd));
115
116	dnnl_stream_t stream_gpu;
117	CHECK(dnnl_stream_create(
118	&stream_gpu, engine_gpu, dnnl_stream_default_flags));
119
120	dnnl_exec_arg_t r1_args[] = {{DNNL_ARG_FROM, m_cpu}, {DNNL_ARG_TO, m_gpu}};
121	CHECK(dnnl_primitive_execute(r1, stream_gpu, `2`, r1_args));
122
123	dnnl_exec_arg_t relu_args[]
124	= {{DNNL_ARG_SRC, m_gpu}, {DNNL_ARG_DST, m_gpu}};
125	CHECK(dnnl_primitive_execute(relu, stream_gpu, `2`, relu_args));
126
127	dnnl_exec_arg_t r2_args[] = {{DNNL_ARG_FROM, m_gpu}, {DNNL_ARG_TO, m_cpu}};
128	CHECK(dnnl_primitive_execute(r2, stream_gpu, `2`, r2_args));
129
130	CHECK(dnnl_stream_wait(stream_gpu));
131
132	if (find_negative(m_cpu, `4`, tz) != `0`)
133	COMPLAIN_EXAMPLE_ERROR_AND_EXIT(
134	"%s", "found negative values after ReLU applied");
135
136	/ clean up /
137	dnnl_primitive_desc_destroy(relu_pd);
138	dnnl_primitive_desc_destroy(r1_pd);
139	dnnl_primitive_desc_destroy(r2_pd);
140
141	dnnl_primitive_destroy(relu);
142	dnnl_primitive_destroy(r1);
143	dnnl_primitive_destroy(r2);
144	dnnl_memory_destroy(m_cpu);
145	dnnl_memory_destroy(m_gpu);
146	dnnl_memory_desc_destroy(m_cpu_md);
147	dnnl_memory_desc_destroy(m_gpu_md);
148
149	dnnl_stream_destroy(stream_gpu);
150
151	dnnl_engine_destroy(engine_cpu);
152	dnnl_engine_destroy(engine_gpu);
153	}
154
/*
 * Program entry point: runs the cross-engine reorder example and prints a
 * success message once it returns. Always returns 0.
 */
int main(void) {
    cross_engine_reorder();
    printf("Example passed on CPU/GPU.\n");
    return 0;
}
160

Browse the source code of oneDNN/examples/cross_engine_reorder.c