1/*******************************************************************************
2* Copyright 2020-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#include <atomic>
18
19#include "oneapi/dnnl/dnnl.h"
20
21#include "dnnl_common.hpp"
22#include "dnnl_memory.hpp"
23
24#include "utils/parallel.hpp"
25
26#include "zeropad/zeropad.hpp"
27
// Forward declaration of an internal (non-public) oneDNN entry point that is
// the operation under test: it is not declared in dnnl.h, so it is declared
// here with C linkage. Presumably it writes zeroes into the padded region of
// `memory` on `stream` — confirm against the library implementation.
extern "C" {
dnnl_status_t dnnl_impl_zero_pad(
        const dnnl_memory *memory, dnnl_stream *stream);
}
32
33namespace zeropad {
34
// Correctness check for the zero-pad operation.
// Two properties are verified:
//   1. Every byte of every *logical* (non-padding) element still holds the
//      benchdnn default fill value `dnnl_mem_default_value`, i.e. zero-padding
//      did not clobber real data (checked by the loops below).
//   2. The padded area itself is properly zeroed (delegated to
//      check_zero_padding()).
// On success sets res->state = PASSED and returns OK; otherwise returns FAIL.
// res->errors accumulates the number of offending elements from both checks.
static int compare(const dnn_mem_t &test_mem, res_t *res) {
    // Nothing to check for zero-dim memory or non-blocked (opaque) formats.
    if (test_mem.ndims() == 0) return OK;
    if (test_mem.format_kind() != dnnl_blocked) return OK;

    // Pass/fail flag shared across the parallel sweep below, hence atomic.
    // Declared as int but used strictly as a boolean (1 = still ok).
    std::atomic<int> ok(true);

    // Raw byte view of the buffer; elements are compared byte-by-byte so the
    // check is data-type agnostic.
    const uint8_t *mem = (const uint8_t *)test_mem;
    size_t type_size = test_mem.sizeof_dt();

    // Advances the logical coordinates `pos` in row-major order over
    // dimensions [stop_dim, ndims) and recomputes the physical offset `idx`
    // via md_off_v(). Sets `done` once dimension `stop_dim` wraps around,
    // i.e. the whole sub-range has been visited.
    const auto increment
            = [&](dnnl_dims_t &pos, dnnl_dim_t &idx, bool &done, int stop_dim) {
                  for (int i = test_mem.ndims() - 1; i >= stop_dim; i--) {
                      pos[i]++;
                      if (pos[i] < test_mem.dims()[i]) {
                          break;
                      } else {
                          pos[i] = 0;
                          if (i == stop_dim) done = true;
                      }
                  }
                  idx = md_off_v(test_mem, pos);
              };

    // Parallel sweep: threads split the outermost dimension; each thread
    // walks all inner coordinates (stop_dim = 1) and bails out early as soon
    // as any thread clears `ok`.
    benchdnn_parallel_nd(test_mem.dims()[0], [&](dnnl_dim_t dim0) {
        dnnl_dims_t pos = {0};
        pos[0] = dim0;
        dnnl_dim_t idx = md_off_v(test_mem, pos);
        bool done = false;

        while (!done && ok) {
            for (size_t i = 0; i < type_size; i++) {
                uint8_t mem_value = mem[type_size * idx + i];
                // A logical element lost its fill value => it was overwritten.
                if (mem_value != dnnl_mem_default_value) ok = false;
            }
            increment(pos, idx, done, 1);
        }
    });

    // Serially check for errors for data dumping purposes
    // (re-walks the full tensor, stop_dim = 0, to count and print offenders
    // deterministically).
    if (!ok) {
        int errors = 0;
        dnnl_dims_t pos = {0};
        dnnl_dim_t idx = md_off_v(test_mem, pos);
        bool done = false;
        while (!done) {
            for (size_t i = 0; i < type_size; i++) {
                uint8_t mem_value = mem[type_size * idx + i];
                bool idx_ok = (mem_value == dnnl_mem_default_value);
                if (!idx_ok) errors++;
                // Dump at most ~10 offenders unless verbosity is cranked up.
                const bool dump = (!idx_ok && (errors < 10 || verbose >= 10))
                        || (verbose >= 99);
                if (dump) {
                    // NOTE: only the first 6 coordinates are printed even if
                    // ndims is larger.
                    BENCHDNN_PRINT(0,
                            "[%4ld][arg:%d]"
                            "[" IFMT "," IFMT "," IFMT "," IFMT "," IFMT
                            "," IFMT "] dt:% 9.6g \n",
                            (long)idx, test_mem.dt(), pos[0], pos[1], pos[2],
                            pos[3], pos[4], pos[5], test_mem.get_elem(idx));
                    break; // one dump per element is enough
                }
            }
            increment(pos, idx, done, 0);
        }

        BENCHDNN_PRINT(0, "@@@ [arg:%d] check_non_zeroed_elements failed\n",
                test_mem.dt());
        res->errors += errors;
    }

    // Property 2: the padding itself must be zeroed.
    int errors = 0;
    auto status = check_zero_padding(test_mem, test_mem.dt(), res, &errors);
    res->errors += errors;

    bool passed = ok && (status == OK);
    if (passed) res->state = PASSED;
    return passed ? OK : FAIL;
}
112
113static dnnl_status_t perf_func(
114 const dnnl_stream_t &stream, const std::vector<dnnl_exec_arg_t> &args) {
115 return dnnl_impl_zero_pad(args[0].memory, stream);
116}
117
118void skip_unimplemented_prb(const prb_t *prb, res_t *res) {
119 skip_unimplemented_data_type({prb->dt}, FWD_D, res);
120
121 if (is_nvidia_gpu() || is_amd_gpu()) {
122 res->state = SKIPPED;
123 res->reason = CASE_NOT_SUPPORTED;
124 }
125}
126
127int doit(const prb_t *prb, res_t *res) {
128 if (bench_mode == LIST) return res->state = LISTED, OK;
129
130 skip_unimplemented_prb(prb, res);
131 if (res->state == SKIPPED) return OK;
132
133 auto data_md = dnn_mem_t::init_md(
134 prb->ndims, prb->dims.data(), prb->dt, prb->tag);
135 if (res->state == SKIPPED || res->state == UNIMPLEMENTED) return OK;
136
137 SAFE(check_mem_size(data_md, res), WARN);
138 if (res->state == SKIPPED) return OK;
139
140 const auto &test_engine = get_test_engine();
141
142 dnn_mem_t test_mem(data_md, test_engine);
143
144 args_t args;
145 args.set(0, test_mem);
146 perf_function_t perf_func_ = &perf_func;
147
148 execute_and_wait(perf_func_, test_engine, args, res);
149
150 if (is_bench_mode(CORR)) { SAFE(compare(test_mem, res), WARN); }
151 if (is_bench_mode(PERF)) {
152 // Get plain memory desc size to have a proper padded area size.
153 auto plain_data_md = dnn_mem_t::init_md(
154 prb->ndims, prb->dims.data(), prb->dt, tag::abx);
155 // Fill output bytes for perf_report.
156 res->ibytes = 0; // Since we don't read any data from padding.
157 res->obytes = dnnl_memory_desc_get_size(data_md)
158 - dnnl_memory_desc_get_size(plain_data_md);
159 }
160
161 measure_perf(default_thr_ctx, res, perf_func_, args);
162
163 return OK;
164}
165
166} // namespace zeropad
167