1 | /******************************************************************************* |
2 | * Copyright 2020-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include <atomic> |
18 | |
19 | #include "oneapi/dnnl/dnnl.h" |
20 | |
21 | #include "dnnl_common.hpp" |
22 | #include "dnnl_memory.hpp" |
23 | |
24 | #include "utils/parallel.hpp" |
25 | |
26 | #include "zeropad/zeropad.hpp" |
27 | |
// Forward declaration of an internal (non-public) oneDNN entry point that
// fills the padded area of a memory object. It is declared here manually
// because it is not exposed through the public dnnl.h API.
extern "C" {
dnnl_status_t dnnl_impl_zero_pad(
        const dnnl_memory *memory, dnnl_stream *stream);
}
32 | |
33 | namespace zeropad { |
34 | |
35 | static int compare(const dnn_mem_t &test_mem, res_t *res) { |
36 | if (test_mem.ndims() == 0) return OK; |
37 | if (test_mem.format_kind() != dnnl_blocked) return OK; |
38 | |
39 | std::atomic<int> ok(true); |
40 | |
41 | const uint8_t *mem = (const uint8_t *)test_mem; |
42 | size_t type_size = test_mem.sizeof_dt(); |
43 | |
44 | const auto increment |
45 | = [&](dnnl_dims_t &pos, dnnl_dim_t &idx, bool &done, int stop_dim) { |
46 | for (int i = test_mem.ndims() - 1; i >= stop_dim; i--) { |
47 | pos[i]++; |
48 | if (pos[i] < test_mem.dims()[i]) { |
49 | break; |
50 | } else { |
51 | pos[i] = 0; |
52 | if (i == stop_dim) done = true; |
53 | } |
54 | } |
55 | idx = md_off_v(test_mem, pos); |
56 | }; |
57 | |
58 | benchdnn_parallel_nd(test_mem.dims()[0], [&](dnnl_dim_t dim0) { |
59 | dnnl_dims_t pos = {0}; |
60 | pos[0] = dim0; |
61 | dnnl_dim_t idx = md_off_v(test_mem, pos); |
62 | bool done = false; |
63 | |
64 | while (!done && ok) { |
65 | for (size_t i = 0; i < type_size; i++) { |
66 | uint8_t mem_value = mem[type_size * idx + i]; |
67 | if (mem_value != dnnl_mem_default_value) ok = false; |
68 | } |
69 | increment(pos, idx, done, 1); |
70 | } |
71 | }); |
72 | |
73 | // Serially check for errors for data dumping purposes |
74 | if (!ok) { |
75 | int errors = 0; |
76 | dnnl_dims_t pos = {0}; |
77 | dnnl_dim_t idx = md_off_v(test_mem, pos); |
78 | bool done = false; |
79 | while (!done) { |
80 | for (size_t i = 0; i < type_size; i++) { |
81 | uint8_t mem_value = mem[type_size * idx + i]; |
82 | bool idx_ok = (mem_value == dnnl_mem_default_value); |
83 | if (!idx_ok) errors++; |
84 | const bool dump = (!idx_ok && (errors < 10 || verbose >= 10)) |
85 | || (verbose >= 99); |
86 | if (dump) { |
87 | BENCHDNN_PRINT(0, |
88 | "[%4ld][arg:%d]" |
89 | "[" IFMT "," IFMT "," IFMT "," IFMT "," IFMT |
90 | "," IFMT "] dt:% 9.6g \n" , |
91 | (long)idx, test_mem.dt(), pos[0], pos[1], pos[2], |
92 | pos[3], pos[4], pos[5], test_mem.get_elem(idx)); |
93 | break; |
94 | } |
95 | } |
96 | increment(pos, idx, done, 0); |
97 | } |
98 | |
99 | BENCHDNN_PRINT(0, "@@@ [arg:%d] check_non_zeroed_elements failed\n" , |
100 | test_mem.dt()); |
101 | res->errors += errors; |
102 | } |
103 | |
104 | int errors = 0; |
105 | auto status = check_zero_padding(test_mem, test_mem.dt(), res, &errors); |
106 | res->errors += errors; |
107 | |
108 | bool passed = ok && (status == OK); |
109 | if (passed) res->state = PASSED; |
110 | return passed ? OK : FAIL; |
111 | } |
112 | |
113 | static dnnl_status_t perf_func( |
114 | const dnnl_stream_t &stream, const std::vector<dnnl_exec_arg_t> &args) { |
115 | return dnnl_impl_zero_pad(args[0].memory, stream); |
116 | } |
117 | |
118 | void skip_unimplemented_prb(const prb_t *prb, res_t *res) { |
119 | skip_unimplemented_data_type({prb->dt}, FWD_D, res); |
120 | |
121 | if (is_nvidia_gpu() || is_amd_gpu()) { |
122 | res->state = SKIPPED; |
123 | res->reason = CASE_NOT_SUPPORTED; |
124 | } |
125 | } |
126 | |
// Driver entry point: builds the test memory, runs the zero-pad kernel,
// then checks correctness (CORR mode) and/or fills perf counters (PERF mode).
int doit(const prb_t *prb, res_t *res) {
    // LIST mode only enumerates test cases; nothing is executed.
    if (bench_mode == LIST) return res->state = LISTED, OK;

    skip_unimplemented_prb(prb, res);
    if (res->state == SKIPPED) return OK;

    // init_md may flag the case SKIPPED/UNIMPLEMENTED (e.g. bad tag) via
    // the global result state rather than a return code — re-check here.
    auto data_md = dnn_mem_t::init_md(
            prb->ndims, prb->dims.data(), prb->dt, prb->tag);
    if (res->state == SKIPPED || res->state == UNIMPLEMENTED) return OK;

    // Bail out early if the memory would not fit the device/limits.
    SAFE(check_mem_size(data_md, res), WARN);
    if (res->state == SKIPPED) return OK;

    const auto &test_engine = get_test_engine();

    dnn_mem_t test_mem(data_md, test_engine);

    args_t args;
    args.set(0, test_mem);
    perf_function_t perf_func_ = &perf_func;

    // One functional run; fills the padded area of test_mem.
    execute_and_wait(perf_func_, test_engine, args, res);

    if (is_bench_mode(CORR)) { SAFE(compare(test_mem, res), WARN); }
    if (is_bench_mode(PERF)) {
        // Get plain memory desc size to have a proper padded area size.
        auto plain_data_md = dnn_mem_t::init_md(
                prb->ndims, prb->dims.data(), prb->dt, tag::abx);
        // Fill output bytes for perf_report.
        res->ibytes = 0; // Since we don't read any data from padding.
        // Bytes written = total (padded) size minus the plain/dense size,
        // i.e. only the padding area counts as output traffic.
        res->obytes = dnnl_memory_desc_get_size(data_md)
                - dnnl_memory_desc_get_size(plain_data_md);
    }

    // NOTE(review): called unconditionally — presumably a no-op outside
    // PERF mode; confirm against measure_perf's implementation.
    measure_perf(default_thr_ctx, res, perf_func_, args);

    return OK;
}
165 | |
166 | } // namespace zeropad |
167 | |