1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include "utils/parallel.hpp" |
18 | |
19 | #include "sum/sum.hpp" |
20 | |
21 | namespace sum { |
22 | |
23 | void compute_ref( |
24 | const prb_t *prb, const args_t &args, dnnl_primitive_t prim_ref) { |
25 | const dnn_mem_t &dst = args.find(DNNL_ARG_DST); |
26 | |
27 | float *dst_ptr = (float *)dst; |
28 | |
29 | const auto nelems = dst.nelems(); |
30 | |
31 | benchdnn_parallel_nd(nelems, [&](int64_t k) { |
32 | dst_ptr[k] = 0; |
33 | for (int i_input = 0; i_input < prb->n_inputs(); ++i_input) { |
34 | const dnn_mem_t &src_i = args.find(DNNL_ARG_MULTIPLE_SRC + i_input); |
35 | dst_ptr[k] += (src_i.get_elem(k) * prb->input_scales[i_input]); |
36 | } |
37 | }); |
38 | } |
39 | |
40 | } // namespace sum |
41 | |