1 | /******************************************************************************* |
2 | * Copyright 2019-2022 Intel Corporation |
3 | * Copyright 2021 FUJITSU LIMITED |
4 | * Copyright 2021-2022 Arm Ltd. and affiliates |
5 | * |
6 | * Licensed under the Apache License, Version 2.0 (the "License"); |
7 | * you may not use this file except in compliance with the License. |
8 | * You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, software |
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | * See the License for the specific language governing permissions and |
16 | * limitations under the License. |
17 | *******************************************************************************/ |
18 | |
19 | #include "cpu/cpu_engine.hpp" |
20 | |
21 | #include "cpu/ref_eltwise.hpp" |
22 | |
23 | #if DNNL_X64 |
24 | #include "cpu/x64/jit_uni_eltwise.hpp" |
25 | #include "cpu/x64/jit_uni_eltwise_int.hpp" |
26 | using namespace dnnl::impl::cpu::x64; |
27 | #elif DNNL_AARCH64 |
28 | #include "cpu/aarch64/jit_uni_eltwise.hpp" |
29 | #include "cpu/aarch64/jit_uni_eltwise_int.hpp" |
30 | #if DNNL_AARCH64_USE_ACL |
31 | #include "cpu/aarch64/acl_eltwise.hpp" |
32 | #endif // DNNL_AARCH64_USE_ACL |
33 | using namespace dnnl::impl::cpu::aarch64; |
34 | #endif |
35 | |
36 | namespace dnnl { |
37 | namespace impl { |
38 | namespace cpu { |
39 | |
40 | namespace { |
41 | using namespace dnnl::impl::data_type; |
42 | using namespace dnnl::impl::prop_kind; |
43 | |
44 | // clang-format off |
// Returns the table of CPU eltwise implementations, keyed by propagation kind.
//
// The table is a function-local static: it is constructed the first time this
// function is called and the same object is returned by reference afterwards.
//
// Every list ends with a `nullptr` entry. get_eltwise_impl_list() hands out
// the raw data() pointer of a list without its size, so that trailing
// `nullptr` is presumably the end-of-list marker for whoever walks the list --
// TODO confirm against the consumer.
//
// NOTE(review): entries appear to be ordered from the most specialized kernel
// to the generic `ref_*` fallback, which suggests list order is selection
// priority -- confirm before reordering anything.
//
// NOTE(review): REG_ELTWISE_P, REG_BWD_PK and the CPU_INSTANCE* macros are
// defined outside this file. The entries below are not comma-separated, so
// each CPU_INSTANCE* macro presumably supplies its own separator or expands to
// nothing when its target is not being built -- verify in the macro
// definitions.
const std::map<pk_impl_key_t, std::vector<impl_list_item_t>> &impl_list_map() {
    static const std::map<pk_impl_key_t, std::vector<impl_list_item_t>> the_map = REG_ELTWISE_P({
        // Forward propagation (one list shared by training and inference;
        // see the key selection in get_eltwise_impl_list()).
        {{forward}, {
            // x64 JIT kernels, floating-point data types.
            CPU_INSTANCE_X64(jit_uni_eltwise_fwd_t<avx512_core_fp16, f16>)
            CPU_INSTANCE_X64(jit_uni_eltwise_fwd_t<avx512_core, f32>)
            CPU_INSTANCE_X64(jit_uni_eltwise_fwd_t<avx512_core, bf16>)
            CPU_INSTANCE_X64(jit_uni_eltwise_fwd_t<avx2, f32>)
            CPU_INSTANCE_X64(jit_uni_eltwise_fwd_t<avx, f32>)
            CPU_INSTANCE_X64(jit_uni_eltwise_fwd_t<sse41, f32>)
            // x64 JIT kernels, integer data types.
            CPU_INSTANCE_X64(jit_uni_eltwise_int_fwd_t<avx512_core, s32>)
            CPU_INSTANCE_X64(jit_uni_eltwise_int_fwd_t<avx512_core, s8>)
            CPU_INSTANCE_X64(jit_uni_eltwise_int_fwd_t<avx512_core, u8>)
            CPU_INSTANCE_X64(jit_uni_eltwise_int_fwd_t<avx2, s32>)
            CPU_INSTANCE_X64(jit_uni_eltwise_int_fwd_t<avx2, s8>)
            CPU_INSTANCE_X64(jit_uni_eltwise_int_fwd_t<avx2, u8>)
            CPU_INSTANCE_X64(jit_uni_eltwise_int_fwd_t<sse41, s32>)
            CPU_INSTANCE_X64(jit_uni_eltwise_int_fwd_t<sse41, s8>)
            CPU_INSTANCE_X64(jit_uni_eltwise_int_fwd_t<sse41, u8>)
            // AArch64 JIT kernels (SVE), f32 first, then integer types.
            CPU_INSTANCE_AARCH64(jit_uni_eltwise_fwd_t<sve_512, f32>)
            CPU_INSTANCE_AARCH64(jit_uni_eltwise_fwd_t<sve_256, f32>)
            CPU_INSTANCE_AARCH64(jit_uni_eltwise_fwd_t<sve_128, f32>)
            CPU_INSTANCE_AARCH64(jit_uni_eltwise_int_fwd_t<sve_512, s32>)
            CPU_INSTANCE_AARCH64(jit_uni_eltwise_int_fwd_t<sve_512, s8>)
            CPU_INSTANCE_AARCH64(jit_uni_eltwise_int_fwd_t<sve_512, u8>)
            // ACL-backed implementation; its header is only included when
            // DNNL_AARCH64_USE_ACL is set.
            CPU_INSTANCE_AARCH64_ACL(acl_eltwise_fwd_t)
            // `ref_*` implementations from cpu/ref_eltwise.hpp, one per data
            // type, registered without an architecture-specific macro.
            CPU_INSTANCE(ref_eltwise_fwd_t<f32>)
            CPU_INSTANCE(ref_eltwise_fwd_t<bf16>)
            CPU_INSTANCE(ref_eltwise_fwd_t<f16>)
            CPU_INSTANCE(ref_eltwise_fwd_t<s32>)
            CPU_INSTANCE(ref_eltwise_fwd_t<s8>)
            CPU_INSTANCE(ref_eltwise_fwd_t<u8>)
            nullptr,
        }},
        // Backward propagation. Only floating-point data types are listed
        // here; there are no integer or ACL entries.
        {{backward}, REG_BWD_PK({
            // x64 JIT kernels.
            CPU_INSTANCE_X64(jit_uni_eltwise_bwd_t<avx512_core_fp16, f16>)
            CPU_INSTANCE_X64(jit_uni_eltwise_bwd_t<avx512_core, f32>)
            CPU_INSTANCE_X64(jit_uni_eltwise_bwd_t<avx512_core, bf16>)
            CPU_INSTANCE_X64(jit_uni_eltwise_bwd_t<avx2, f32>)
            CPU_INSTANCE_X64(jit_uni_eltwise_bwd_t<avx, f32>)
            CPU_INSTANCE_X64(jit_uni_eltwise_bwd_t<sse41, f32>)
            // AArch64 JIT kernels (SVE).
            CPU_INSTANCE_AARCH64(jit_uni_eltwise_bwd_t<sve_512, f32>)
            CPU_INSTANCE_AARCH64(jit_uni_eltwise_bwd_t<sve_256, f32>)
            CPU_INSTANCE_AARCH64(jit_uni_eltwise_bwd_t<sve_128, f32>)
            // `ref_*` implementations from cpu/ref_eltwise.hpp.
            CPU_INSTANCE(ref_eltwise_bwd_t<f32>)
            CPU_INSTANCE(ref_eltwise_bwd_t<bf16>)
            CPU_INSTANCE(ref_eltwise_bwd_t<f16>)
            nullptr,
        })},
    });
    return the_map;
}
96 | // clang-format on |
97 | } // namespace |
98 | |
99 | const impl_list_item_t *get_eltwise_impl_list(const eltwise_desc_t *desc) { |
100 | static const impl_list_item_t empty_list[] = {nullptr}; |
101 | |
102 | const bool is_fwd = utils::one_of( |
103 | desc->prop_kind, forward_training, forward_inference); |
104 | prop_kind_t prop_kind = is_fwd ? forward : backward; |
105 | |
106 | pk_impl_key_t key {prop_kind}; |
107 | |
108 | const auto impl_list_it = impl_list_map().find(key); |
109 | return impl_list_it != impl_list_map().cend() ? impl_list_it->second.data() |
110 | : empty_list; |
111 | } |
112 | |
113 | } // namespace cpu |
114 | } // namespace impl |
115 | } // namespace dnnl |
116 | |