1 | /******************************************************************************* |
2 | * Copyright 2020-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef CPU_REORDER_CPU_REORDER_HPP |
18 | #define CPU_REORDER_CPU_REORDER_HPP |
19 | |
20 | #include <map> |
21 | #include <vector> |
22 | |
23 | #include "cpu/reorder/simple_reorder.hpp" |
24 | |
25 | #include "common/impl_list_item.hpp" |
26 | #include "common/memory.hpp" |
27 | #include "common/type_helpers.hpp" |
28 | |
29 | #include "cpu/cpu_engine.hpp" |
30 | #include "cpu/reorder/cpu_reorder_pd.hpp" |
31 | |
32 | #if DNNL_X64 |
33 | #include "cpu/x64/jit_uni_reorder.hpp" |
34 | #include "cpu/x64/matmul/brgemm_matmul_reorders.hpp" |
35 | #include "cpu/x64/wino_reorder.hpp" |
36 | #elif DNNL_AARCH64 |
37 | #include "cpu/aarch64/jit_uni_reorder.hpp" |
38 | #endif |
39 | |
40 | #include "cpu/rnn/rnn_reorders.hpp" |
41 | |
42 | namespace dnnl { |
43 | namespace impl { |
44 | namespace cpu { |
45 | |
46 | using namespace dnnl::impl::data_type; |
47 | using namespace dnnl::impl::format_tag; |
48 | |
49 | struct reorder_impl_key_t { |
50 | data_type_t src_dt; |
51 | data_type_t dst_dt; // data_type::undef if arbitrary |
52 | int ndims; // 0 if arbitrary |
53 | |
54 | bool operator<(const reorder_impl_key_t &rhs) const { |
55 | return value() < rhs.value(); |
56 | } |
57 | |
58 | private: |
59 | enum { MAX_DT_NUM = 10 }; |
60 | size_t value() const { |
61 | return ((size_t)ndims * MAX_DT_NUM + (size_t)src_dt) * MAX_DT_NUM |
62 | + (size_t)dst_dt; |
63 | } |
64 | }; |
65 | |
// Map from (src_dt, dst_dt, ndims) key to the ordered list of reorder
// implementations registered for that combination.
using impl_list_map_t
        = std::map<reorder_impl_key_t, std::vector<impl_list_item_t>>;

/* regular reorders */
// Accessors for the per-data-type implementation list maps; each is
// declared here and defined elsewhere (presumably one translation unit
// per data-type pair — verify against the cpu/reorder sources).
extern const impl_list_map_t &regular_f32_bf16_impl_list_map();
extern const impl_list_map_t &regular_f32_f16_impl_list_map();
extern const impl_list_map_t &regular_f32_f32_impl_list_map();
extern const impl_list_map_t &regular_f32_s32_impl_list_map();
extern const impl_list_map_t &regular_f32_s8_impl_list_map();
extern const impl_list_map_t &regular_f32_u8_impl_list_map();
extern const impl_list_map_t &regular_bf16_impl_list_map();
extern const impl_list_map_t &regular_f16_impl_list_map();
extern const impl_list_map_t &regular_s32_impl_list_map();
extern const impl_list_map_t &regular_s8_impl_list_map();
extern const impl_list_map_t &regular_u8_impl_list_map();

/* conv reorders w/ compensation */
// Same as above, but for weights reorders that also compute the
// zero-point/s8s8 compensation data alongside the conversion.
extern const impl_list_map_t &comp_f32_s8_impl_list_map();
extern const impl_list_map_t &comp_bf16_s8_impl_list_map();
extern const impl_list_map_t &comp_s8_s8_impl_list_map();
86 | |
// clang-format off

// Registers one simple_reorder_t instance for the given input/output data
// types and format tags; trailing arguments select the fmt_order and,
// optionally, a spec:: specialization. Expands to a trailing-comma list
// element, so consecutive invocations build an impl_list_item_t array.
#define REG_SR(idt, ifmt, odt, ofmt, ...) \
    impl_list_item_t(impl_list_item_t::reorder_type_deduction_helper_t< \
            simple_reorder_t<idt, ifmt, odt, ofmt, __VA_ARGS__>::pd_t>()),

// Registers the reorder for both directions of the same tag pair:
// fmt_order::keep (ifmt -> ofmt) and fmt_order::reverse (ofmt -> ifmt).
#define REG_SR_BIDIR(idt, ifmt, odt, ofmt) \
    REG_SR(idt, ifmt, odt, ofmt, fmt_order::keep) \
    REG_SR(idt, ifmt, odt, ofmt, fmt_order::reverse)

// Registers the two direct-copy specializations (full copy and
// copy-except-dim-0) for arbitrary format tags of the given data types.
#define REG_SR_DIRECT_COPY(idt, odt) \
    REG_SR(idt, any, odt, any, fmt_order::any, spec::direct_copy) \
    REG_SR(idt, any, odt, any, fmt_order::any, spec::direct_copy_except_dim_0)

// clang-format on
102 | |
#if defined(__INTEL_COMPILER) || (defined(__GNUC__) && !defined(__clang__))
/* Direct copy for icc which is faster than jitted code;
 * Direct copy for gcc which might or might not be faster than jitted
 * code, but still worth it because doesn't require jitting, i.e. much
 * faster creation time. This is tentative solution and should be
 * removed later (when we will cache jitted code?...). */
#define REG_FAST_DIRECT_COPY_F32_F32 REG_SR_DIRECT_COPY(f32, f32)
#else
// On other compilers (e.g. clang) this expands to nothing, so no
// direct-copy f32->f32 entry is registered.
#define REG_FAST_DIRECT_COPY_F32_F32
#endif

#ifdef __INTEL_COMPILER
/* direct copy for icc, which is faster than jitted code */
#define REG_FAST_DIRECT_COPY(sdt, ddt) REG_SR_DIRECT_COPY(sdt, ddt)
#else
// Non-icc builds: expands to nothing (no generic direct-copy entries).
#define REG_FAST_DIRECT_COPY(sdt, ddt)
#endif
120 | |
// Registers an arbitrary reorder implementation by its class name (e.g. the
// jit/rnn/wino reorders included above): expands to an impl_list_item_t
// built from the class's nested pd_t, followed by a trailing comma so it
// can be used directly inside an implementation-list initializer.
#define CPU_REORDER_INSTANCE(...) \
    impl_list_item_t(impl_list_item_t::reorder_type_deduction_helper_t< \
            __VA_ARGS__::pd_t>()),
124 | |
125 | } // namespace cpu |
126 | } // namespace impl |
127 | } // namespace dnnl |
128 | |
129 | #endif |
130 | |