1/*******************************************************************************
2* Copyright 2020-2022 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef CPU_REORDER_CPU_REORDER_HPP
18#define CPU_REORDER_CPU_REORDER_HPP
19
20#include <map>
21#include <vector>
22
23#include "cpu/reorder/simple_reorder.hpp"
24
25#include "common/impl_list_item.hpp"
26#include "common/memory.hpp"
27#include "common/type_helpers.hpp"
28
29#include "cpu/cpu_engine.hpp"
30#include "cpu/reorder/cpu_reorder_pd.hpp"
31
32#if DNNL_X64
33#include "cpu/x64/jit_uni_reorder.hpp"
34#include "cpu/x64/matmul/brgemm_matmul_reorders.hpp"
35#include "cpu/x64/wino_reorder.hpp"
36#elif DNNL_AARCH64
37#include "cpu/aarch64/jit_uni_reorder.hpp"
38#endif
39
40#include "cpu/rnn/rnn_reorders.hpp"
41
42namespace dnnl {
43namespace impl {
44namespace cpu {
45
46using namespace dnnl::impl::data_type;
47using namespace dnnl::impl::format_tag;
48
49struct reorder_impl_key_t {
50 data_type_t src_dt;
51 data_type_t dst_dt; // data_type::undef if arbitrary
52 int ndims; // 0 if arbitrary
53
54 bool operator<(const reorder_impl_key_t &rhs) const {
55 return value() < rhs.value();
56 }
57
58private:
59 enum { MAX_DT_NUM = 10 };
60 size_t value() const {
61 return ((size_t)ndims * MAX_DT_NUM + (size_t)src_dt) * MAX_DT_NUM
62 + (size_t)dst_dt;
63 }
64};
65
66using impl_list_map_t
67 = std::map<reorder_impl_key_t, std::vector<impl_list_item_t>>;
68
69/* regular reorders */
70extern const impl_list_map_t &regular_f32_bf16_impl_list_map();
71extern const impl_list_map_t &regular_f32_f16_impl_list_map();
72extern const impl_list_map_t &regular_f32_f32_impl_list_map();
73extern const impl_list_map_t &regular_f32_s32_impl_list_map();
74extern const impl_list_map_t &regular_f32_s8_impl_list_map();
75extern const impl_list_map_t &regular_f32_u8_impl_list_map();
76extern const impl_list_map_t &regular_bf16_impl_list_map();
77extern const impl_list_map_t &regular_f16_impl_list_map();
78extern const impl_list_map_t &regular_s32_impl_list_map();
79extern const impl_list_map_t &regular_s8_impl_list_map();
80extern const impl_list_map_t &regular_u8_impl_list_map();
81
82/* conv reorders w/ compensation */
83extern const impl_list_map_t &comp_f32_s8_impl_list_map();
84extern const impl_list_map_t &comp_bf16_s8_impl_list_map();
85extern const impl_list_map_t &comp_s8_s8_impl_list_map();
86
87// clang-format off
88
// Expands to a single impl_list_item_t built from the primitive descriptor
// (pd_t) of the simple_reorder_t specialization selected by the arguments.
// The variadic tail is forwarded as the trailing template arguments. The
// expansion ends with a comma, presumably so that consecutive invocations can
// be written back to back inside a braced list.
#define REG_SR(in_dt, in_fmt, out_dt, out_fmt, ...) \
    impl_list_item_t( \
            impl_list_item_t::reorder_type_deduction_helper_t< \
                    simple_reorder_t<in_dt, in_fmt, out_dt, out_fmt, \
                            __VA_ARGS__>::pd_t>()),
92
// Emits two REG_SR entries for the same data types and formats: the first
// with fmt_order::keep, the second with fmt_order::reverse.
#define REG_SR_BIDIR(in_dt, in_fmt, out_dt, out_fmt) \
    REG_SR(in_dt, in_fmt, out_dt, out_fmt, fmt_order::keep) \
    REG_SR(in_dt, in_fmt, out_dt, out_fmt, fmt_order::reverse)
96
// Emits two REG_SR entries that use `any` for both formats and
// fmt_order::any: the first with spec::direct_copy, the second with
// spec::direct_copy_except_dim_0.
#define REG_SR_DIRECT_COPY(in_dt, out_dt) \
    REG_SR(in_dt, any, out_dt, any, fmt_order::any, spec::direct_copy) \
    REG_SR(in_dt, any, out_dt, any, fmt_order::any, \
            spec::direct_copy_except_dim_0)
100
101// clang-format on
102
/* f32 -> f32 direct copy is registered for icc and for gcc (clang excluded):
 *  - icc: direct copy is faster than the jitted code;
 *  - gcc: direct copy might or might not be faster than the jitted code, but
 *    it requires no jitting, which gives a much faster creation time.
 * This is a tentative solution that should be removed later (perhaps once
 * jitted code is cached). For any other compiler the macro expands to
 * nothing. */
#if (defined(__GNUC__) && !defined(__clang__)) || defined(__INTEL_COMPILER)
#define REG_FAST_DIRECT_COPY_F32_F32 REG_SR_DIRECT_COPY(f32, f32)
#else
#define REG_FAST_DIRECT_COPY_F32_F32
#endif
113
/* Direct copy for an arbitrary pair of data types is registered for icc only,
 * where it is faster than the jitted code; for every other compiler the macro
 * expands to nothing. */
#if defined(__INTEL_COMPILER)
#define REG_FAST_DIRECT_COPY(src_type, dst_type) \
    REG_SR_DIRECT_COPY(src_type, dst_type)
#else
#define REG_FAST_DIRECT_COPY(src_type, dst_type)
#endif
120
// Expands to a single impl_list_item_t built from the nested pd_t of the type
// given as the argument. The parameter is variadic so that a type spelled
// with top-level commas (e.g. a template-id) can be passed unparenthesized.
// The expansion ends with a comma.
#define CPU_REORDER_INSTANCE(...) \
    impl_list_item_t( \
            impl_list_item_t::reorder_type_deduction_helper_t< \
                    __VA_ARGS__::pd_t>()),
124
125} // namespace cpu
126} // namespace impl
127} // namespace dnnl
128
129#endif
130