1 | /******************************************************************************* |
2 | * Copyright 2020-2022 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef CPU_REORDER_CPU_REORDER_HPP |
18 | #define CPU_REORDER_CPU_REORDER_HPP |
19 | |
20 | #include <map> |
21 | #include <vector> |
22 | |
23 | #include "cpu/reorder/simple_reorder.hpp" |
24 | |
25 | #include "common/impl_list_item.hpp" |
26 | #include "common/memory.hpp" |
27 | #include "common/type_helpers.hpp" |
28 | |
29 | #include "cpu/cpu_engine.hpp" |
30 | #include "cpu/reorder/cpu_reorder_pd.hpp" |
31 | |
32 | #if DNNL_X64 |
33 | #include "cpu/x64/jit_uni_reorder.hpp" |
34 | #include "cpu/x64/matmul/brgemm_matmul_reorders.hpp" |
35 | #include "cpu/x64/wino_reorder.hpp" |
36 | #elif DNNL_AARCH64 |
37 | #include "cpu/aarch64/jit_uni_reorder.hpp" |
38 | #endif |
39 | |
40 | #include "cpu/rnn/rnn_reorders.hpp" |
41 | |
42 | namespace dnnl { |
43 | namespace impl { |
44 | namespace cpu { |
45 | |
46 | using namespace dnnl::impl::data_type; |
47 | using namespace dnnl::impl::format_tag; |
48 | |
49 | struct reorder_impl_key_t { |
50 | data_type_t src_dt; |
51 | data_type_t dst_dt; // data_type::undef if arbitrary |
52 | int ndims; // 0 if arbitrary |
53 | |
54 | bool operator<(const reorder_impl_key_t &rhs) const { |
55 | return value() < rhs.value(); |
56 | } |
57 | |
58 | private: |
59 | enum { MAX_DT_NUM = 10 }; |
60 | size_t value() const { |
61 | return ((size_t)ndims * MAX_DT_NUM + (size_t)src_dt) * MAX_DT_NUM |
62 | + (size_t)dst_dt; |
63 | } |
64 | }; |
65 | |
// Map from (src_dt, dst_dt, ndims) key to the ordered list of reorder
// implementations registered for that combination.
using impl_list_map_t
        = std::map<reorder_impl_key_t, std::vector<impl_list_item_t>>;

/* regular reorders */
// Accessors for the per-data-type implementation list maps; each is
// declared here and defined elsewhere (presumably one translation unit
// per data-type pair — verify against the cpu/reorder sources).
extern const impl_list_map_t &regular_f32_bf16_impl_list_map();
extern const impl_list_map_t &regular_f32_f16_impl_list_map();
extern const impl_list_map_t &regular_f32_f32_impl_list_map();
extern const impl_list_map_t &regular_f32_s32_impl_list_map();
extern const impl_list_map_t &regular_f32_s8_impl_list_map();
extern const impl_list_map_t &regular_f32_u8_impl_list_map();
extern const impl_list_map_t &regular_bf16_impl_list_map();
extern const impl_list_map_t &regular_f16_impl_list_map();
extern const impl_list_map_t &regular_s32_impl_list_map();
extern const impl_list_map_t &regular_s8_impl_list_map();
extern const impl_list_map_t &regular_u8_impl_list_map();

/* conv reorders w/ compensation */
// Same as above, but for weights reorders that also compute the
// zero-point/s8s8 compensation data alongside the conversion.
extern const impl_list_map_t &comp_f32_s8_impl_list_map();
extern const impl_list_map_t &comp_bf16_s8_impl_list_map();
extern const impl_list_map_t &comp_s8_s8_impl_list_map();
86 | |
// clang-format off

// Registers one simple_reorder_t instance for the given input/output data
// types and format tags; trailing arguments select the fmt_order and,
// optionally, a spec:: specialization. Expands to a trailing-comma list
// element, so consecutive invocations build an impl_list_item_t array.
#define REG_SR(idt, ifmt, odt, ofmt, ...) \
    impl_list_item_t(impl_list_item_t::reorder_type_deduction_helper_t< \
            simple_reorder_t<idt, ifmt, odt, ofmt, __VA_ARGS__>::pd_t>()),

// Registers the reorder for both directions of the same tag pair:
// fmt_order::keep (ifmt -> ofmt) and fmt_order::reverse (ofmt -> ifmt).
#define REG_SR_BIDIR(idt, ifmt, odt, ofmt) \
    REG_SR(idt, ifmt, odt, ofmt, fmt_order::keep) \
    REG_SR(idt, ifmt, odt, ofmt, fmt_order::reverse)

// Registers the two direct-copy specializations (full copy and
// copy-except-dim-0) for arbitrary format tags of the given data types.
#define REG_SR_DIRECT_COPY(idt, odt) \
    REG_SR(idt, any, odt, any, fmt_order::any, spec::direct_copy) \
    REG_SR(idt, any, odt, any, fmt_order::any, spec::direct_copy_except_dim_0)

// clang-format on
102 | |
#if defined(__INTEL_COMPILER) || (defined(__GNUC__) && !defined(__clang__))
/* Direct copy for icc which is faster than jitted code;
 * Direct copy for gcc which might or might not be faster than jitted
 * code, but still worth it because doesn't require jitting, i.e. much
 * faster creation time. This is tentative solution and should be
 * removed later (when we will cache jitted code?...). */
#define REG_FAST_DIRECT_COPY_F32_F32 REG_SR_DIRECT_COPY(f32, f32)
#else
// On other compilers (e.g. clang) this expands to nothing, so no
// direct-copy f32->f32 entry is registered.
#define REG_FAST_DIRECT_COPY_F32_F32
#endif

#ifdef __INTEL_COMPILER
/* direct copy for icc, which is faster than jitted code */
#define REG_FAST_DIRECT_COPY(sdt, ddt) REG_SR_DIRECT_COPY(sdt, ddt)
#else
// Non-icc builds: expands to nothing (no generic direct-copy entries).
#define REG_FAST_DIRECT_COPY(sdt, ddt)
#endif
120 | |
// Registers an arbitrary reorder implementation by its class name (e.g. the
// jit/rnn/wino reorders included above): expands to an impl_list_item_t
// built from the class's nested pd_t, followed by a trailing comma so it
// can be used directly inside an implementation-list initializer.
#define CPU_REORDER_INSTANCE(...) \
    impl_list_item_t(impl_list_item_t::reorder_type_deduction_helper_t< \
            __VA_ARGS__::pd_t>()),
124 | |
125 | } // namespace cpu |
126 | } // namespace impl |
127 | } // namespace dnnl |
128 | |
129 | #endif |
130 | |