1 | /******************************************************************************* |
2 | * Copyright 2020-2022 Intel Corporation |
3 | * Copyright 2020 Arm Ltd. and affiliates |
4 | * |
5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | * you may not use this file except in compliance with the License. |
7 | * You may obtain a copy of the License at |
8 | * |
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | * |
11 | * Unless required by applicable law or agreed to in writing, software |
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | * See the License for the specific language governing permissions and |
15 | * limitations under the License. |
16 | *******************************************************************************/ |
17 | |
18 | #ifndef CPU_PLATFORM_HPP |
19 | #define CPU_PLATFORM_HPP |
20 | |
21 | #include "oneapi/dnnl/dnnl_config.h" |
22 | |
23 | #include "common/c_types_map.hpp" |
24 | #include "common/impl_registration.hpp" |
25 | #include "common/z_magic.hpp" |
26 | |
27 | // Possible architectures: |
28 | // - DNNL_X64 |
29 | // - DNNL_AARCH64 |
30 | // - DNNL_PPC64 |
31 | // - DNNL_S390X |
32 | // - DNNL_RV64 |
33 | // - DNNL_ARCH_GENERIC |
34 | // Target architecture macro is set to 1, others to 0. All macros are defined. |
35 | |
// Step 1: auto-detection. Runs only when none of the architecture macros has
// been defined before this point (e.g. on the compiler command line); the
// target is then deduced from the compiler's predefined macros.
// NOTE(review): `__riscv` is also predefined by 32-bit RISC-V toolchains, so
// an RV32 target would be reported as DNNL_RV64 -- confirm whether the check
// should additionally require `__riscv_xlen == 64`.
#if !defined(DNNL_X64) && !defined(DNNL_AARCH64) && !defined(DNNL_PPC64) \
        && !defined(DNNL_S390X) && !defined(DNNL_RV64) \
        && !defined(DNNL_ARCH_GENERIC)
#if defined(__x86_64__) || defined(_M_X64)
#define DNNL_X64 1
#elif defined(__aarch64__)
#define DNNL_AARCH64 1
#elif defined(__powerpc64__) || defined(__PPC64__) || defined(_ARCH_PPC64)
#define DNNL_PPC64 1
#elif defined(__s390x__)
#define DNNL_S390X 1
#elif defined(__riscv)
#define DNNL_RV64 1
#else
#define DNNL_ARCH_GENERIC 1
#endif
#endif // no architecture macro was pre-defined

// Step 2: sanity check. Exactly one architecture macro may exist by now.
#if (defined(DNNL_X64) + defined(DNNL_AARCH64) + defined(DNNL_PPC64) \
        + defined(DNNL_S390X) + defined(DNNL_RV64) \
        + defined(DNNL_ARCH_GENERIC)) \
        != 1
#error One and only one architecture should be defined at a time
#endif

// Step 3: give every remaining architecture macro the value 0, so that all of
// them can be used in plain `#if DNNL_<ARCH>` expressions afterwards.
#ifndef DNNL_X64
#define DNNL_X64 0
#endif
#ifndef DNNL_AARCH64
#define DNNL_AARCH64 0
#endif
#ifndef DNNL_PPC64
#define DNNL_PPC64 0
#endif
#ifndef DNNL_S390X
#define DNNL_S390X 0
#endif
#ifndef DNNL_RV64
#define DNNL_RV64 0
#endif
#ifndef DNNL_ARCH_GENERIC
#define DNNL_ARCH_GENERIC 0
#endif
80 | |
// Helper macros: expand the parameters only on the corresponding architecture.
// Equivalent to: #if DNNL_$ARCH ... #endif
//
// The condition handed to Z_CONDITIONAL_DO must be the architecture flag
// itself (DNNL_X64, DNNL_PPC64, ...), which is always defined to 0 or 1 above.
// Passing the name of the helper (e.g. DNNL_PPC64_ONLY) does not work: a
// function-like macro is not re-expanded inside its own expansion, so the
// condition would never reduce to 0 or 1 and any use would fail to compile.
#define DNNL_X64_ONLY(...) Z_CONDITIONAL_DO(DNNL_X64, __VA_ARGS__)
#define DNNL_PPC64_ONLY(...) Z_CONDITIONAL_DO(DNNL_PPC64, __VA_ARGS__)
#define DNNL_S390X_ONLY(...) Z_CONDITIONAL_DO(DNNL_S390X, __VA_ARGS__)
#define DNNL_AARCH64_ONLY(...) Z_CONDITIONAL_DO(DNNL_AARCH64, __VA_ARGS__)

// Negation of the helper macros above: expands the parameters on every
// architecture except x64.
#define DNNL_NON_X64_ONLY(...) Z_CONDITIONAL_DO(Z_NOT(DNNL_X64), __VA_ARGS__)
90 | |
// Using Arm Compute Library kernels is optional for AArch64 builds
// and can be enabled with the DNNL_AARCH64_USE_ACL CMake option.
// DNNL_AARCH64 is always defined at this point (to either 0 or 1), so its
// value -- not its mere existence -- has to be tested; otherwise the ACL-only
// code would also be emitted on non-AArch64 builds that define
// DNNL_AARCH64_USE_ACL.
#if DNNL_AARCH64 && defined(DNNL_AARCH64_USE_ACL)
#define DNNL_AARCH64_ACL_ONLY(...) __VA_ARGS__
#else
#define DNNL_AARCH64_ACL_ONLY(...)
#endif
98 | |
// Primitive ISA section for configuring knobs.
// Note: MSVC preprocessor for some reason "eats" symbols it's not supposed to
// if __VA_ARGS__ is passed as empty. Then things happen like this for non-x64:
// impl0, AMX(X64_impl1), impl2, ... -> impl0 impl2, ...
// resulting in compilation error. Such problem happens for lists interleaving
// X64 impls and non-X64 for non-X64 build.
//
// The __BUILD_<ISA> knobs form a chain: a knob evaluates to true when its own
// BUILD_<ISA> option is set or when the knob of the preceding ISA in the chain
// (AMX -> AVX512 -> AVX2 -> SSE41) is true; BUILD_PRIMITIVE_CPU_ISA_ALL turns
// all of them on. On non-x64 builds every knob is 0.
// The replacement lists are parenthesized so that the knobs keep their meaning
// when combined with other operators (e.g. `!__BUILD_AVX2`).
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation; the names are kept as-is for compatibility with
// existing users.
#if DNNL_X64
#define __BUILD_AMX (BUILD_PRIMITIVE_CPU_ISA_ALL || BUILD_AMX)
#define __BUILD_AVX512 (__BUILD_AMX || BUILD_AVX512)
#define __BUILD_AVX2 (__BUILD_AVX512 || BUILD_AVX2)
#define __BUILD_SSE41 (__BUILD_AVX2 || BUILD_SSE41)
#else
#define __BUILD_AMX 0
#define __BUILD_AVX512 0
#define __BUILD_AVX2 0
#define __BUILD_SSE41 0
#endif

// REG_<ISA>_ISA(...) keeps its arguments when the corresponding knob is on and
// expands to nothing otherwise.
// Note: unlike workload or primitive set, these macros will work with impl
// items directly, thus, just make an item disappear, no empty lists.
#if __BUILD_AMX
#define REG_AMX_ISA(...) __VA_ARGS__
#else
#define REG_AMX_ISA(...)
#endif

#if __BUILD_AVX512
#define REG_AVX512_ISA(...) __VA_ARGS__
#else
#define REG_AVX512_ISA(...)
#endif

#if __BUILD_AVX2
#define REG_AVX2_ISA(...) __VA_ARGS__
#else
#define REG_AVX2_ISA(...)
#endif

#if __BUILD_SSE41
#define REG_SSE41_ISA(...) __VA_ARGS__
#else
#define REG_SSE41_ISA(...)
#endif
142 | |
143 | namespace dnnl { |
144 | namespace impl { |
145 | namespace cpu { |
146 | namespace platform { |
147 | |
148 | const char *get_isa_info(); |
149 | dnnl_cpu_isa_t get_effective_cpu_isa(); |
150 | status_t set_max_cpu_isa(dnnl_cpu_isa_t isa); |
151 | status_t set_cpu_isa_hints(dnnl_cpu_isa_hints_t isa_hints); |
152 | dnnl_cpu_isa_hints_t get_cpu_isa_hints(); |
153 | |
154 | bool DNNL_API prefer_ymm_requested(); |
155 | // This call is limited to performing checks on plain C-code implementations |
156 | // (e.g. 'ref' and 'simple_primitive') and should avoid any x64 JIT |
157 | // implementations since these require specific code-path updates. |
158 | bool DNNL_API has_data_type_support(data_type_t data_type); |
159 | bool DNNL_API has_training_support(data_type_t data_type); |
160 | float DNNL_API s8s8_weights_scale_factor(); |
161 | |
162 | unsigned get_per_core_cache_size(int level); |
163 | unsigned get_num_cores(); |
164 | #if DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL |
165 | unsigned DNNL_API get_max_threads_to_use(); |
166 | #endif |
167 | |
168 | constexpr int get_cache_line_size() { |
169 | return 64; |
170 | } |
171 | |
172 | int get_vector_register_size(); |
173 | |
174 | size_t get_timestamp(); |
175 | |
176 | } // namespace platform |
177 | |
178 | // XXX: find a better place for these values? |
179 | enum { |
180 | PAGE_4K = 4096, |
181 | PAGE_2M = 2097152, |
182 | }; |
183 | |
184 | } // namespace cpu |
185 | } // namespace impl |
186 | } // namespace dnnl |
187 | |
188 | #endif |
189 | |