1 | // Copyright 2018 The Gemmlowp Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | // detect_platform.h: Sets up macros that control architecture-specific |
16 | // features of gemmlowp's implementation. |
17 | |
18 | #ifndef GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_ |
19 | #define GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_ |
20 | |
21 | // Our inline assembly paths assume GCC/Clang syntax, so inline asm is only allowed for those compilers. |
22 | // Native Client doesn't seem to support inline assembly(?), so it is excluded. |
23 | #if (defined(__GNUC__) || defined(__clang__)) && !defined(__native_client__) |
24 | #define GEMMLOWP_ALLOW_INLINE_ASM |
25 | #endif |
26 | |
27 | // Define GEMMLOWP_NOINLINE as an attribute that prevents inlining when __GNUC__ is defined. |
28 | // For other compilers, define it as an empty macro. |
29 | #if defined(__GNUC__) |
30 | #define GEMMLOWP_NOINLINE __attribute__((noinline)) |
31 | #else |
32 | #define GEMMLOWP_NOINLINE |
33 | #endif |
34 | |
35 | // Detect ARM: GEMMLOWP_ARM_32 for __arm__, GEMMLOWP_ARM_64 for __aarch64__, GEMMLOWP_ARM for either. |
36 | #ifdef __arm__ |
37 | #define GEMMLOWP_ARM_32 |
38 | #endif |
39 | |
40 | #ifdef __aarch64__ |
41 | #define GEMMLOWP_ARM_64 |
42 | #endif |
43 | |
44 | #if defined(GEMMLOWP_ARM_32) || defined(GEMMLOWP_ARM_64) |
45 | #define GEMMLOWP_ARM |
46 | #endif |
47 | |
48 | // Detect MIPS: with __mips defined, __LP64__ selects GEMMLOWP_MIPS_64, otherwise GEMMLOWP_MIPS_32; GEMMLOWP_MIPS is set for either. |
49 | #if defined(__mips) && !defined(__LP64__) |
50 | #define GEMMLOWP_MIPS_32 |
51 | #endif |
52 | |
53 | #if defined(__mips) && defined(__LP64__) |
54 | #define GEMMLOWP_MIPS_64 |
55 | #endif |
56 | |
57 | #if defined(GEMMLOWP_MIPS_32) || defined(GEMMLOWP_MIPS_64) |
58 | #define GEMMLOWP_MIPS |
59 | #endif |
60 | |
61 | // Detect x86 under any of several predefined-macro spellings: GEMMLOWP_X86_32, GEMMLOWP_X86_64, and GEMMLOWP_X86 for either. |
62 | #if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386) |
63 | #define GEMMLOWP_X86_32 |
64 | #endif |
65 | |
66 | #if defined(__x86_64__) || defined(_M_X64) || defined(__amd64) |
67 | #define GEMMLOWP_X86_64 |
68 | #endif |
69 | |
70 | #if defined(GEMMLOWP_X86_32) || defined(GEMMLOWP_X86_64) |
71 | #define GEMMLOWP_X86 |
72 | #endif |
73 | |
74 | // Detect WebAssembly SIMD: GEMMLOWP_WASMSIMD is defined when __wasm_simd128__ is. |
75 | #if defined(__wasm_simd128__) |
76 | #define GEMMLOWP_WASMSIMD |
77 | #endif |
78 | |
79 | // Some of our optimized paths use inline assembly, and for |
80 | // now we don't bother enabling some other optimized paths using intrinsics |
81 | // where we can't use inline assembly paths. |
82 | #ifdef GEMMLOWP_ALLOW_INLINE_ASM |
83 | |
84 | // Detect NEON. It's important to check for both tokens, __ARM_NEON and __ARM_NEON__. |
85 | #if (defined __ARM_NEON) || (defined __ARM_NEON__) |
86 | #define GEMMLOWP_NEON |
87 | #endif |
88 | |
89 | // Convenience NEON tokens: GEMMLOWP_NEON_32 with GEMMLOWP_ARM_32, GEMMLOWP_NEON_64 with GEMMLOWP_ARM_64. |
90 | #if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_32) |
91 | #define GEMMLOWP_NEON_32 |
92 | #endif |
93 | |
94 | #if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_64) |
95 | #define GEMMLOWP_NEON_64 |
96 | #endif |
97 | |
98 | // Detect MIPS MSA: needs GEMMLOWP_MIPS, __mips_isa_rev >= 5 and __mips_msa. |
99 | // Limit MSA optimizations to little-endian CPUs for now. |
100 | // TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs? |
101 | #if defined(GEMMLOWP_MIPS) && (__mips_isa_rev >= 5) && defined(__mips_msa) && \ |
102 | defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) |
103 | #define GEMMLOWP_MSA |
104 | #endif |
105 | |
106 | // Convenience MIPS MSA tokens: GEMMLOWP_MSA_32 with GEMMLOWP_MIPS_32, GEMMLOWP_MSA_64 with GEMMLOWP_MIPS_64. |
107 | #if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_32) |
108 | #define GEMMLOWP_MSA_32 |
109 | #endif |
110 | |
111 | #if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_64) |
112 | #define GEMMLOWP_MSA_64 |
113 | #endif |
114 | |
115 | // AVX2 is opt-in: build with -D GEMMLOWP_ENABLE_AVX2 to allow it. |
116 | // Detect AVX2 (requires both __AVX2__ and GEMMLOWP_ENABLE_AVX2). |
117 | #if defined(__AVX2__) && defined(GEMMLOWP_ENABLE_AVX2) |
118 | #define GEMMLOWP_AVX2 |
119 | // Otherwise, detect SSE4. |
120 | // MSVC does not have __SSE4_1__ macro, but will enable SSE4 |
121 | // when AVX is turned on. |
122 | #elif defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__)) |
123 | #define GEMMLOWP_SSE4 |
124 | // Otherwise, detect SSE3. At most one of GEMMLOWP_AVX2, GEMMLOWP_SSE4, GEMMLOWP_SSE3 is defined by this chain. |
125 | #elif defined(__SSE3__) |
126 | #define GEMMLOWP_SSE3 |
127 | #endif |
128 | |
129 | // Convenience SSE4, SSE3 and AVX2 tokens for 32-bit or 64-bit x86; GEMMLOWP_DISABLE_SSE4 suppresses the SSE4 ones. |
130 | #if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_32) && \ |
131 | !defined(GEMMLOWP_DISABLE_SSE4) |
132 | #define GEMMLOWP_SSE4_32 |
133 | #endif |
134 | |
135 | #if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_32) |
136 | #define GEMMLOWP_SSE3_32 |
137 | #endif |
138 | |
139 | #if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_64) && \ |
140 | !defined(GEMMLOWP_DISABLE_SSE4) |
141 | #define GEMMLOWP_SSE4_64 |
142 | #endif |
143 | |
144 | #if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_64) |
145 | #define GEMMLOWP_SSE3_64 |
146 | #endif |
147 | |
148 | #if defined(GEMMLOWP_AVX2) && defined(GEMMLOWP_X86_64) |
149 | #define GEMMLOWP_AVX2_64 |
150 | #endif |
151 | |
152 | #if defined(__has_feature) /* Only probe for sanitizers when __has_feature is available. */ |
153 | #if __has_feature(memory_sanitizer) /* memory_sanitizer: mark memory initialized via __msan_unpoison. */ |
154 | #include <sanitizer/msan_interface.h> |
155 | #define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __msan_unpoison |
156 | #elif __has_feature(address_sanitizer) /* address_sanitizer: use __asan_unpoison_memory_region instead. */ |
157 | #include <sanitizer/asan_interface.h> |
158 | #define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __asan_unpoison_memory_region |
159 | #endif /* sanitizer selection */ |
160 | #endif /* defined(__has_feature); in every other case this section does not define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED */ |
161 | |
162 | #endif // GEMMLOWP_ALLOW_INLINE_ASM |
163 | |
164 | // Detect Android (__ANDROID__ or ANDROID). Don't conflate with ARM - we care about tuning |
165 | // for non-ARM Android devices too. GEMMLOWP_ANDROID can be used in conjunction |
166 | // with x86 to tune differently for mobile x86 CPUs (Atom) vs. desktop x86 CPUs. |
167 | #if defined(__ANDROID__) || defined(ANDROID) |
168 | #define GEMMLOWP_ANDROID |
169 | #endif |
170 | |
171 | #endif // GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_ |
172 | |