1// Copyright 2018 The Gemmlowp Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// detect_platform.h: Sets up macros that control architecture-specific
16// features of gemmlowp's implementation.
17
#ifndef GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
#define GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_

// Our inline assembly paths assume GCC/Clang syntax.
// Native Client doesn't seem to support inline assembly(?).
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__native_client__)
#define GEMMLOWP_ALLOW_INLINE_ASM
#endif

// Define macro statement that avoids inlining for GCC.
// For non-GCC, define as empty macro.
#if defined(__GNUC__)
#define GEMMLOWP_NOINLINE __attribute__((noinline))
#else
#define GEMMLOWP_NOINLINE
#endif
34
// Detect ARM, 32-bit or 64-bit.
// __arm__ is predefined by GCC/Clang when targeting 32-bit ARM;
// __aarch64__ when targeting 64-bit ARM (AArch64).
#ifdef __arm__
#define GEMMLOWP_ARM_32
#endif

#ifdef __aarch64__
#define GEMMLOWP_ARM_64
#endif

// Convenience token: defined on ARM of either bitness.
#if defined(GEMMLOWP_ARM_32) || defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_ARM
#endif
47
// Detect MIPS, 32-bit or 64-bit.
// __LP64__ distinguishes a 64-bit ABI from the 32-bit ones here.
#if defined(__mips) && !defined(__LP64__)
#define GEMMLOWP_MIPS_32
#endif

#if defined(__mips) && defined(__LP64__)
#define GEMMLOWP_MIPS_64
#endif

// Convenience token: defined on MIPS of either bitness.
#if defined(GEMMLOWP_MIPS_32) || defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MIPS
#endif
60
// Detect x86, 32-bit or 64-bit. The _M_* tokens cover MSVC; the
// double-underscore tokens cover GCC/Clang and other Unix compilers.
#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
#define GEMMLOWP_X86_32
#endif

#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
#define GEMMLOWP_X86_64
#endif

// Convenience token: defined on x86 of either bitness.
#if defined(GEMMLOWP_X86_32) || defined(GEMMLOWP_X86_64)
#define GEMMLOWP_X86
#endif

// Detect WebAssembly SIMD (the wasm simd128 proposal).
#if defined(__wasm_simd128__)
#define GEMMLOWP_WASMSIMD
#endif
78
// Some of our optimized paths use inline assembly, and for now we don't
// bother enabling some other optimized paths using intrinsics
// where we can't use inline assembly paths.
#ifdef GEMMLOWP_ALLOW_INLINE_ASM

// Detect NEON. It's important to check for both tokens:
// toolchains vary in whether they define __ARM_NEON or __ARM_NEON__.
#if (defined __ARM_NEON) || (defined __ARM_NEON__)
#define GEMMLOWP_NEON
#endif

// Convenience NEON tokens for 32-bit or 64-bit
#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_32)
#define GEMMLOWP_NEON_32
#endif

#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_NEON_64
#endif

// Detect MIPS MSA (requires MIPS ISA revision 5 or later).
// Limit MSA optimizations to little-endian CPUs for now.
// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
#if defined(GEMMLOWP_MIPS) && (__mips_isa_rev >= 5) && defined(__mips_msa) && \
    defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define GEMMLOWP_MSA
#endif

// Convenience MIPS MSA tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_32)
#define GEMMLOWP_MSA_32
#endif

#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MSA_64
#endif

// AVX2 paths must be explicitly opted into at build time with
// -D GEMMLOWP_ENABLE_AVX2, in addition to the compiler enabling AVX2.
// Detect AVX2
#if defined(__AVX2__) && defined(GEMMLOWP_ENABLE_AVX2)
#define GEMMLOWP_AVX2
// Detect SSE4.
// MSVC does not have __SSE4_1__ macro, but will enable SSE4
// when AVX is turned on.
#elif defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__))
#define GEMMLOWP_SSE4
// Detect SSE3.
#elif defined(__SSE3__)
#define GEMMLOWP_SSE3
#endif

// Convenience SSE4 tokens for 32-bit or 64-bit.
// GEMMLOWP_DISABLE_SSE4 lets a build opt out of the SSE4 paths even
// when the compiler reports SSE4 support.
#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_32) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_32
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_32)
#define GEMMLOWP_SSE3_32
#endif

#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_64) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_64
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_SSE3_64
#endif

#if defined(GEMMLOWP_AVX2) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_AVX2_64
#endif

// Under MemorySanitizer or AddressSanitizer (Clang's __has_feature),
// map GEMMLOWP_MARK_MEMORY_AS_INITIALIZED to the sanitizer's
// unpoison function so callers can mark memory regions as valid.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __msan_unpoison
#elif __has_feature(address_sanitizer)
#include <sanitizer/asan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __asan_unpoison_memory_region
#endif
#endif

#endif  // GEMMLOWP_ALLOW_INLINE_ASM
163
// Detect Android. Don't conflate with ARM - we care about tuning
// for non-ARM Android devices too. This can be used in conjunction
// with x86 to tune differently for mobile x86 CPUs (Atom) vs. desktop x86 CPUs.
// Both spellings are checked: __ANDROID__ is the compiler-predefined
// token; ANDROID is commonly set by Android build systems.
#if defined(__ANDROID__) || defined(ANDROID)
#define GEMMLOWP_ANDROID
#endif

#endif  // GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
172