1 | // Copyright 2017 Google Inc. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef HIGHWAYHASH_ARCH_SPECIFIC_H_ |
16 | #define HIGHWAYHASH_ARCH_SPECIFIC_H_ |
17 | |
18 | // WARNING: this is a "restricted" header because it is included from |
19 | // translation units compiled with different flags. This header and its |
20 | // dependencies must not define any function unless it is static inline and/or |
21 | // within namespace HH_TARGET_NAME. |
22 | // |
23 | // Background: older GCC/Clang require flags such as -mavx2 before AVX2 SIMD |
24 | // intrinsics can be used. These intrinsics are only used within blocks that |
25 | // first verify CPU capabilities. However, the flag also allows the compiler to |
26 | // generate AVX2 code in other places. This can violate the One Definition Rule, |
27 | // which requires multiple instances of a function with external linkage |
28 | // (e.g. extern inline in a header) to be "equivalent". To prevent the resulting |
29 | // crashes on non-AVX2 CPUs, any header (transitively) included from a |
30 | // translation unit compiled with different flags is "restricted". This means |
31 | // all function definitions must have internal linkage (e.g. static inline), or |
32 | // reside in namespace HH_TARGET_NAME, which expands to a name unique to the |
33 | // current compiler flags. |
34 | // |
35 | // Most C system headers are safe to include, but C++ headers should generally |
36 | // be avoided because they often do not specify static linkage and cannot |
37 | // reliably be wrapped in a namespace. |
38 | |
39 | #include "highwayhash/compiler_specific.h" |
40 | |
41 | #include <stdint.h> |
42 | |
43 | #if HH_MSC_VERSION |
44 | #include <intrin.h> // _byteswap_* |
45 | #endif |
46 | |
47 | namespace highwayhash { |
48 | |
// Architecture detection. Every HH_ARCH_* macro below is always defined and
// expands to either 0 or 1, so it can be tested directly with #if (no need for
// defined()).

// 1 if __x86_64__ or _M_X64 is predefined by the compiler.
#if !defined(__x86_64__) && !defined(_M_X64)
#define HH_ARCH_X64 0
#else
#define HH_ARCH_X64 1
#endif

// 1 if __aarch64__ or __arm64__ is predefined by the compiler.
#if !defined(__aarch64__) && !defined(__arm64__)
#define HH_ARCH_AARCH64 0
#else
#define HH_ARCH_AARCH64 1
#endif

// 1 if __arm__ is predefined by the compiler.
#if !defined(__arm__)
#define HH_ARCH_ARM 0
#else
#define HH_ARCH_ARM 1
#endif

// 1 if __ARM_NEON__ or __ARM_NEON is predefined by the compiler.
#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)
#define HH_ARCH_NEON 0
#else
#define HH_ARCH_NEON 1
#endif

// 1 if __powerpc64__ or _M_PPC is predefined by the compiler.
#if !defined(__powerpc64__) && !defined(_M_PPC)
#define HH_ARCH_PPC 0
#else
#define HH_ARCH_PPC 1
#endif
78 | |
// Target := instruction set extension(s) such as SSE41. A translation unit can
// only provide a single target-specific implementation because each target
// requires different compiler flags.
82 | |
// The build system may choose the target by predefining HH_TARGET_NAME (this
// is necessary for Portable on X64, and SSE41 on MSVC). If it did not, select
// the most efficient target that can be compiled given the current flags.
#if !defined(HH_TARGET_NAME)

// To avoid excessive code size and dispatch overhead, only a few groups of
// extensions are supported, e.g. FMA+BMI2+AVX+AVX2 =: "AVX2". The names
// defined here must match the suffixes of the HH_TARGET_* constants.
// Branches are ordered from most to least preferred; the first match wins.
#if defined(__AVX2__)
#define HH_TARGET_NAME AVX2
#elif defined(__SSE4_1__) || (defined(__AVX__) && HH_MSC_VERSION != 0)
// MSVC does not set __SSE4_1__, but it does set __AVX__; also accepting the
// latter there means we at least get SSE4 on machines supporting AVX but not
// AVX2.
// https://stackoverflow.com/questions/18563978/detect-the-availability-of-sse-sse2-instruction-set-in-visual-studio
#define HH_TARGET_NAME SSE41
#elif defined(__VSX__)
#define HH_TARGET_NAME VSX
#elif HH_ARCH_NEON
#define HH_TARGET_NAME NEON
#else
#define HH_TARGET_NAME Portable
#endif

#endif  // !defined(HH_TARGET_NAME)
107 | |
// Pastes two tokens together WITHOUT macro-expanding them first.
#define HH_CONCAT(lhs, rhs) lhs##rhs
// Extra level of indirection so that macro arguments (e.g. HH_TARGET_NAME) are
// fully expanded before being pasted; required by the macro expansion rules.
#define HH_EXPAND_CONCAT(lhs, rhs) HH_CONCAT(lhs, rhs)
// Yields the identifier formed by "prefix" followed by the expansion of
// HH_TARGET_NAME, e.g. prefix=HH_TARGET_ and HH_TARGET_NAME=AVX2 gives
// HH_TARGET_AVX2.
#define HH_ADD_TARGET_SUFFIX(prefix) HH_EXPAND_CONCAT(prefix, HH_TARGET_NAME)

// Integer constant identifying the current target (one of the HH_TARGET_*
// values below). Typical usage: HHStateT<HH_TARGET>. Using this instead of a
// hard-coded target ensures your code keeps working when compiler flags are
// changed, and benefits from subsequently added targets/specializations.
#define HH_TARGET HH_ADD_TARGET_SUFFIX(HH_TARGET_)

// Deprecated former name of HH_TARGET; please use HH_TARGET instead.
#define HH_TARGET_PREFERRED HH_TARGET

// Integer literal for each target, so that the preprocessor can compare them
// with HH_TARGET. Do not instantiate templates with these values - use
// HH_TARGET instead. Each value must be a unique power of two (see
// TargetBits). All are defined even if a target is unavailable on this
// HH_ARCH, which allows calling TargetName for any of them. The suffixes must
// match the identifiers used for HH_TARGET_NAME.
#define HH_TARGET_Portable 1
#define HH_TARGET_SSE41 2
#define HH_TARGET_AVX2 4
#define HH_TARGET_VSX 8
#define HH_TARGET_NEON 16
133 | |
// Bit array holding one or more HH_TARGET_* values. Used to indicate which
// target(s) are supported or were called by InstructionSets::RunAll.
using TargetBits = unsigned;

namespace HH_TARGET_NAME {

// Invokes func(bit_value) once for every set bit in "bits", visiting them from
// least- to most-significant. Does nothing if bits == 0.
template <class Func>
void ForeachTarget(TargetBits bits, const Func& func) {
  // "bits" is a by-value copy, so it also serves as the set of bits not yet
  // visited; bits & (bits - 1) removes the lowest set bit after each call.
  for (; bits != 0; bits &= bits - 1) {
    // Isolate the lowest set bit (x & -x, using unsigned modular arithmetic).
    const TargetBits lowest_set = bits & (0u - bits);
    func(lowest_set);
  }
}

}  // namespace HH_TARGET_NAME
151 | |
152 | // Returns a brief human-readable string literal identifying one of the above |
153 | // bits, or nullptr if zero, multiple, or unknown bits are set. |
154 | const char* TargetName(const TargetBits target_bit); |
155 | |
156 | // Returns the nominal (without Turbo Boost) CPU clock rate [Hertz]. Useful for |
157 | // (roughly) characterizing the CPU speed. |
158 | double NominalClockRate(); |
159 | |
160 | // Returns tsc_timer frequency, useful for converting ticks to seconds. This is |
161 | // unaffected by CPU throttling ("invariant"). Thread-safe. Returns timebase |
162 | // frequency on PPC and NominalClockRate on all other platforms. |
163 | double InvariantTicksPerSecond(); |
164 | |
#if HH_ARCH_X64

// Executes the CPUID instruction with eax=level and ecx=count, and stores the
// resulting registers in the "abcd" array as {eax, ebx, ecx, edx} (hence the
// name abcd).
void Cpuid(uint32_t level, uint32_t count, uint32_t* HH_RESTRICT abcd);

// Returns the APIC ID of the CPU on which the caller is currently running.
uint32_t ApicId();

#endif  // HH_ARCH_X64
176 | |
177 | } // namespace highwayhash |
178 | |
179 | #endif // HIGHWAYHASH_ARCH_SPECIFIC_H_ |
180 | |