// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15#ifndef HIGHWAYHASH_ARCH_SPECIFIC_H_
16#define HIGHWAYHASH_ARCH_SPECIFIC_H_
17
18// WARNING: this is a "restricted" header because it is included from
19// translation units compiled with different flags. This header and its
20// dependencies must not define any function unless it is static inline and/or
21// within namespace HH_TARGET_NAME.
22//
23// Background: older GCC/Clang require flags such as -mavx2 before AVX2 SIMD
24// intrinsics can be used. These intrinsics are only used within blocks that
25// first verify CPU capabilities. However, the flag also allows the compiler to
26// generate AVX2 code in other places. This can violate the One Definition Rule,
27// which requires multiple instances of a function with external linkage
28// (e.g. extern inline in a header) to be "equivalent". To prevent the resulting
29// crashes on non-AVX2 CPUs, any header (transitively) included from a
30// translation unit compiled with different flags is "restricted". This means
31// all function definitions must have internal linkage (e.g. static inline), or
32// reside in namespace HH_TARGET_NAME, which expands to a name unique to the
33// current compiler flags.
34//
35// Most C system headers are safe to include, but C++ headers should generally
36// be avoided because they often do not specify static linkage and cannot
37// reliably be wrapped in a namespace.
38
39#include "highwayhash/compiler_specific.h"
40
41#include <stdint.h>
42
43#if HH_MSC_VERSION
44#include <intrin.h> // _byteswap_*
45#endif
46
47namespace highwayhash {
48
// Architecture detection. Each HH_ARCH_* macro below is always defined: to 1
// if one of the tested predefined macros is present, otherwise to 0. This
// allows testing them with a plain #if.

// Set if __x86_64__ or _M_X64 is defined.
#if defined(__x86_64__) || defined(_M_X64)
#define HH_ARCH_X64 1
#else
#define HH_ARCH_X64 0
#endif

// Set if __aarch64__ or __arm64__ is defined.
#if defined(__aarch64__) || defined(__arm64__)
#define HH_ARCH_AARCH64 1
#else
#define HH_ARCH_AARCH64 0
#endif

// Set if __arm__ is defined.
#ifdef __arm__
#define HH_ARCH_ARM 1
#else
#define HH_ARCH_ARM 0
#endif

// Set if __ARM_NEON__ or __ARM_NEON is defined.
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#define HH_ARCH_NEON 1
#else
#define HH_ARCH_NEON 0
#endif

// Set if __powerpc64__ or _M_PPC is defined.
#if defined(__powerpc64__) || defined(_M_PPC)
#define HH_ARCH_PPC 1
#else
#define HH_ARCH_PPC 0
#endif

// Target := instruction set extension(s) such as SSE41. A translation unit can
// only provide a single target-specific implementation because they require
// different compiler flags.

// Either the build system specifies the target by defining HH_TARGET_NAME
// (which is necessary for Portable on X64, and SSE41 on MSVC), or we'll choose
// the most efficient one that can be compiled given the current flags:
#ifndef HH_TARGET_NAME

// To avoid excessive code size and dispatch overhead, we only support a few
// groups of extensions, e.g. FMA+BMI2+AVX+AVX2 =: "AVX2". These names must
// match the HH_TARGET_* suffixes below. The checks are ordered so that the
// first matching branch wins; Portable is the fallback.
#ifdef __AVX2__
#define HH_TARGET_NAME AVX2
// MSVC does not set SSE4_1, but it does set AVX; checking for the latter means
// we at least get SSE4 on machines supporting AVX but not AVX2.
// https://stackoverflow.com/questions/18563978/detect-the-availability-of-sse-sse2-instruction-set-in-visual-studio
#elif defined(__SSE4_1__) || (HH_MSC_VERSION != 0 && defined(__AVX__))
#define HH_TARGET_NAME SSE41
#elif defined(__VSX__)
#define HH_TARGET_NAME VSX
#elif HH_ARCH_NEON
#define HH_TARGET_NAME NEON
#else
#define HH_TARGET_NAME Portable
#endif

#endif  // HH_TARGET_NAME

// Pastes the two tokens together as written (arguments are not expanded).
#define HH_CONCAT(first, second) first##second
// Extra level of indirection required due to macro expansion rules: it lets
// arguments that are themselves macros (e.g. HH_TARGET_NAME) be expanded
// before HH_CONCAT pastes them.
#define HH_EXPAND_CONCAT(first, second) HH_CONCAT(first, second)
// Appends HH_TARGET_NAME to "identifier_prefix".
#define HH_ADD_TARGET_SUFFIX(identifier_prefix) \
  HH_EXPAND_CONCAT(identifier_prefix, HH_TARGET_NAME)

// HH_TARGET expands to an integer constant. Typical usage: HHStateT<HH_TARGET>.
// This ensures your code will work correctly when compiler flags are changed,
// and benefit from subsequently added targets/specializations.
#define HH_TARGET HH_ADD_TARGET_SUFFIX(HH_TARGET_)

// Deprecated former name of HH_TARGET; please use HH_TARGET instead.
#define HH_TARGET_PREFERRED HH_TARGET

// Associate targets with integer literals so the preprocessor can compare them
// with HH_TARGET. Do not instantiate templates with these values - use
// HH_TARGET instead. Must be unique powers of two, see TargetBits. Always
// defined even if unavailable on this HH_ARCH to allow calling TargetName.
// The suffixes must match the HH_TARGET_NAME identifiers.
#define HH_TARGET_Portable 1
#define HH_TARGET_SSE41 2
#define HH_TARGET_AVX2 4
#define HH_TARGET_VSX 8
#define HH_TARGET_NEON 16

// Bit array for one or more HH_TARGET_*. Used to indicate which target(s) are
// supported or were called by InstructionSets::RunAll.
using TargetBits = unsigned;

namespace HH_TARGET_NAME {

// Calls func(bit_value) once for every nonzero bit in "bits", starting with
// the least-significant set bit. Every bit_value has exactly one bit set.
// func is not called at all if bits == 0.
template <class Func>
void ForeachTarget(TargetBits bits, const Func& func) {
  while (bits != 0) {
    // (~bits + 1) is the unsigned negation of bits; ANDing it with bits
    // isolates the lowest set bit.
    const TargetBits lowest = bits & (~bits + 1);
    func(lowest);
    // Clear the bit that was just reported so the loop makes progress.
    bits &= ~lowest;
  }
}

}  // namespace HH_TARGET_NAME

152// Returns a brief human-readable string literal identifying one of the above
153// bits, or nullptr if zero, multiple, or unknown bits are set.
154const char* TargetName(const TargetBits target_bit);
155
// Returns the nominal (without Turbo Boost) CPU clock rate [Hertz]. Useful for
// (roughly) characterizing the CPU speed.
double NominalClockRate();

// Returns tsc_timer frequency, useful for converting ticks to seconds. This is
// unaffected by CPU throttling ("invariant"). Thread-safe. Returns timebase
// frequency on PPC and NominalClockRate on all other platforms.
double InvariantTicksPerSecond();

#if HH_ARCH_X64

// Calls CPUID instruction with eax=level and ecx=count and returns the result
// in abcd array where abcd = {eax, ebx, ecx, edx} (hence the name abcd).
void Cpuid(const uint32_t level, const uint32_t count,
           uint32_t* HH_RESTRICT abcd);

// Returns the APIC ID of the CPU on which we're currently running.
uint32_t ApicId();

#endif  // HH_ARCH_X64

177} // namespace highwayhash
178
179#endif // HIGHWAYHASH_ARCH_SPECIFIC_H_
180