1// Copyright (c) 2014 Google, Inc.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20//
21// FarmHash, by Geoff Pike
22
23//
24// http://code.google.com/p/farmhash/
25//
26// This file provides a few functions for hashing strings and other
27// data. All of them are high-quality functions in the sense that
28// they do well on standard tests such as Austin Appleby's SMHasher.
29// They're also fast. FarmHash is the successor to CityHash.
30//
31// Functions in the FarmHash family are not suitable for cryptography.
32//
33// WARNING: This code has been only lightly tested on big-endian platforms!
34// It is known to work well on little-endian platforms that have a small penalty
35// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
36// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
37// bug reports are welcome.
38//
39// By the way, for some hash functions, given strings a and b, the hash
40// of a+b is easily derived from the hashes of a and b. This property
41// doesn't hold for any hash functions in this file.
42
43#ifndef FARM_HASH_H_
44#define FARM_HASH_H_
45
46#include <assert.h>
47#include <stdint.h>
48#include <stdlib.h>
49#include <string.h> // for memcpy and memset
50#include <utility>
51
52#ifndef NAMESPACE_FOR_HASH_FUNCTIONS
53#define NAMESPACE_FOR_HASH_FUNCTIONS util
54#endif
55
56namespace NAMESPACE_FOR_HASH_FUNCTIONS {
57
58#if defined(FARMHASH_UINT128_T_DEFINED)
59#if defined(__clang__)
60#if !defined(uint128_t)
61#define uint128_t __uint128_t
62#endif
63#endif
64inline uint64_t Uint128Low64(const uint128_t x) {
65 return static_cast<uint64_t>(x);
66}
67inline uint64_t Uint128High64(const uint128_t x) {
68 return static_cast<uint64_t>(x >> 64);
69}
70inline uint128_t Uint128(uint64_t lo, uint64_t hi) {
71 return lo + (((uint128_t)hi) << 64);
72}
73#else
74typedef std::pair<uint64_t, uint64_t> uint128_t;
75inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
76inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
77inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
78#endif
79
80
81// BASIC STRING HASHING
82
83// Hash function for a byte array.
84// May change from time to time, may differ on different platforms, may differ
85// depending on NDEBUG.
86size_t Hash(const char* s, size_t len);
87
88// Hash function for a byte array. Most useful in 32-bit binaries.
89// May change from time to time, may differ on different platforms, may differ
90// depending on NDEBUG.
91uint32_t Hash32(const char* s, size_t len);
92
93// Hash function for a byte array. For convenience, a 32-bit seed is also
94// hashed into the result.
95// May change from time to time, may differ on different platforms, may differ
96// depending on NDEBUG.
97uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed);
98
99// Hash function for a byte array.
100// May change from time to time, may differ on different platforms, may differ
101// depending on NDEBUG.
102uint64_t Hash64(const char* s, size_t len);
103
104// Hash function for a byte array. For convenience, a 64-bit seed is also
105// hashed into the result.
106// May change from time to time, may differ on different platforms, may differ
107// depending on NDEBUG.
108uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed);
109
110// Hash function for a byte array. For convenience, two seeds are also
111// hashed into the result.
112// May change from time to time, may differ on different platforms, may differ
113// depending on NDEBUG.
114uint64_t Hash64WithSeeds(const char* s, size_t len,
115 uint64_t seed0, uint64_t seed1);
116
117// Hash function for a byte array.
118// May change from time to time, may differ on different platforms, may differ
119// depending on NDEBUG.
120uint128_t Hash128(const char* s, size_t len);
121
122// Hash function for a byte array. For convenience, a 128-bit seed is also
123// hashed into the result.
124// May change from time to time, may differ on different platforms, may differ
125// depending on NDEBUG.
126uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed);
127
128// BASIC NON-STRING HASHING
129
130// Hash 128 input bits down to 64 bits of output.
131// This is intended to be a reasonably good hash function.
132// May change from time to time, may differ on different platforms, may differ
133// depending on NDEBUG.
134inline uint64_t Hash128to64(uint128_t x) {
135 // Murmur-inspired hashing.
136 const uint64_t kMul = 0x9ddfea08eb382d69ULL;
137 uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
138 a ^= (a >> 47);
139 uint64_t b = (Uint128High64(x) ^ a) * kMul;
140 b ^= (b >> 47);
141 b *= kMul;
142 return b;
143}
144
145// FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
146
147// Fingerprint function for a byte array. Most useful in 32-bit binaries.
148uint32_t Fingerprint32(const char* s, size_t len);
149
150// Fingerprint function for a byte array.
151uint64_t Fingerprint64(const char* s, size_t len);
152
153// Fingerprint function for a byte array.
154uint128_t Fingerprint128(const char* s, size_t len);
155
156// This is intended to be a good fingerprinting primitive.
157// See below for more overloads.
158inline uint64_t Fingerprint(uint128_t x) {
159 // Murmur-inspired hashing.
160 const uint64_t kMul = 0x9ddfea08eb382d69ULL;
161 uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
162 a ^= (a >> 47);
163 uint64_t b = (Uint128High64(x) ^ a) * kMul;
164 b ^= (b >> 44);
165 b *= kMul;
166 b ^= (b >> 41);
167 b *= kMul;
168 return b;
169}
170
171// This is intended to be a good fingerprinting primitive.
172inline uint64_t Fingerprint(uint64_t x) {
173 // Murmur-inspired hashing.
174 const uint64_t kMul = 0x9ddfea08eb382d69ULL;
175 uint64_t b = x * kMul;
176 b ^= (b >> 44);
177 b *= kMul;
178 b ^= (b >> 41);
179 b *= kMul;
180 return b;
181}
182
183#ifndef FARMHASH_NO_CXX_STRING
184
185// Convenience functions to hash or fingerprint C++ strings.
186// These require that Str::data() return a pointer to the first char
187// (as a const char*) and that Str::length() return the string's length;
188// they work with std::string, for example.
189
190// Hash function for a byte array.
191// May change from time to time, may differ on different platforms, may differ
192// depending on NDEBUG.
193template <typename Str>
194inline size_t Hash(const Str& s) {
195 assert(sizeof(s[0]) == 1);
196 return Hash(s.data(), s.length());
197}
198
199// Hash function for a byte array. Most useful in 32-bit binaries.
200// May change from time to time, may differ on different platforms, may differ
201// depending on NDEBUG.
202template <typename Str>
203inline uint32_t Hash32(const Str& s) {
204 assert(sizeof(s[0]) == 1);
205 return Hash32(s.data(), s.length());
206}
207
208// Hash function for a byte array. For convenience, a 32-bit seed is also
209// hashed into the result.
210// May change from time to time, may differ on different platforms, may differ
211// depending on NDEBUG.
212template <typename Str>
213inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
214 assert(sizeof(s[0]) == 1);
215 return Hash32WithSeed(s.data(), s.length(), seed);
216}
217
218// Hash 128 input bits down to 64 bits of output.
219// Hash function for a byte array.
220// May change from time to time, may differ on different platforms, may differ
221// depending on NDEBUG.
222template <typename Str>
223inline uint64_t Hash64(const Str& s) {
224 assert(sizeof(s[0]) == 1);
225 return Hash64(s.data(), s.length());
226}
227
228// Hash function for a byte array. For convenience, a 64-bit seed is also
229// hashed into the result.
230// May change from time to time, may differ on different platforms, may differ
231// depending on NDEBUG.
232template <typename Str>
233inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
234 assert(sizeof(s[0]) == 1);
235 return Hash64WithSeed(s.data(), s.length(), seed);
236}
237
238// Hash function for a byte array. For convenience, two seeds are also
239// hashed into the result.
240// May change from time to time, may differ on different platforms, may differ
241// depending on NDEBUG.
242template <typename Str>
243inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) {
244 assert(sizeof(s[0]) == 1);
245 return Hash64WithSeeds(s.data(), s.length(), seed0, seed1);
246}
247
248// Hash function for a byte array.
249// May change from time to time, may differ on different platforms, may differ
250// depending on NDEBUG.
251template <typename Str>
252inline uint128_t Hash128(const Str& s) {
253 assert(sizeof(s[0]) == 1);
254 return Hash128(s.data(), s.length());
255}
256
257// Hash function for a byte array. For convenience, a 128-bit seed is also
258// hashed into the result.
259// May change from time to time, may differ on different platforms, may differ
260// depending on NDEBUG.
261template <typename Str>
262inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) {
263 assert(sizeof(s[0]) == 1);
264 return Hash128(s.data(), s.length(), seed);
265}
266
267// FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
268
269// Fingerprint function for a byte array. Most useful in 32-bit binaries.
270template <typename Str>
271inline uint32_t Fingerprint32(const Str& s) {
272 assert(sizeof(s[0]) == 1);
273 return Fingerprint32(s.data(), s.length());
274}
275
276// Fingerprint 128 input bits down to 64 bits of output.
277// Fingerprint function for a byte array.
278template <typename Str>
279inline uint64_t Fingerprint64(const Str& s) {
280 assert(sizeof(s[0]) == 1);
281 return Fingerprint64(s.data(), s.length());
282}
283
284// Fingerprint function for a byte array.
285template <typename Str>
286inline uint128_t Fingerprint128(const Str& s) {
287 assert(sizeof(s[0]) == 1);
288 return Fingerprint128(s.data(), s.length());
289}
290
291#endif
292
293} // namespace NAMESPACE_FOR_HASH_FUNCTIONS
294
295/* gently define FARMHASH_BIG_ENDIAN when detected big-endian machine */
296#if defined(__BIG_ENDIAN__)
297 #if !defined(FARMHASH_BIG_ENDIAN)
298 #define FARMHASH_BIG_ENDIAN
299 #endif
300#elif defined(__LITTLE_ENDIAN__)
301 // nothing for little-endian
302#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__)
303 // nothing for little-endian
304#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER == __ORDER_BIG_ENDIAN__)
305 #if !defined(FARMHASH_BIG_ENDIAN)
306 #define FARMHASH_BIG_ENDIAN
307 #endif
308#elif defined(__linux__) || defined(__CYGWIN__) || defined( __GNUC__ ) && !defined(_WIN32) || defined( __GNU_LIBRARY__ )
309 #include <endian.h> // libc6-dev, GLIBC
310 #if BYTE_ORDER == BIG_ENDIAN
311 #if !defined(FARMHASH_BIG_ENDIAN)
312 #define FARMHASH_BIG_ENDIAN
313 #endif
314 #endif
315#elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__s390x__)
316 #include <sys/endian.h>
317 #if BYTE_ORDER == BIG_ENDIAN
318 #if !defined(FARMHASH_BIG_ENDIAN)
319 #define FARMHASH_BIG_ENDIAN
320 #endif
321 #endif
322#elif defined(_WIN32)
323 // Windows is (currently) little-endian
324#else
325 #error "Unable to determine endianness!"
326#endif /* __BIG_ENDIAN__ */
327
328#endif // FARM_HASH_H_
329