1 | // Copyright 2017 Google Inc. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef HIGHWAYHASH_STATE_H_ |
16 | #define HIGHWAYHASH_STATE_H_ |
17 | |
18 | // Helper functions to split inputs into packets and call State::Update on each. |
19 | |
20 | #include <stdint.h> |
21 | #include <cstddef> |
22 | #include <cstring> |
23 | #include <memory> |
24 | |
25 | #include "highwayhash/compiler_specific.h" |
26 | |
27 | namespace highwayhash { |
28 | |
29 | // uint64_t is unsigned long on Linux; we need 'unsigned long long' |
30 | // for interoperability with TensorFlow. |
31 | typedef unsigned long long HH_U64; // NOLINT |
32 | |
33 | // Copies the remaining bytes to a zero-padded buffer, sets the upper byte to |
34 | // size % 256 (always possible because this should only be called if the |
35 | // total size is not a multiple of the packet size) and updates hash state. |
36 | // |
37 | // The padding scheme is essentially from SipHash, but permuted for the |
38 | // convenience of AVX-2 masked loads. This function must use the same layout so |
39 | // that the vector and scalar HighwayTreeHash have the same result. |
40 | // |
41 | // "remaining_size" is the number of accessible/remaining bytes |
42 | // (size % kPacketSize). |
43 | // |
44 | // Primary template; the specialization for AVX-2 is faster. Intended as an |
45 | // implementation detail, do not call directly. |
46 | template <class State> |
47 | HH_INLINE void PaddedUpdate(const HH_U64 size, const char* remaining_bytes, |
48 | const HH_U64 remaining_size, State* state) { |
49 | char final_packet[State::kPacketSize] HH_ALIGNAS(32) = {0}; |
50 | |
51 | // This layout matches the AVX-2 specialization in highway_tree_hash.h. |
52 | uint32_t packet4 = static_cast<uint32_t>(size) << 24; |
53 | |
54 | const size_t remainder_mod4 = remaining_size & 3; |
55 | if (remainder_mod4 != 0) { |
56 | const char* final_bytes = remaining_bytes + remaining_size - remainder_mod4; |
57 | packet4 += static_cast<uint32_t>(final_bytes[0]); |
58 | const int idx1 = remainder_mod4 >> 1; |
59 | const int idx2 = remainder_mod4 - 1; |
60 | packet4 += static_cast<uint32_t>(final_bytes[idx1]) << 8; |
61 | packet4 += static_cast<uint32_t>(final_bytes[idx2]) << 16; |
62 | } |
63 | |
64 | memcpy(final_packet, remaining_bytes, remaining_size - remainder_mod4); |
65 | memcpy(final_packet + State::kPacketSize - 4, &packet4, sizeof(packet4)); |
66 | |
67 | state->Update(final_packet); |
68 | } |
69 | |
70 | // Updates hash state for every whole packet, and once more for the final |
71 | // padded packet. |
72 | template <class State> |
73 | HH_INLINE void UpdateState(const char* bytes, const HH_U64 size, State* state) { |
74 | // Feed entire packets. |
75 | const int kPacketSize = State::kPacketSize; |
76 | static_assert((kPacketSize & (kPacketSize - 1)) == 0, "Size must be 2^i." ); |
77 | const size_t remainder = size & (kPacketSize - 1); |
78 | const size_t truncated_size = size - remainder; |
79 | for (size_t i = 0; i < truncated_size; i += kPacketSize) { |
80 | state->Update(bytes + i); |
81 | } |
82 | |
83 | PaddedUpdate(size, bytes + truncated_size, remainder, state); |
84 | } |
85 | |
86 | // Convenience function for updating with the bytes of a string. |
87 | template <class String, class State> |
88 | HH_INLINE void UpdateState(const String& s, State* state) { |
89 | const char* bytes = reinterpret_cast<const char*>(s.data()); |
90 | const size_t size = s.length() * sizeof(typename String::value_type); |
91 | UpdateState(bytes, size, state); |
92 | } |
93 | |
94 | // Computes a hash of a byte array using the given hash State class. |
95 | // |
96 | // Example: const SipHashState::Key key = { 1, 2 }; char data[4]; |
97 | // ComputeHash<SipHashState>(key, data, sizeof(data)); |
98 | // |
99 | // This function avoids duplicating Update/Finalize in every call site. |
100 | // Callers wanting to combine multiple hashes should repeatedly UpdateState() |
101 | // and only call State::Finalize once. |
102 | template <class State> |
103 | HH_U64 ComputeHash(const typename State::Key& key, const char* bytes, |
104 | const HH_U64 size) { |
105 | State state(key); |
106 | UpdateState(bytes, size, &state); |
107 | return state.Finalize(); |
108 | } |
109 | |
110 | // Computes a hash of a string's bytes using the given hash State class. |
111 | // |
112 | // Example: const SipHashState::Key key = { 1, 2 }; |
113 | // StringHasher<SipHashState>()(key, std::u16string(u"abc")); |
114 | // |
115 | // A struct with nested function template enables deduction of the String type. |
116 | template <class State> |
117 | struct StringHasher { |
118 | template <class String> |
119 | HH_U64 operator()(const typename State::Key& key, const String& s) { |
120 | State state(key); |
121 | UpdateState(s, &state); |
122 | return state.Finalize(); |
123 | } |
124 | }; |
125 | |
126 | } // namespace highwayhash |
127 | |
128 | #endif // HIGHWAYHASH_STATE_H_ |
129 | |