1/*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
19#include <cassert>
20#include <climits>
21#include <cstdint>
22
23#include <folly/Portability.h>
24#include <folly/detail/Futex.h>
25
26namespace folly {
27
28/**
29 * Tiny exclusive lock that packs four lock slots into a single
30 * byte. Each slot is an independent real, sleeping lock. The default
31 * lock and unlock functions operate on slot zero, which modifies only
32 * the low two bits of the host byte.
33 *
34 * You should zero-initialize the bits of a MicroLock that you intend
35 * to use.
36 *
37 * If you're not space-constrained, prefer std::mutex, which will
38 * likely be faster, since it has more than two bits of information to
39 * work with.
40 *
41 * You are free to put a MicroLock in a union with some other object.
42 * If, for example, you want to use the bottom two bits of a pointer
43 * as a lock, you can put a MicroLock in a union with the pointer and
44 * limit yourself to MicroLock slot zero, which will use the two
45 * least-significant bits in the bottom byte.
46 *
47 * (Note that such a union is safe only because MicroLock is based on
48 * a character type, and even under a strict interpretation of C++'s
49 * aliasing rules, character types may alias anything.)
50 *
51 * MicroLock uses a dirty trick: it actually operates on the full
52 * 32-bit, four-byte-aligned bit of memory into which it is embedded.
53 * It never modifies bits outside the ones it's defined to modify, but
54 * it _accesses_ all the bits in the 32-bit memory location for
55 * purposes of futex management.
56 *
57 * The MaxSpins template parameter controls the number of times we
58 * spin trying to acquire the lock. MaxYields controls the number of
59 * times we call sched_yield; once we've tried to acquire the lock
60 * MaxSpins + MaxYields times, we sleep on the lock futex.
61 * By adjusting these parameters, you can make MicroLock behave as
62 * much or as little like a conventional spinlock as you'd like.
63 *
64 * Performance
65 * -----------
66 *
67 * With the default template options, the timings for uncontended
68 * acquire-then-release come out as follows on Intel(R) Xeon(R) CPU
69 * E5-2660 0 @ 2.20GHz, in @mode/opt, as of the master tree at Tue, 01
70 * Mar 2016 19:48:15.
71 *
72 * ========================================================================
73 * folly/test/SmallLocksBenchmark.cpp relative time/iter iters/s
74 * ========================================================================
75 * MicroSpinLockUncontendedBenchmark 13.46ns 74.28M
76 * PicoSpinLockUncontendedBenchmark 14.99ns 66.71M
77 * MicroLockUncontendedBenchmark 27.06ns 36.96M
78 * StdMutexUncontendedBenchmark 25.18ns 39.72M
79 * VirtualFunctionCall 1.72ns 579.78M
80 * ========================================================================
81 *
82 * (The virtual dispatch benchmark is provided for scale.)
83 *
84 * While the uncontended case for MicroLock is competitive with the
85 * glibc 2.2.0 implementation of std::mutex, std::mutex is likely to be
86 * faster in the contended case, because we need to wake up all waiters
87 * when we release.
88 *
89 * Make sure to benchmark your particular workload.
90 *
91 */
92
93class MicroLockCore {
94 protected:
95 uint8_t lock_;
96 inline detail::Futex<>* word() const; // Well, halfword on 64-bit systems
97 inline uint32_t baseShift(unsigned slot) const;
98 inline uint32_t heldBit(unsigned slot) const;
99 inline uint32_t waitBit(unsigned slot) const;
100 static void lockSlowPath(
101 uint32_t oldWord,
102 detail::Futex<>* wordPtr,
103 uint32_t slotHeldBit,
104 unsigned maxSpins,
105 unsigned maxYields);
106
107 public:
108 FOLLY_DISABLE_ADDRESS_SANITIZER inline void unlock(unsigned slot);
109 inline void unlock() {
110 unlock(0);
111 }
112 // Initializes all the slots.
113 inline void init() {
114 lock_ = 0;
115 }
116};
117
118inline detail::Futex<>* MicroLockCore::word() const {
119 uintptr_t lockptr = (uintptr_t)&lock_;
120 lockptr &= ~(sizeof(uint32_t) - 1);
121 return (detail::Futex<>*)lockptr;
122}
123
124inline unsigned MicroLockCore::baseShift(unsigned slot) const {
125 assert(slot < CHAR_BIT / 2);
126
127 unsigned offset_bytes = (unsigned)((uintptr_t)&lock_ - (uintptr_t)word());
128
129 return (
130 unsigned)(kIsLittleEndian ? offset_bytes * CHAR_BIT + slot * 2 : CHAR_BIT * (sizeof(uint32_t) - offset_bytes - 1) + slot * 2);
131}
132
133inline uint32_t MicroLockCore::heldBit(unsigned slot) const {
134 return 1U << (baseShift(slot) + 0);
135}
136
137inline uint32_t MicroLockCore::waitBit(unsigned slot) const {
138 return 1U << (baseShift(slot) + 1);
139}
140
141void MicroLockCore::unlock(unsigned slot) {
142 detail::Futex<>* wordPtr = word();
143 uint32_t oldWord;
144 uint32_t newWord;
145
146 oldWord = wordPtr->load(std::memory_order_relaxed);
147 do {
148 assert(oldWord & heldBit(slot));
149 newWord = oldWord & ~(heldBit(slot) | waitBit(slot));
150 } while (!wordPtr->compare_exchange_weak(
151 oldWord, newWord, std::memory_order_release, std::memory_order_relaxed));
152
153 if (oldWord & waitBit(slot)) {
154 detail::futexWake(wordPtr, 1, heldBit(slot));
155 }
156}
157
158template <unsigned MaxSpins = 1000, unsigned MaxYields = 0>
159class MicroLockBase : public MicroLockCore {
160 public:
161 FOLLY_DISABLE_ADDRESS_SANITIZER inline void lock(unsigned slot);
162 inline void lock() {
163 lock(0);
164 }
165 FOLLY_DISABLE_ADDRESS_SANITIZER inline bool try_lock(unsigned slot);
166 inline bool try_lock() {
167 return try_lock(0);
168 }
169};
170
171template <unsigned MaxSpins, unsigned MaxYields>
172bool MicroLockBase<MaxSpins, MaxYields>::try_lock(unsigned slot) {
173 // N.B. You might think that try_lock is just the fast path of lock,
174 // but you'd be wrong. Keep in mind that other parts of our host
175 // word might be changing while we take the lock! We're not allowed
176 // to fail spuriously if the lock is in fact not held, even if other
177 // people are concurrently modifying other parts of the word.
178 //
179 // We need to loop until we either see firm evidence that somebody
180 // else has the lock (by looking at heldBit) or see our CAS succeed.
181 // A failed CAS by itself does not indicate lock-acquire failure.
182
183 detail::Futex<>* wordPtr = word();
184 uint32_t oldWord = wordPtr->load(std::memory_order_relaxed);
185 do {
186 if (oldWord & heldBit(slot)) {
187 return false;
188 }
189 } while (!wordPtr->compare_exchange_weak(
190 oldWord,
191 oldWord | heldBit(slot),
192 std::memory_order_acquire,
193 std::memory_order_relaxed));
194
195 return true;
196}
197
198template <unsigned MaxSpins, unsigned MaxYields>
199void MicroLockBase<MaxSpins, MaxYields>::lock(unsigned slot) {
200 static_assert(MaxSpins + MaxYields < (unsigned)-1, "overflow");
201
202 detail::Futex<>* wordPtr = word();
203 uint32_t oldWord;
204 oldWord = wordPtr->load(std::memory_order_relaxed);
205 if ((oldWord & heldBit(slot)) == 0 &&
206 wordPtr->compare_exchange_weak(
207 oldWord,
208 oldWord | heldBit(slot),
209 std::memory_order_acquire,
210 std::memory_order_relaxed)) {
211 // Fast uncontended case: memory_order_acquire above is our barrier
212 } else {
213 // lockSlowPath doesn't have any slot-dependent computation; it
214 // just shifts the input bit. Make sure its shifting produces the
215 // same result a call to waitBit for our slot would.
216 assert(heldBit(slot) << 1 == waitBit(slot));
217 // lockSlowPath emits its own memory barrier
218 lockSlowPath(oldWord, wordPtr, heldBit(slot), MaxSpins, MaxYields);
219 }
220}
221
222typedef MicroLockBase<> MicroLock;
223} // namespace folly
224