1/*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/**
18 * Improved thread local storage for non-trivial types (similar speed as
19 * pthread_getspecific but only consumes a single pthread_key_t, and 4x faster
20 * than boost::thread_specific_ptr).
21 *
22 * Also includes an accessor interface to walk all the thread local child
23 * objects of a parent. accessAllThreads() initializes an accessor which holds
24 * a global lock *that blocks all creation and destruction of ThreadLocal
25 * objects with the same Tag* and can be used as an iterable container.
26 * accessAllThreads() can race with destruction of thread-local elements. We
27 * provide a strict mode which is dangerous because it requires the access lock
28 * to be held while destroying thread-local elements which could cause
29 * deadlocks. We gate this mode behind the AccessModeStrict template parameter.
30 *
31 * Intended use is for frequent write, infrequent read data access patterns such
32 * as counters.
33 *
34 * There are two classes here - ThreadLocal and ThreadLocalPtr. ThreadLocalPtr
35 * has semantics similar to boost::thread_specific_ptr. ThreadLocal is a thin
36 * wrapper around ThreadLocalPtr that manages allocation automatically.
37 *
38 * @author Spencer Ahrens (sahrens)
39 */
40
41#pragma once
42
43#include <iterator>
44#include <thread>
45#include <type_traits>
46#include <utility>
47
48#include <folly/Likely.h>
49#include <folly/Portability.h>
50#include <folly/ScopeGuard.h>
51#include <folly/SharedMutex.h>
52#include <folly/detail/ThreadLocalDetail.h>
53
54namespace folly {
55
56template <class T, class Tag, class AccessMode>
57class ThreadLocalPtr;
58
59template <class T, class Tag = void, class AccessMode = void>
60class ThreadLocal {
61 public:
62 constexpr ThreadLocal() : constructor_([]() { return new T(); }) {}
63
64 template <typename F, std::enable_if_t<is_invocable_r<T*, F>::value, int> = 0>
65 explicit ThreadLocal(F&& constructor)
66 : constructor_(std::forward<F>(constructor)) {}
67
68 FOLLY_ERASE T* get() const {
69 auto const ptr = tlp_.get();
70 return FOLLY_LIKELY(!!ptr) ? ptr : makeTlp();
71 }
72
73 T* operator->() const {
74 return get();
75 }
76
77 T& operator*() const {
78 return *get();
79 }
80
81 void reset(T* newPtr = nullptr) {
82 tlp_.reset(newPtr);
83 }
84
85 typedef typename ThreadLocalPtr<T, Tag, AccessMode>::Accessor Accessor;
86 Accessor accessAllThreads() const {
87 return tlp_.accessAllThreads();
88 }
89
90 // movable
91 ThreadLocal(ThreadLocal&&) = default;
92 ThreadLocal& operator=(ThreadLocal&&) = default;
93
94 private:
95 // non-copyable
96 ThreadLocal(const ThreadLocal&) = delete;
97 ThreadLocal& operator=(const ThreadLocal&) = delete;
98
99 FOLLY_NOINLINE T* makeTlp() const {
100 auto const ptr = constructor_();
101 tlp_.reset(ptr);
102 return ptr;
103 }
104
105 mutable ThreadLocalPtr<T, Tag, AccessMode> tlp_;
106 std::function<T*()> constructor_;
107};
108
109/*
110 * The idea here is that __thread is faster than pthread_getspecific, so we
111 * keep a __thread array of pointers to objects (ThreadEntry::elements) where
112 * each array has an index for each unique instance of the ThreadLocalPtr
113 * object. Each ThreadLocalPtr object has a unique id that is an index into
114 * these arrays so we can fetch the correct object from thread local storage
115 * very efficiently.
116 *
 * In order to prevent unbounded growth of the id space, and thus huge
 * ThreadEntry::elements arrays (for example due to continuous creation and
 * destruction of ThreadLocalPtr objects), we keep a set of all active
120 * instances. When an instance is destroyed we remove it from the active
121 * set and insert the id into freeIds_ for reuse. These operations require a
122 * global mutex, but only happen at construction and destruction time.
123 *
124 * We use a single global pthread_key_t per Tag to manage object destruction and
125 * memory cleanup upon thread exit because there is a finite number of
126 * pthread_key_t's available per machine.
127 *
128 * NOTE: Apple platforms don't support the same semantics for __thread that
129 * Linux does (and it's only supported at all on i386). For these, use
130 * pthread_setspecific()/pthread_getspecific() for the per-thread
131 * storage. Windows (MSVC and GCC) does support the same semantics
132 * with __declspec(thread)
133 */
134
135template <class T, class Tag = void, class AccessMode = void>
136class ThreadLocalPtr {
137 private:
138 typedef threadlocal_detail::StaticMeta<Tag, AccessMode> StaticMeta;
139
140 using AccessAllThreadsEnabled = Negation<std::is_same<Tag, void>>;
141
142 public:
143 constexpr ThreadLocalPtr() : id_() {}
144
145 ThreadLocalPtr(ThreadLocalPtr&& other) noexcept : id_(std::move(other.id_)) {}
146
147 ThreadLocalPtr& operator=(ThreadLocalPtr&& other) {
148 assert(this != &other);
149 destroy();
150 id_ = std::move(other.id_);
151 return *this;
152 }
153
154 ~ThreadLocalPtr() {
155 destroy();
156 }
157
158 T* get() const {
159 threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
160 return static_cast<T*>(w.ptr);
161 }
162
163 T* operator->() const {
164 return get();
165 }
166
167 T& operator*() const {
168 return *get();
169 }
170
171 T* release() {
172 auto rlock = getAccessAllThreadsLockReadHolderIfEnabled();
173
174 threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
175
176 return static_cast<T*>(w.release());
177 }
178
179 void reset(T* newPtr = nullptr) {
180 auto rlock = getAccessAllThreadsLockReadHolderIfEnabled();
181
182 auto guard = makeGuard([&] { delete newPtr; });
183 threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_);
184
185 w->dispose(TLPDestructionMode::THIS_THREAD);
186 // need to get a new ptr since the
187 // ThreadEntry::elements array can be reallocated
188 w = &StaticMeta::get(&id_);
189 w->cleanup();
190 guard.dismiss();
191 w->set(newPtr);
192 }
193
194 explicit operator bool() const {
195 return get() != nullptr;
196 }
197
198 /**
199 * reset() that transfers ownership from a smart pointer
200 */
201 template <
202 typename SourceT,
203 typename Deleter,
204 typename = typename std::enable_if<
205 std::is_convertible<SourceT*, T*>::value>::type>
206 void reset(std::unique_ptr<SourceT, Deleter> source) {
207 auto deleter = [delegate = source.get_deleter()](
208 T* ptr, TLPDestructionMode) { delegate(ptr); };
209 reset(source.release(), deleter);
210 }
211
212 /**
213 * reset() that transfers ownership from a smart pointer with the default
214 * deleter
215 */
216 template <
217 typename SourceT,
218 typename = typename std::enable_if<
219 std::is_convertible<SourceT*, T*>::value>::type>
220 void reset(std::unique_ptr<SourceT> source) {
221 reset(source.release());
222 }
223
224 /**
225 * reset() with a custom deleter:
226 * deleter(T* ptr, TLPDestructionMode mode)
227 * "mode" is ALL_THREADS if we're destructing this ThreadLocalPtr (and thus
228 * deleting pointers for all threads), and THIS_THREAD if we're only deleting
229 * the member for one thread (because of thread exit or reset()).
230 * Invoking the deleter must not throw.
231 */
232 template <class Deleter>
233 void reset(T* newPtr, const Deleter& deleter) {
234 auto rlock = getAccessAllThreadsLockReadHolderIfEnabled();
235
236 auto guard = makeGuard([&] {
237 if (newPtr) {
238 deleter(newPtr, TLPDestructionMode::THIS_THREAD);
239 }
240 });
241 threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_);
242 w->dispose(TLPDestructionMode::THIS_THREAD);
243 // need to get a new ptr since the
244 // ThreadEntry::elements array can be reallocated
245 w = &StaticMeta::get(&id_);
246 w->cleanup();
247 guard.dismiss();
248 w->set(newPtr, deleter);
249 }
250
251 // Holds a global lock for iteration through all thread local child objects.
252 // Can be used as an iterable container.
253 // Use accessAllThreads() to obtain one.
254 class Accessor {
255 friend class ThreadLocalPtr<T, Tag, AccessMode>;
256
257 threadlocal_detail::StaticMetaBase& meta_;
258 SharedMutex* accessAllThreadsLock_;
259 std::mutex* lock_;
260 uint32_t id_;
261
262 public:
263 class Iterator;
264 friend class Iterator;
265
266 // The iterators obtained from Accessor are bidirectional iterators.
267 class Iterator {
268 friend class Accessor;
269 const Accessor* accessor_;
270 threadlocal_detail::ThreadEntryNode* e_;
271
272 void increment() {
273 e_ = e_->getNext();
274 incrementToValid();
275 }
276
277 void decrement() {
278 e_ = e_->getPrev();
279 decrementToValid();
280 }
281
282 const T& dereference() const {
283 return *static_cast<T*>(
284 e_->getThreadEntry()->elements[accessor_->id_].ptr);
285 }
286
287 T& dereference() {
288 return *static_cast<T*>(
289 e_->getThreadEntry()->elements[accessor_->id_].ptr);
290 }
291
292 bool equal(const Iterator& other) const {
293 return (accessor_->id_ == other.accessor_->id_ && e_ == other.e_);
294 }
295
296 explicit Iterator(const Accessor* accessor)
297 : accessor_(accessor),
298 e_(&accessor_->meta_.head_.elements[accessor_->id_].node) {}
299
300 // we just need to check the ptr since it can be set to nullptr
301 // even if the entry is part of the list
302 bool valid() const {
303 return (e_->getThreadEntry()->elements[accessor_->id_].ptr);
304 }
305
306 void incrementToValid() {
307 for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node &&
308 !valid();
309 e_ = e_->getNext()) {
310 }
311 }
312
313 void decrementToValid() {
314 for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node &&
315 !valid();
316 e_ = e_->getPrev()) {
317 }
318 }
319
320 public:
321 using difference_type = ssize_t;
322 using value_type = T;
323 using reference = T const&;
324 using pointer = T const*;
325 using iterator_category = std::bidirectional_iterator_tag;
326
327 Iterator& operator++() {
328 increment();
329 return *this;
330 }
331
332 Iterator& operator++(int) {
333 Iterator copy(*this);
334 increment();
335 return copy;
336 }
337
338 Iterator& operator--() {
339 decrement();
340 return *this;
341 }
342
343 Iterator& operator--(int) {
344 Iterator copy(*this);
345 decrement();
346 return copy;
347 }
348
349 T& operator*() {
350 return dereference();
351 }
352
353 T const& operator*() const {
354 return dereference();
355 }
356
357 T* operator->() {
358 return &dereference();
359 }
360
361 T const* operator->() const {
362 return &dereference();
363 }
364
365 bool operator==(Iterator const& rhs) const {
366 return equal(rhs);
367 }
368
369 bool operator!=(Iterator const& rhs) const {
370 return !equal(rhs);
371 }
372
373 std::thread::id getThreadId() const {
374 return e_->getThreadEntry()->tid();
375 }
376
377 uint64_t getOSThreadId() const {
378 return e_->getThreadEntry()->tid_os;
379 }
380 };
381
382 ~Accessor() {
383 release();
384 }
385
386 Iterator begin() const {
387 return ++Iterator(this);
388 }
389
390 Iterator end() const {
391 return Iterator(this);
392 }
393
394 Accessor(const Accessor&) = delete;
395 Accessor& operator=(const Accessor&) = delete;
396
397 Accessor(Accessor&& other) noexcept
398 : meta_(other.meta_),
399 accessAllThreadsLock_(other.accessAllThreadsLock_),
400 lock_(other.lock_),
401 id_(other.id_) {
402 other.id_ = 0;
403 other.accessAllThreadsLock_ = nullptr;
404 other.lock_ = nullptr;
405 }
406
407 Accessor& operator=(Accessor&& other) noexcept {
408 // Each Tag has its own unique meta, and accessors with different Tags
409 // have different types. So either *this is empty, or this and other
410 // have the same tag. But if they have the same tag, they have the same
411 // meta (and lock), so they'd both hold the lock at the same time,
412 // which is impossible, which leaves only one possible scenario --
413 // *this is empty. Assert it.
414 assert(&meta_ == &other.meta_);
415 assert(lock_ == nullptr);
416 using std::swap;
417 swap(accessAllThreadsLock_, other.accessAllThreadsLock_);
418 swap(lock_, other.lock_);
419 swap(id_, other.id_);
420 }
421
422 Accessor()
423 : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()),
424 accessAllThreadsLock_(nullptr),
425 lock_(nullptr),
426 id_(0) {}
427
428 private:
429 explicit Accessor(uint32_t id)
430 : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()),
431 accessAllThreadsLock_(&meta_.accessAllThreadsLock_),
432 lock_(&meta_.lock_) {
433 accessAllThreadsLock_->lock();
434 lock_->lock();
435 id_ = id;
436 }
437
438 void release() {
439 if (lock_) {
440 lock_->unlock();
441 DCHECK(accessAllThreadsLock_ != nullptr);
442 accessAllThreadsLock_->unlock();
443 id_ = 0;
444 lock_ = nullptr;
445 accessAllThreadsLock_ = nullptr;
446 }
447 }
448 };
449
450 // accessor allows a client to iterate through all thread local child
451 // elements of this ThreadLocal instance. Holds a global lock for each <Tag>
452 Accessor accessAllThreads() const {
453 static_assert(
454 AccessAllThreadsEnabled::value,
455 "Must use a unique Tag to use the accessAllThreads feature");
456 return Accessor(id_.getOrAllocate(StaticMeta::instance()));
457 }
458
459 private:
460 void destroy() {
461 StaticMeta::instance().destroy(&id_);
462 }
463
464 // non-copyable
465 ThreadLocalPtr(const ThreadLocalPtr&) = delete;
466 ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete;
467
468 static auto getAccessAllThreadsLockReadHolderIfEnabled() {
469 return SharedMutex::ReadHolder(
470 AccessAllThreadsEnabled::value
471 ? &StaticMeta::instance().accessAllThreadsLock_
472 : nullptr);
473 }
474
475 mutable typename StaticMeta::EntryID id_;
476};
477
478namespace threadlocal_detail {
479template <typename>
480struct static_meta_of;
481
482template <typename T, typename Tag, typename AccessMode>
483struct static_meta_of<ThreadLocalPtr<T, Tag, AccessMode>> {
484 using type = StaticMeta<Tag, AccessMode>;
485};
486
487template <typename T, typename Tag, typename AccessMode>
488struct static_meta_of<ThreadLocal<T, Tag, AccessMode>> {
489 using type = StaticMeta<Tag, AccessMode>;
490};
491
492} // namespace threadlocal_detail
493} // namespace folly
494