1 | /* |
2 | * Copyright (c) Facebook, Inc. and its affiliates. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | /** |
18 | * Improved thread local storage for non-trivial types (similar speed as |
19 | * pthread_getspecific but only consumes a single pthread_key_t, and 4x faster |
20 | * than boost::thread_specific_ptr). |
21 | * |
22 | * Also includes an accessor interface to walk all the thread local child |
23 | * objects of a parent. accessAllThreads() initializes an accessor which holds |
24 | * a global lock *that blocks all creation and destruction of ThreadLocal |
25 | * objects with the same Tag* and can be used as an iterable container. |
26 | * accessAllThreads() can race with destruction of thread-local elements. We |
27 | * provide a strict mode which is dangerous because it requires the access lock |
28 | * to be held while destroying thread-local elements which could cause |
29 | * deadlocks. We gate this mode behind the AccessModeStrict template parameter. |
30 | * |
31 | * Intended use is for frequent write, infrequent read data access patterns such |
32 | * as counters. |
33 | * |
34 | * There are two classes here - ThreadLocal and ThreadLocalPtr. ThreadLocalPtr |
35 | * has semantics similar to boost::thread_specific_ptr. ThreadLocal is a thin |
36 | * wrapper around ThreadLocalPtr that manages allocation automatically. |
37 | * |
38 | * @author Spencer Ahrens (sahrens) |
39 | */ |
40 | |
41 | #pragma once |
42 | |
43 | #include <iterator> |
44 | #include <thread> |
45 | #include <type_traits> |
46 | #include <utility> |
47 | |
48 | #include <folly/Likely.h> |
49 | #include <folly/Portability.h> |
50 | #include <folly/ScopeGuard.h> |
51 | #include <folly/SharedMutex.h> |
52 | #include <folly/detail/ThreadLocalDetail.h> |
53 | |
54 | namespace folly { |
55 | |
// Forward declaration: ThreadLocal (defined next) stores a ThreadLocalPtr
// member and names its nested Accessor type before ThreadLocalPtr itself is
// defined further down in this header.
template <class T, class Tag, class AccessMode>
class ThreadLocalPtr;
58 | |
59 | template <class T, class Tag = void, class AccessMode = void> |
60 | class ThreadLocal { |
61 | public: |
62 | constexpr ThreadLocal() : constructor_([]() { return new T(); }) {} |
63 | |
64 | template <typename F, std::enable_if_t<is_invocable_r<T*, F>::value, int> = 0> |
65 | explicit ThreadLocal(F&& constructor) |
66 | : constructor_(std::forward<F>(constructor)) {} |
67 | |
68 | FOLLY_ERASE T* get() const { |
69 | auto const ptr = tlp_.get(); |
70 | return FOLLY_LIKELY(!!ptr) ? ptr : makeTlp(); |
71 | } |
72 | |
73 | T* operator->() const { |
74 | return get(); |
75 | } |
76 | |
77 | T& operator*() const { |
78 | return *get(); |
79 | } |
80 | |
81 | void reset(T* newPtr = nullptr) { |
82 | tlp_.reset(newPtr); |
83 | } |
84 | |
85 | typedef typename ThreadLocalPtr<T, Tag, AccessMode>::Accessor Accessor; |
86 | Accessor accessAllThreads() const { |
87 | return tlp_.accessAllThreads(); |
88 | } |
89 | |
90 | // movable |
91 | ThreadLocal(ThreadLocal&&) = default; |
92 | ThreadLocal& operator=(ThreadLocal&&) = default; |
93 | |
94 | private: |
95 | // non-copyable |
96 | ThreadLocal(const ThreadLocal&) = delete; |
97 | ThreadLocal& operator=(const ThreadLocal&) = delete; |
98 | |
99 | FOLLY_NOINLINE T* makeTlp() const { |
100 | auto const ptr = constructor_(); |
101 | tlp_.reset(ptr); |
102 | return ptr; |
103 | } |
104 | |
105 | mutable ThreadLocalPtr<T, Tag, AccessMode> tlp_; |
106 | std::function<T*()> constructor_; |
107 | }; |
108 | |
109 | /* |
110 | * The idea here is that __thread is faster than pthread_getspecific, so we |
111 | * keep a __thread array of pointers to objects (ThreadEntry::elements) where |
112 | * each array has an index for each unique instance of the ThreadLocalPtr |
113 | * object. Each ThreadLocalPtr object has a unique id that is an index into |
114 | * these arrays so we can fetch the correct object from thread local storage |
115 | * very efficiently. |
116 | * |
117 | * In order to prevent unbounded growth of the id space and thus huge |
118 | * ThreadEntry::elements, arrays, for example due to continuous creation and |
119 | * destruction of ThreadLocalPtr objects, we keep a set of all active |
120 | * instances. When an instance is destroyed we remove it from the active |
121 | * set and insert the id into freeIds_ for reuse. These operations require a |
122 | * global mutex, but only happen at construction and destruction time. |
123 | * |
124 | * We use a single global pthread_key_t per Tag to manage object destruction and |
125 | * memory cleanup upon thread exit because there is a finite number of |
126 | * pthread_key_t's available per machine. |
127 | * |
128 | * NOTE: Apple platforms don't support the same semantics for __thread that |
129 | * Linux does (and it's only supported at all on i386). For these, use |
130 | * pthread_setspecific()/pthread_getspecific() for the per-thread |
131 | * storage. Windows (MSVC and GCC) does support the same semantics |
132 | * with __declspec(thread) |
133 | */ |
134 | |
135 | template <class T, class Tag = void, class AccessMode = void> |
136 | class ThreadLocalPtr { |
137 | private: |
138 | typedef threadlocal_detail::StaticMeta<Tag, AccessMode> StaticMeta; |
139 | |
140 | using AccessAllThreadsEnabled = Negation<std::is_same<Tag, void>>; |
141 | |
142 | public: |
143 | constexpr ThreadLocalPtr() : id_() {} |
144 | |
145 | ThreadLocalPtr(ThreadLocalPtr&& other) noexcept : id_(std::move(other.id_)) {} |
146 | |
147 | ThreadLocalPtr& operator=(ThreadLocalPtr&& other) { |
148 | assert(this != &other); |
149 | destroy(); |
150 | id_ = std::move(other.id_); |
151 | return *this; |
152 | } |
153 | |
154 | ~ThreadLocalPtr() { |
155 | destroy(); |
156 | } |
157 | |
158 | T* get() const { |
159 | threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_); |
160 | return static_cast<T*>(w.ptr); |
161 | } |
162 | |
163 | T* operator->() const { |
164 | return get(); |
165 | } |
166 | |
167 | T& operator*() const { |
168 | return *get(); |
169 | } |
170 | |
171 | T* release() { |
172 | auto rlock = getAccessAllThreadsLockReadHolderIfEnabled(); |
173 | |
174 | threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_); |
175 | |
176 | return static_cast<T*>(w.release()); |
177 | } |
178 | |
179 | void reset(T* newPtr = nullptr) { |
180 | auto rlock = getAccessAllThreadsLockReadHolderIfEnabled(); |
181 | |
182 | auto guard = makeGuard([&] { delete newPtr; }); |
183 | threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_); |
184 | |
185 | w->dispose(TLPDestructionMode::THIS_THREAD); |
186 | // need to get a new ptr since the |
187 | // ThreadEntry::elements array can be reallocated |
188 | w = &StaticMeta::get(&id_); |
189 | w->cleanup(); |
190 | guard.dismiss(); |
191 | w->set(newPtr); |
192 | } |
193 | |
194 | explicit operator bool() const { |
195 | return get() != nullptr; |
196 | } |
197 | |
198 | /** |
199 | * reset() that transfers ownership from a smart pointer |
200 | */ |
201 | template < |
202 | typename SourceT, |
203 | typename Deleter, |
204 | typename = typename std::enable_if< |
205 | std::is_convertible<SourceT*, T*>::value>::type> |
206 | void reset(std::unique_ptr<SourceT, Deleter> source) { |
207 | auto deleter = [delegate = source.get_deleter()]( |
208 | T* ptr, TLPDestructionMode) { delegate(ptr); }; |
209 | reset(source.release(), deleter); |
210 | } |
211 | |
212 | /** |
213 | * reset() that transfers ownership from a smart pointer with the default |
214 | * deleter |
215 | */ |
216 | template < |
217 | typename SourceT, |
218 | typename = typename std::enable_if< |
219 | std::is_convertible<SourceT*, T*>::value>::type> |
220 | void reset(std::unique_ptr<SourceT> source) { |
221 | reset(source.release()); |
222 | } |
223 | |
224 | /** |
225 | * reset() with a custom deleter: |
226 | * deleter(T* ptr, TLPDestructionMode mode) |
227 | * "mode" is ALL_THREADS if we're destructing this ThreadLocalPtr (and thus |
228 | * deleting pointers for all threads), and THIS_THREAD if we're only deleting |
229 | * the member for one thread (because of thread exit or reset()). |
230 | * Invoking the deleter must not throw. |
231 | */ |
232 | template <class Deleter> |
233 | void reset(T* newPtr, const Deleter& deleter) { |
234 | auto rlock = getAccessAllThreadsLockReadHolderIfEnabled(); |
235 | |
236 | auto guard = makeGuard([&] { |
237 | if (newPtr) { |
238 | deleter(newPtr, TLPDestructionMode::THIS_THREAD); |
239 | } |
240 | }); |
241 | threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_); |
242 | w->dispose(TLPDestructionMode::THIS_THREAD); |
243 | // need to get a new ptr since the |
244 | // ThreadEntry::elements array can be reallocated |
245 | w = &StaticMeta::get(&id_); |
246 | w->cleanup(); |
247 | guard.dismiss(); |
248 | w->set(newPtr, deleter); |
249 | } |
250 | |
251 | // Holds a global lock for iteration through all thread local child objects. |
252 | // Can be used as an iterable container. |
253 | // Use accessAllThreads() to obtain one. |
254 | class Accessor { |
255 | friend class ThreadLocalPtr<T, Tag, AccessMode>; |
256 | |
257 | threadlocal_detail::StaticMetaBase& meta_; |
258 | SharedMutex* accessAllThreadsLock_; |
259 | std::mutex* lock_; |
260 | uint32_t id_; |
261 | |
262 | public: |
263 | class Iterator; |
264 | friend class Iterator; |
265 | |
266 | // The iterators obtained from Accessor are bidirectional iterators. |
267 | class Iterator { |
268 | friend class Accessor; |
269 | const Accessor* accessor_; |
270 | threadlocal_detail::ThreadEntryNode* e_; |
271 | |
272 | void increment() { |
273 | e_ = e_->getNext(); |
274 | incrementToValid(); |
275 | } |
276 | |
277 | void decrement() { |
278 | e_ = e_->getPrev(); |
279 | decrementToValid(); |
280 | } |
281 | |
282 | const T& dereference() const { |
283 | return *static_cast<T*>( |
284 | e_->getThreadEntry()->elements[accessor_->id_].ptr); |
285 | } |
286 | |
287 | T& dereference() { |
288 | return *static_cast<T*>( |
289 | e_->getThreadEntry()->elements[accessor_->id_].ptr); |
290 | } |
291 | |
292 | bool equal(const Iterator& other) const { |
293 | return (accessor_->id_ == other.accessor_->id_ && e_ == other.e_); |
294 | } |
295 | |
296 | explicit Iterator(const Accessor* accessor) |
297 | : accessor_(accessor), |
298 | e_(&accessor_->meta_.head_.elements[accessor_->id_].node) {} |
299 | |
300 | // we just need to check the ptr since it can be set to nullptr |
301 | // even if the entry is part of the list |
302 | bool valid() const { |
303 | return (e_->getThreadEntry()->elements[accessor_->id_].ptr); |
304 | } |
305 | |
306 | void incrementToValid() { |
307 | for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node && |
308 | !valid(); |
309 | e_ = e_->getNext()) { |
310 | } |
311 | } |
312 | |
313 | void decrementToValid() { |
314 | for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node && |
315 | !valid(); |
316 | e_ = e_->getPrev()) { |
317 | } |
318 | } |
319 | |
320 | public: |
321 | using difference_type = ssize_t; |
322 | using value_type = T; |
323 | using reference = T const&; |
324 | using pointer = T const*; |
325 | using iterator_category = std::bidirectional_iterator_tag; |
326 | |
327 | Iterator& operator++() { |
328 | increment(); |
329 | return *this; |
330 | } |
331 | |
332 | Iterator& operator++(int) { |
333 | Iterator copy(*this); |
334 | increment(); |
335 | return copy; |
336 | } |
337 | |
338 | Iterator& operator--() { |
339 | decrement(); |
340 | return *this; |
341 | } |
342 | |
343 | Iterator& operator--(int) { |
344 | Iterator copy(*this); |
345 | decrement(); |
346 | return copy; |
347 | } |
348 | |
349 | T& operator*() { |
350 | return dereference(); |
351 | } |
352 | |
353 | T const& operator*() const { |
354 | return dereference(); |
355 | } |
356 | |
357 | T* operator->() { |
358 | return &dereference(); |
359 | } |
360 | |
361 | T const* operator->() const { |
362 | return &dereference(); |
363 | } |
364 | |
365 | bool operator==(Iterator const& rhs) const { |
366 | return equal(rhs); |
367 | } |
368 | |
369 | bool operator!=(Iterator const& rhs) const { |
370 | return !equal(rhs); |
371 | } |
372 | |
373 | std::thread::id getThreadId() const { |
374 | return e_->getThreadEntry()->tid(); |
375 | } |
376 | |
377 | uint64_t getOSThreadId() const { |
378 | return e_->getThreadEntry()->tid_os; |
379 | } |
380 | }; |
381 | |
382 | ~Accessor() { |
383 | release(); |
384 | } |
385 | |
386 | Iterator begin() const { |
387 | return ++Iterator(this); |
388 | } |
389 | |
390 | Iterator end() const { |
391 | return Iterator(this); |
392 | } |
393 | |
394 | Accessor(const Accessor&) = delete; |
395 | Accessor& operator=(const Accessor&) = delete; |
396 | |
397 | Accessor(Accessor&& other) noexcept |
398 | : meta_(other.meta_), |
399 | accessAllThreadsLock_(other.accessAllThreadsLock_), |
400 | lock_(other.lock_), |
401 | id_(other.id_) { |
402 | other.id_ = 0; |
403 | other.accessAllThreadsLock_ = nullptr; |
404 | other.lock_ = nullptr; |
405 | } |
406 | |
407 | Accessor& operator=(Accessor&& other) noexcept { |
408 | // Each Tag has its own unique meta, and accessors with different Tags |
409 | // have different types. So either *this is empty, or this and other |
410 | // have the same tag. But if they have the same tag, they have the same |
411 | // meta (and lock), so they'd both hold the lock at the same time, |
412 | // which is impossible, which leaves only one possible scenario -- |
413 | // *this is empty. Assert it. |
414 | assert(&meta_ == &other.meta_); |
415 | assert(lock_ == nullptr); |
416 | using std::swap; |
417 | swap(accessAllThreadsLock_, other.accessAllThreadsLock_); |
418 | swap(lock_, other.lock_); |
419 | swap(id_, other.id_); |
420 | } |
421 | |
422 | Accessor() |
423 | : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()), |
424 | accessAllThreadsLock_(nullptr), |
425 | lock_(nullptr), |
426 | id_(0) {} |
427 | |
428 | private: |
429 | explicit Accessor(uint32_t id) |
430 | : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()), |
431 | accessAllThreadsLock_(&meta_.accessAllThreadsLock_), |
432 | lock_(&meta_.lock_) { |
433 | accessAllThreadsLock_->lock(); |
434 | lock_->lock(); |
435 | id_ = id; |
436 | } |
437 | |
438 | void release() { |
439 | if (lock_) { |
440 | lock_->unlock(); |
441 | DCHECK(accessAllThreadsLock_ != nullptr); |
442 | accessAllThreadsLock_->unlock(); |
443 | id_ = 0; |
444 | lock_ = nullptr; |
445 | accessAllThreadsLock_ = nullptr; |
446 | } |
447 | } |
448 | }; |
449 | |
450 | // accessor allows a client to iterate through all thread local child |
451 | // elements of this ThreadLocal instance. Holds a global lock for each <Tag> |
452 | Accessor accessAllThreads() const { |
453 | static_assert( |
454 | AccessAllThreadsEnabled::value, |
455 | "Must use a unique Tag to use the accessAllThreads feature" ); |
456 | return Accessor(id_.getOrAllocate(StaticMeta::instance())); |
457 | } |
458 | |
459 | private: |
460 | void destroy() { |
461 | StaticMeta::instance().destroy(&id_); |
462 | } |
463 | |
464 | // non-copyable |
465 | ThreadLocalPtr(const ThreadLocalPtr&) = delete; |
466 | ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete; |
467 | |
468 | static auto getAccessAllThreadsLockReadHolderIfEnabled() { |
469 | return SharedMutex::ReadHolder( |
470 | AccessAllThreadsEnabled::value |
471 | ? &StaticMeta::instance().accessAllThreadsLock_ |
472 | : nullptr); |
473 | } |
474 | |
475 | mutable typename StaticMeta::EntryID id_; |
476 | }; |
477 | |
478 | namespace threadlocal_detail { |
479 | template <typename> |
480 | struct static_meta_of; |
481 | |
482 | template <typename T, typename Tag, typename AccessMode> |
483 | struct static_meta_of<ThreadLocalPtr<T, Tag, AccessMode>> { |
484 | using type = StaticMeta<Tag, AccessMode>; |
485 | }; |
486 | |
487 | template <typename T, typename Tag, typename AccessMode> |
488 | struct static_meta_of<ThreadLocal<T, Tag, AccessMode>> { |
489 | using type = StaticMeta<Tag, AccessMode>; |
490 | }; |
491 | |
492 | } // namespace threadlocal_detail |
493 | } // namespace folly |
494 | |