1 | /* Copyright 2016 Google Inc. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. */ |
14 | |
15 | /* This package provides a mutex nsync_mu and a Mesa-style condition variable nsync_cv. */ |
16 | |
17 | #include "nsync_cpp.h" |
18 | #include "platform.h" |
19 | #include "compiler.h" |
20 | #include "cputype.h" |
21 | #include "nsync.h" |
22 | #include "atomic.h" |
23 | #include "sem.h" |
24 | #include "dll.h" |
25 | #include "wait_internal.h" |
26 | #include "common.h" |
27 | |
28 | NSYNC_CPP_START_ |
29 | |
30 | /* Implementation notes |
31 | |
32 | The implementations of nsync_mu and nsync_cv both use spinlocks to protect |
33 | their waiter queues. The spinlocks are implemented with atomic operations |
34 | and a delay loop found below. They could use pthread_mutex_t, but I wished |
35 | to have an implementation independent of pthread mutexes and condition |
36 | variables. |
37 | |
38 | nsync_mu and nsync_cv use the same type of doubly-linked list of waiters |
39 | (see waiter.c). This allows waiters to be transferred from the cv queue to |
40 | the mu queue when a thread is logically woken from the cv but would |
41 | immediately go to sleep on the mu. See the wake_waiters() call. |
42 | |
43 | In mu, the "designated waker" is a thread that was waiting on mu, has been |
44 | woken up, but as yet has neither acquired nor gone back to waiting. The |
45 | presence of such a thread is indicated by the MU_DESIG_WAKER bit in the mu |
46 | word. This bit allows the nsync_mu_unlock() code to avoid waking a second |
47 | waiter when there's already one that will wake the next thread when the time |
48 | comes. This speeds things up when the lock is heavily contended, and the |
49 | critical sections are small. |
50 | |
51 | The weasel words "with high probability" in the specification of |
52 | nsync_mu_trylock() and nsync_mu_rtrylock() prevent clients from believing |
53 | that they can determine with certainty whether another thread has given up a |
54 | lock yet. This, together with the requirement that a thread that acquired a |
55 | mutex must release it (rather than it being released by another thread), |
56 | prohibits clients from using mu as a sort of semaphore. The intent is that |
57 | it be used only for traditional mutual exclusion, and that clients that need |
58 | a semaphore should use one. This leaves room for certain future |
	optimizations, and makes it easier to apply detection of potential races via
60 | candidate lock-set algorithms, should that ever be desired. |
61 | |
	The nsync_mu_wait() and nsync_mu_wait_with_deadline() calls use an
63 | absolute rather than a relative timeout. This is less error prone, as |
64 | described in the comment on nsync_cv_wait_with_deadline(). Alas, relative |
65 | timeouts are seductive in trivial examples (such as tests). These are the |
66 | first things that people try, so they are likely to be requested. If enough |
67 | people complain we could give them that particular piece of rope. |
68 | |
69 | Excessive evaluations of the same wait condition are avoided by maintaining |
70 | waiter.same_condition as a doubly-linked list of waiters with the same |
71 | non-NULL wait condition that are also adjacent in the waiter list. This does |
72 | well even with large numbers of threads if there is at most one |
73 | wait condition that can be false at any given time (such as in a |
74 | producer/consumer queue, which cannot be both empty and full |
75 | simultaneously). One could imagine a queueing mechanism that would |
76 | guarantee to evaluate each condition at most once per wakeup, but that would |
77 | be substantially more complex, and would still degrade if the number of |
78 | distinct wakeup conditions were high. So clients are advised to resort to |
79 | condition variables if they have many distinct wakeup conditions. */ |
80 | |
/* Delay the caller between iterations of a spinloop.
   The first few calls busy-wait for an exponentially growing number of
   iterations; thereafter the thread yields the CPU.  Returns the updated
   attempt count to pass to the next call.
   Usage:
	unsigned attempts = 0;
	while (try_something) {
		attempts = nsync_spin_delay_ (attempts);
	} */
unsigned nsync_spin_delay_ (unsigned attempts) {
	if (attempts >= 7) {
		nsync_yield_ ();
	} else {
		/* volatile stops the compiler from eliding the busy loop. */
		volatile int spin;
		for (spin = 1 << attempts; spin != 0; spin--) {
		}
		attempts++;
	}
	return (attempts);
}
98 | |
99 | /* Spin until (*w & test) == 0, then atomically perform *w = ((*w | set) & |
100 | ~clear), perform an acquire barrier, and return the previous value of *w. |
101 | */ |
102 | uint32_t nsync_spin_test_and_set_ (nsync_atomic_uint32_ *w, uint32_t test, |
103 | uint32_t set, uint32_t clear) { |
104 | unsigned attempts = 0; /* CV_SPINLOCK retry count */ |
105 | uint32_t old = ATM_LOAD (w); |
106 | while ((old & test) != 0 || !ATM_CAS_ACQ (w, old, (old | set) & ~clear)) { |
107 | attempts = nsync_spin_delay_ (attempts); |
108 | old = ATM_LOAD (w); |
109 | } |
110 | return (old); |
111 | } |
112 | |
113 | /* ====================================================================================== */ |
114 | |
115 | struct nsync_waiter_s *nsync_dll_nsync_waiter_ (nsync_dll_element_ *e) { |
116 | struct nsync_waiter_s *nw = (struct nsync_waiter_s *) e->container; |
117 | ASSERT (nw->tag == NSYNC_WAITER_TAG); |
118 | ASSERT (e == &nw->q); |
119 | return (nw); |
120 | } |
121 | waiter *nsync_dll_waiter_ (nsync_dll_element_ *e) { |
122 | struct nsync_waiter_s *nw = DLL_NSYNC_WAITER (e); |
123 | waiter *w = CONTAINER (waiter, nw, nw); |
124 | ASSERT ((nw->flags & NSYNC_WAITER_FLAG_MUCV) != 0); |
125 | ASSERT (w->tag == WAITER_TAG); |
126 | ASSERT (e == &w->nw.q); |
127 | return (w); |
128 | } |
129 | |
130 | waiter *nsync_dll_waiter_samecond_ (nsync_dll_element_ *e) { |
131 | waiter *w = (waiter *) e->container; |
132 | ASSERT (w->tag == WAITER_TAG); |
133 | ASSERT (e == &w->same_condition); |
134 | return (w); |
135 | } |
136 | |
137 | /* -------------------------------- */ |
138 | |
/* Doubly-linked list of free waiter structs, shared by all threads. */
static nsync_dll_list_ free_waiters = NULL;

/* Spinlock protecting free_waiters; 0 means unlocked, 1 means locked.
   Acquired with nsync_spin_test_and_set_() and released with a
   release store of 0. */
static nsync_atomic_uint32_ free_waiters_mu;

/* This thread's reserved waiter, if any (used only if HAVE_THREAD_LOCAL). */
static THREAD_LOCAL waiter *waiter_for_thread;
145 | static void waiter_destroy (void *v) { |
146 | waiter *w = (waiter *) v; |
147 | /* Reset waiter_for_thread in case another thread-local variable reuses |
148 | the waiter in its destructor while the waiter is taken by the other |
149 | thread from free_waiters. This can happen as the destruction order |
150 | of thread-local variables can be arbitrary in some platform e.g. |
151 | POSIX. */ |
152 | waiter_for_thread = NULL; |
153 | IGNORE_RACES_START (); |
154 | ASSERT ((w->flags & (WAITER_RESERVED|WAITER_IN_USE)) == WAITER_RESERVED); |
155 | w->flags &= ~WAITER_RESERVED; |
156 | nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0); |
157 | free_waiters = nsync_dll_make_first_in_list_ (free_waiters, &w->nw.q); |
158 | ATM_STORE_REL (&free_waiters_mu, 0); /* release store */ |
159 | IGNORE_RACES_END (); |
160 | } |
161 | |
/* If non-nil, nsync_malloc_ptr_ points to a malloc-like routine that allocates
   memory, used by mutex and condition variable code to allocate waiter
   structs.  This would allow nsync's mutexes to be used inside an
   implementation of malloc(), by providing another, simpler allocator here.
   The intent is that the implicit NULL value here can be overridden by a
   client declaration that uses an initializer. */
void *(*nsync_malloc_ptr_) (size_t size);
169 | |
170 | /* Return a pointer to an unused waiter struct. |
171 | Ensures that the enclosed timer is stopped and its channel drained. */ |
172 | waiter *nsync_waiter_new_ (void) { |
173 | nsync_dll_element_ *q; |
174 | waiter *tw; |
175 | waiter *w; |
176 | if (HAVE_THREAD_LOCAL) { |
177 | tw = waiter_for_thread; |
178 | } else { |
179 | tw = (waiter *) nsync_per_thread_waiter_ (&waiter_destroy); |
180 | } |
181 | w = tw; |
182 | if (w == NULL || (w->flags & (WAITER_RESERVED|WAITER_IN_USE)) != WAITER_RESERVED) { |
183 | w = NULL; |
184 | nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0); |
185 | q = nsync_dll_first_ (free_waiters); |
186 | if (q != NULL) { /* If free list is non-empty, dequeue an item. */ |
187 | free_waiters = nsync_dll_remove_ (free_waiters, q); |
188 | w = DLL_WAITER (q); |
189 | } |
190 | ATM_STORE_REL (&free_waiters_mu, 0); /* release store */ |
191 | if (w == NULL) { /* If free list was empty, allocate an item. */ |
192 | if (nsync_malloc_ptr_ != NULL) { /* Use client's malloc() */ |
193 | w = (waiter *) (*nsync_malloc_ptr_) (sizeof (*w)); |
194 | } else { /* standard malloc () */ |
195 | w = (waiter *) malloc (sizeof (*w)); |
196 | } |
197 | w->tag = WAITER_TAG; |
198 | w->nw.tag = NSYNC_WAITER_TAG; |
199 | nsync_mu_semaphore_init (&w->sem); |
200 | w->nw.sem = &w->sem; |
201 | nsync_dll_init_ (&w->nw.q, &w->nw); |
202 | NSYNC_ATOMIC_UINT32_STORE_ (&w->nw.waiting, 0); |
203 | w->nw.flags = NSYNC_WAITER_FLAG_MUCV; |
204 | ATM_STORE (&w->remove_count, 0); |
205 | nsync_dll_init_ (&w->same_condition, w); |
206 | w->flags = 0; |
207 | } |
208 | if (tw == NULL) { |
209 | w->flags |= WAITER_RESERVED; |
210 | nsync_set_per_thread_waiter_ (w, &waiter_destroy); |
211 | if (HAVE_THREAD_LOCAL) { |
212 | waiter_for_thread = w; |
213 | } |
214 | } |
215 | } |
216 | w->flags |= WAITER_IN_USE; |
217 | return (w); |
218 | } |
219 | |
220 | /* Return an unused waiter struct *w to the free pool. */ |
221 | void nsync_waiter_free_ (waiter *w) { |
222 | ASSERT ((w->flags & WAITER_IN_USE) != 0); |
223 | w->flags &= ~WAITER_IN_USE; |
224 | if ((w->flags & WAITER_RESERVED) == 0) { |
225 | nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0); |
226 | free_waiters = nsync_dll_make_first_in_list_ (free_waiters, &w->nw.q); |
227 | ATM_STORE_REL (&free_waiters_mu, 0); /* release store */ |
228 | } |
229 | } |
230 | |
231 | /* ====================================================================================== */ |
232 | |
/* writer_type points to a lock_type that describes how to manipulate a mu for a writer.
   Each initializer is the writer-mode (MU_W*) constant for the
   corresponding lock_type field; see the lock_type declaration for
   the field meanings. */
static lock_type Xwriter_type = {
	MU_WZERO_TO_ACQUIRE,
	MU_WADD_TO_ACQUIRE,
	MU_WHELD_IF_NON_ZERO,
	MU_WSET_WHEN_WAITING,
	MU_WCLEAR_ON_ACQUIRE,
	MU_WCLEAR_ON_UNCONTENDED_RELEASE
};
lock_type *nsync_writer_type_ = &Xwriter_type;
243 | |
244 | |
/* reader_type points to a lock_type that describes how to manipulate a mu for a reader.
   Each initializer is the reader-mode (MU_R*) constant for the
   corresponding lock_type field, parallel to Xwriter_type above. */
static lock_type Xreader_type = {
	MU_RZERO_TO_ACQUIRE,
	MU_RADD_TO_ACQUIRE,
	MU_RHELD_IF_NON_ZERO,
	MU_RSET_WHEN_WAITING,
	MU_RCLEAR_ON_ACQUIRE,
	MU_RCLEAR_ON_UNCONTENDED_RELEASE
};
lock_type *nsync_reader_type_ = &Xreader_type;
255 | |
256 | NSYNC_CPP_END_ |
257 | |