1 | /* Copyright 2016 Google Inc. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. */ |
14 | |
15 | /* This package provides a mutex nsync_mu and a Mesa-style condition variable nsync_cv. */ |
16 | |
17 | #include "nsync_cpp.h" |
18 | #include "platform.h" |
19 | #include "compiler.h" |
20 | #include "cputype.h" |
21 | #include "nsync.h" |
22 | #include "atomic.h" |
23 | #include "sem.h" |
24 | #include "dll.h" |
25 | #include "wait_internal.h" |
26 | #include "common.h" |
27 | |
28 | NSYNC_CPP_START_ |
29 | |
30 | /* Implementation notes |
31 | |
32 | The implementations of nsync_mu and nsync_cv both use spinlocks to protect |
33 | their waiter queues. The spinlocks are implemented with atomic operations |
34 | and a delay loop found below. They could use pthread_mutex_t, but I wished |
35 | to have an implementation independent of pthread mutexes and condition |
36 | variables. |
37 | |
38 | nsync_mu and nsync_cv use the same type of doubly-linked list of waiters |
39 | (see waiter.c). This allows waiters to be transferred from the cv queue to |
40 | the mu queue when a thread is logically woken from the cv but would |
41 | immediately go to sleep on the mu. See the wake_waiters() call. |
42 | |
43 | In mu, the "designated waker" is a thread that was waiting on mu, has been |
44 | woken up, but as yet has neither acquired nor gone back to waiting. The |
45 | presence of such a thread is indicated by the MU_DESIG_WAKER bit in the mu |
46 | word. This bit allows the nsync_mu_unlock() code to avoid waking a second |
47 | waiter when there's already one that will wake the next thread when the time |
48 | comes. This speeds things up when the lock is heavily contended, and the |
49 | critical sections are small. |
50 | |
51 | The weasel words "with high probability" in the specification of |
52 | nsync_mu_trylock() and nsync_mu_rtrylock() prevent clients from believing |
53 | that they can determine with certainty whether another thread has given up a |
54 | lock yet. This, together with the requirement that a thread that acquired a |
55 | mutex must release it (rather than it being released by another thread), |
56 | prohibits clients from using mu as a sort of semaphore. The intent is that |
57 | it be used only for traditional mutual exclusion, and that clients that need |
58 | a semaphore should use one. This leaves room for certain future |
	optimizations, and makes it easier to apply detection of potential races via
60 | candidate lock-set algorithms, should that ever be desired. |
61 | |
	The nsync_mu_wait() and nsync_mu_wait_with_deadline() calls use an
63 | absolute rather than a relative timeout. This is less error prone, as |
64 | described in the comment on nsync_cv_wait_with_deadline(). Alas, relative |
65 | timeouts are seductive in trivial examples (such as tests). These are the |
66 | first things that people try, so they are likely to be requested. If enough |
67 | people complain we could give them that particular piece of rope. |
68 | |
69 | Excessive evaluations of the same wait condition are avoided by maintaining |
70 | waiter.same_condition as a doubly-linked list of waiters with the same |
71 | non-NULL wait condition that are also adjacent in the waiter list. This does |
72 | well even with large numbers of threads if there is at most one |
73 | wait condition that can be false at any given time (such as in a |
74 | producer/consumer queue, which cannot be both empty and full |
75 | simultaneously). One could imagine a queueing mechanism that would |
76 | guarantee to evaluate each condition at most once per wakeup, but that would |
77 | be substantially more complex, and would still degrade if the number of |
78 | distinct wakeup conditions were high. So clients are advised to resort to |
79 | condition variables if they have many distinct wakeup conditions. */ |
80 | |
/* Delay the caller between iterations of a spinloop.
   The first few calls busy-wait for an exponentially growing number of
   iterations; thereafter the thread yields the CPU.  Returns the updated
   attempt count to pass to the next call.
   Usage:
	unsigned attempts = 0;
	while (try_something) {
		attempts = nsync_spin_delay_ (attempts);
	} */
unsigned nsync_spin_delay_ (unsigned attempts) {
	if (attempts >= 7) {
		nsync_yield_ ();
	} else {
		/* volatile stops the compiler from eliding the busy loop. */
		volatile int spin;
		for (spin = 1 << attempts; spin != 0; spin--) {
		}
		attempts++;
	}
	return (attempts);
}
98 | |
99 | /* Spin until (*w & test) == 0, then atomically perform *w = ((*w | set) & |
100 | ~clear), perform an acquire barrier, and return the previous value of *w. |
101 | */ |
102 | uint32_t nsync_spin_test_and_set_ (nsync_atomic_uint32_ *w, uint32_t test, |
103 | uint32_t set, uint32_t clear) { |
104 | unsigned attempts = 0; /* CV_SPINLOCK retry count */ |
105 | uint32_t old = ATM_LOAD (w); |
106 | while ((old & test) != 0 || !ATM_CAS_ACQ (w, old, (old | set) & ~clear)) { |
107 | attempts = nsync_spin_delay_ (attempts); |
108 | old = ATM_LOAD (w); |
109 | } |
110 | return (old); |
111 | } |
112 | |
113 | /* ====================================================================================== */ |
114 | |
115 | struct nsync_waiter_s *nsync_dll_nsync_waiter_ (nsync_dll_element_ *e) { |
116 | struct nsync_waiter_s *nw = (struct nsync_waiter_s *) e->container; |
117 | ASSERT (nw->tag == NSYNC_WAITER_TAG); |
118 | ASSERT (e == &nw->q); |
119 | return (nw); |
120 | } |
121 | waiter *nsync_dll_waiter_ (nsync_dll_element_ *e) { |
122 | struct nsync_waiter_s *nw = DLL_NSYNC_WAITER (e); |
123 | waiter *w = CONTAINER (waiter, nw, nw); |
124 | ASSERT ((nw->flags & NSYNC_WAITER_FLAG_MUCV) != 0); |
125 | ASSERT (w->tag == WAITER_TAG); |
126 | ASSERT (e == &w->nw.q); |
127 | return (w); |
128 | } |
129 | |
130 | waiter *nsync_dll_waiter_samecond_ (nsync_dll_element_ *e) { |
131 | waiter *w = (waiter *) e->container; |
132 | ASSERT (w->tag == WAITER_TAG); |
133 | ASSERT (e == &w->same_condition); |
134 | return (w); |
135 | } |
136 | |
137 | /* -------------------------------- */ |
138 | |
/* Doubly-linked list of free waiter structs, shared by all threads. */
static nsync_dll_list_ free_waiters = NULL;

/* Spinlock protecting free_waiters; 0 means unlocked, 1 means locked.
   Acquired with nsync_spin_test_and_set_() and released with a
   release store of 0. */
static nsync_atomic_uint32_ free_waiters_mu;

/* This thread's reserved waiter, if any (used only if HAVE_THREAD_LOCAL). */
static THREAD_LOCAL waiter *waiter_for_thread;
145 | static void waiter_destroy (void *v) { |
146 | waiter *w = (waiter *) v; |
147 | /* Reset waiter_for_thread in case another thread-local variable reuses |
148 | the waiter in its destructor while the waiter is taken by the other |
149 | thread from free_waiters. This can happen as the destruction order |
150 | of thread-local variables can be arbitrary in some platform e.g. |
151 | POSIX. */ |
152 | waiter_for_thread = NULL; |
153 | IGNORE_RACES_START (); |
154 | ASSERT ((w->flags & (WAITER_RESERVED|WAITER_IN_USE)) == WAITER_RESERVED); |
155 | w->flags &= ~WAITER_RESERVED; |
156 | nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0); |
157 | free_waiters = nsync_dll_make_first_in_list_ (free_waiters, &w->nw.q); |
158 | ATM_STORE_REL (&free_waiters_mu, 0); /* release store */ |
159 | IGNORE_RACES_END (); |
160 | } |
161 | |
/* If non-nil, nsync_malloc_ptr_ points to a malloc-like routine that allocates
   memory, used by mutex and condition variable code to allocate waiter
   structs.  This would allow nsync's mutexes to be used inside an
   implementation of malloc(), by providing another, simpler allocator here.
   The intent is that the implicit NULL value here can be overridden by a
   client declaration that uses an initializer. */
void *(*nsync_malloc_ptr_) (size_t size);
169 | |
170 | /* Return a pointer to an unused waiter struct. |
171 | Ensures that the enclosed timer is stopped and its channel drained. */ |
172 | waiter *nsync_waiter_new_ (void) { |
173 | nsync_dll_element_ *q; |
174 | waiter *tw; |
175 | waiter *w; |
176 | if (HAVE_THREAD_LOCAL) { |
177 | tw = waiter_for_thread; |
178 | } else { |
179 | tw = (waiter *) nsync_per_thread_waiter_ (&waiter_destroy); |
180 | } |
181 | w = tw; |
182 | if (w == NULL || (w->flags & (WAITER_RESERVED|WAITER_IN_USE)) != WAITER_RESERVED) { |
183 | w = NULL; |
184 | nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0); |
185 | q = nsync_dll_first_ (free_waiters); |
186 | if (q != NULL) { /* If free list is non-empty, dequeue an item. */ |
187 | free_waiters = nsync_dll_remove_ (free_waiters, q); |
188 | w = DLL_WAITER (q); |
189 | } |
190 | ATM_STORE_REL (&free_waiters_mu, 0); /* release store */ |
191 | if (w == NULL) { /* If free list was empty, allocate an item. */ |
192 | if (nsync_malloc_ptr_ != NULL) { /* Use client's malloc() */ |
193 | w = (waiter *) (*nsync_malloc_ptr_) (sizeof (*w)); |
194 | } else { /* standard malloc () */ |
195 | w = (waiter *) malloc (sizeof (*w)); |
196 | } |
197 | w->tag = WAITER_TAG; |
198 | w->nw.tag = NSYNC_WAITER_TAG; |
199 | nsync_mu_semaphore_init (&w->sem); |
200 | w->nw.sem = &w->sem; |
201 | nsync_dll_init_ (&w->nw.q, &w->nw); |
202 | NSYNC_ATOMIC_UINT32_STORE_ (&w->nw.waiting, 0); |
203 | w->nw.flags = NSYNC_WAITER_FLAG_MUCV; |
204 | ATM_STORE (&w->remove_count, 0); |
205 | nsync_dll_init_ (&w->same_condition, w); |
206 | w->flags = 0; |
207 | } |
208 | if (tw == NULL) { |
209 | w->flags |= WAITER_RESERVED; |
210 | nsync_set_per_thread_waiter_ (w, &waiter_destroy); |
211 | if (HAVE_THREAD_LOCAL) { |
212 | waiter_for_thread = w; |
213 | } |
214 | } |
215 | } |
216 | w->flags |= WAITER_IN_USE; |
217 | return (w); |
218 | } |
219 | |
220 | /* Return an unused waiter struct *w to the free pool. */ |
221 | void nsync_waiter_free_ (waiter *w) { |
222 | ASSERT ((w->flags & WAITER_IN_USE) != 0); |
223 | w->flags &= ~WAITER_IN_USE; |
224 | if ((w->flags & WAITER_RESERVED) == 0) { |
225 | nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0); |
226 | free_waiters = nsync_dll_make_first_in_list_ (free_waiters, &w->nw.q); |
227 | ATM_STORE_REL (&free_waiters_mu, 0); /* release store */ |
228 | } |
229 | } |
230 | |
231 | /* ====================================================================================== */ |
232 | |
/* writer_type points to a lock_type that describes how to manipulate a mu for a writer.
   Each initializer is the writer-mode (MU_W*) constant for the
   corresponding lock_type field; see the lock_type declaration for
   the field meanings. */
static lock_type Xwriter_type = {
	MU_WZERO_TO_ACQUIRE,
	MU_WADD_TO_ACQUIRE,
	MU_WHELD_IF_NON_ZERO,
	MU_WSET_WHEN_WAITING,
	MU_WCLEAR_ON_ACQUIRE,
	MU_WCLEAR_ON_UNCONTENDED_RELEASE
};
lock_type *nsync_writer_type_ = &Xwriter_type;
243 | |
244 | |
/* reader_type points to a lock_type that describes how to manipulate a mu for a reader.
   Each initializer is the reader-mode (MU_R*) constant for the
   corresponding lock_type field, parallel to Xwriter_type above. */
static lock_type Xreader_type = {
	MU_RZERO_TO_ACQUIRE,
	MU_RADD_TO_ACQUIRE,
	MU_RHELD_IF_NON_ZERO,
	MU_RSET_WHEN_WAITING,
	MU_RCLEAR_ON_ACQUIRE,
	MU_RCLEAR_ON_UNCONTENDED_RELEASE
};
lock_type *nsync_reader_type_ = &Xreader_type;
255 | |
256 | NSYNC_CPP_END_ |
257 | |