xref: /illumos-gate/usr/src/uts/common/os/mutex.c (revision 7c478bd95313f5f23a4c958a745db2134aa0324)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate /*
30*7c478bd9Sstevel@tonic-gate  * Big Theory Statement for mutual exclusion locking primitives.
31*7c478bd9Sstevel@tonic-gate  *
32*7c478bd9Sstevel@tonic-gate  * A mutex serializes multiple threads so that only one thread
33*7c478bd9Sstevel@tonic-gate  * (the "owner" of the mutex) is active at a time.  See mutex(9F)
34*7c478bd9Sstevel@tonic-gate  * for a full description of the interfaces and programming model.
35*7c478bd9Sstevel@tonic-gate  * The rest of this comment describes the implementation.
36*7c478bd9Sstevel@tonic-gate  *
37*7c478bd9Sstevel@tonic-gate  * Mutexes come in two flavors: adaptive and spin.  mutex_init(9F)
38*7c478bd9Sstevel@tonic-gate  * determines the type based solely on the iblock cookie (PIL) argument.
39*7c478bd9Sstevel@tonic-gate  * PIL > LOCK_LEVEL implies a spin lock; everything else is adaptive.
40*7c478bd9Sstevel@tonic-gate  *
41*7c478bd9Sstevel@tonic-gate  * Spin mutexes block interrupts and spin until the lock becomes available.
42*7c478bd9Sstevel@tonic-gate  * A thread may not sleep, or call any function that might sleep, while
43*7c478bd9Sstevel@tonic-gate  * holding a spin mutex.  With few exceptions, spin mutexes should only
44*7c478bd9Sstevel@tonic-gate  * be used to synchronize with interrupt handlers.
45*7c478bd9Sstevel@tonic-gate  *
46*7c478bd9Sstevel@tonic-gate  * Adaptive mutexes (the default type) spin if the owner is running on
47*7c478bd9Sstevel@tonic-gate  * another CPU and block otherwise.  This policy is based on the assumption
48*7c478bd9Sstevel@tonic-gate  * that mutex hold times are typically short enough that the time spent
49*7c478bd9Sstevel@tonic-gate  * spinning is less than the time it takes to block.  If you need mutual
50*7c478bd9Sstevel@tonic-gate  * exclusion semantics with long hold times, consider an rwlock(9F) as
51*7c478bd9Sstevel@tonic-gate  * RW_WRITER.  Better still, reconsider the algorithm: if it requires
52*7c478bd9Sstevel@tonic-gate  * mutual exclusion for long periods of time, it's probably not scalable.
53*7c478bd9Sstevel@tonic-gate  *
54*7c478bd9Sstevel@tonic-gate  * Adaptive mutexes are overwhelmingly more common than spin mutexes,
55*7c478bd9Sstevel@tonic-gate  * so mutex_enter() assumes that the lock is adaptive.  We get away
56*7c478bd9Sstevel@tonic-gate  * with this by structuring mutexes so that an attempt to acquire a
57*7c478bd9Sstevel@tonic-gate  * spin mutex as adaptive always fails.  When mutex_enter() fails
58*7c478bd9Sstevel@tonic-gate  * it punts to mutex_vector_enter(), which does all the hard stuff.
59*7c478bd9Sstevel@tonic-gate  *
60*7c478bd9Sstevel@tonic-gate  * mutex_vector_enter() first checks the type.  If it's a spin mutex,
61*7c478bd9Sstevel@tonic-gate  * we just call lock_set_spl() and return.  If it's an adaptive mutex,
62*7c478bd9Sstevel@tonic-gate  * we check to see what the owner is doing.  If the owner is running,
63*7c478bd9Sstevel@tonic-gate  * we spin until the lock becomes available; if not, we mark the lock
64*7c478bd9Sstevel@tonic-gate  * as having waiters and block.
65*7c478bd9Sstevel@tonic-gate  *
66*7c478bd9Sstevel@tonic-gate  * Blocking on a mutex is a surprisingly delicate dance because, for speed,
67*7c478bd9Sstevel@tonic-gate  * mutex_exit() doesn't use an atomic instruction.  Thus we have to work
68*7c478bd9Sstevel@tonic-gate  * a little harder in the (rarely-executed) blocking path to make sure
69*7c478bd9Sstevel@tonic-gate  * we don't block on a mutex that's just been released -- otherwise we
70*7c478bd9Sstevel@tonic-gate  * might never be woken up.
71*7c478bd9Sstevel@tonic-gate  *
72*7c478bd9Sstevel@tonic-gate  * The logic for synchronizing mutex_vector_enter() with mutex_exit()
73*7c478bd9Sstevel@tonic-gate  * in the face of preemption and relaxed memory ordering is as follows:
74*7c478bd9Sstevel@tonic-gate  *
75*7c478bd9Sstevel@tonic-gate  * (1) Preemption in the middle of mutex_exit() must cause mutex_exit()
76*7c478bd9Sstevel@tonic-gate  *     to restart.  Each platform must enforce this by checking the
77*7c478bd9Sstevel@tonic-gate  *     interrupted PC in the interrupt handler (or on return from trap --
78*7c478bd9Sstevel@tonic-gate  *     whichever is more convenient for the platform).  If the PC
79*7c478bd9Sstevel@tonic-gate  *     lies within the critical region of mutex_exit(), the interrupt
80*7c478bd9Sstevel@tonic-gate  *     handler must reset the PC back to the beginning of mutex_exit().
81*7c478bd9Sstevel@tonic-gate  *     The critical region consists of all instructions up to, but not
82*7c478bd9Sstevel@tonic-gate  *     including, the store that clears the lock (which, of course,
83*7c478bd9Sstevel@tonic-gate  *     must never be executed twice.)
84*7c478bd9Sstevel@tonic-gate  *
85*7c478bd9Sstevel@tonic-gate  *     This ensures that the owner will always check for waiters after
86*7c478bd9Sstevel@tonic-gate  *     resuming from a previous preemption.
87*7c478bd9Sstevel@tonic-gate  *
88*7c478bd9Sstevel@tonic-gate  * (2) A thread resuming in mutex_exit() does (at least) the following:
89*7c478bd9Sstevel@tonic-gate  *
90*7c478bd9Sstevel@tonic-gate  *	when resuming:	set CPU_THREAD = owner
91*7c478bd9Sstevel@tonic-gate  *			membar #StoreLoad
92*7c478bd9Sstevel@tonic-gate  *
93*7c478bd9Sstevel@tonic-gate  *	in mutex_exit:	check waiters bit; do wakeup if set
94*7c478bd9Sstevel@tonic-gate  *			membar #LoadStore|#StoreStore
95*7c478bd9Sstevel@tonic-gate  *			clear owner
96*7c478bd9Sstevel@tonic-gate  *			(at this point, other threads may or may not grab
97*7c478bd9Sstevel@tonic-gate  *			the lock, and we may or may not reacquire it)
98*7c478bd9Sstevel@tonic-gate  *
99*7c478bd9Sstevel@tonic-gate  *	when blocking:	membar #StoreStore (due to disp_lock_enter())
100*7c478bd9Sstevel@tonic-gate  *			set CPU_THREAD = (possibly) someone else
101*7c478bd9Sstevel@tonic-gate  *
102*7c478bd9Sstevel@tonic-gate  * (3) A thread blocking in mutex_vector_enter() does the following:
103*7c478bd9Sstevel@tonic-gate  *
104*7c478bd9Sstevel@tonic-gate  *			set waiters bit
105*7c478bd9Sstevel@tonic-gate  *			membar #StoreLoad (via membar_enter())
106*7c478bd9Sstevel@tonic-gate  *			check CPU_THREAD for each CPU; abort if owner running
107*7c478bd9Sstevel@tonic-gate  *			membar #LoadLoad (via membar_consumer())
108*7c478bd9Sstevel@tonic-gate  *			check owner and waiters bit; abort if either changed
109*7c478bd9Sstevel@tonic-gate  *			block
110*7c478bd9Sstevel@tonic-gate  *
111*7c478bd9Sstevel@tonic-gate  * Thus the global memory orderings for (2) and (3) are as follows:
112*7c478bd9Sstevel@tonic-gate  *
113*7c478bd9Sstevel@tonic-gate  * (2M) mutex_exit() memory order:
114*7c478bd9Sstevel@tonic-gate  *
115*7c478bd9Sstevel@tonic-gate  *			STORE	CPU_THREAD = owner
116*7c478bd9Sstevel@tonic-gate  *			LOAD	waiters bit
117*7c478bd9Sstevel@tonic-gate  *			STORE	owner = NULL
118*7c478bd9Sstevel@tonic-gate  *			STORE	CPU_THREAD = (possibly) someone else
119*7c478bd9Sstevel@tonic-gate  *
120*7c478bd9Sstevel@tonic-gate  * (3M) mutex_vector_enter() memory order:
121*7c478bd9Sstevel@tonic-gate  *
122*7c478bd9Sstevel@tonic-gate  *			STORE	waiters bit = 1
123*7c478bd9Sstevel@tonic-gate  *			LOAD	CPU_THREAD for each CPU
124*7c478bd9Sstevel@tonic-gate  *			LOAD	owner and waiters bit
125*7c478bd9Sstevel@tonic-gate  *
126*7c478bd9Sstevel@tonic-gate  * It has been verified by exhaustive simulation that all possible global
127*7c478bd9Sstevel@tonic-gate  * memory orderings of (2M) interleaved with (3M) result in correct
128*7c478bd9Sstevel@tonic-gate  * behavior.  Moreover, these ordering constraints are minimal: changing
129*7c478bd9Sstevel@tonic-gate  * the ordering of anything in (2M) or (3M) breaks the algorithm, creating
130*7c478bd9Sstevel@tonic-gate  * windows for missed wakeups.  Note: the possibility that other threads
131*7c478bd9Sstevel@tonic-gate  * may grab the lock after the owner drops it can be factored out of the
132*7c478bd9Sstevel@tonic-gate  * memory ordering analysis because mutex_vector_enter() won't block
133*7c478bd9Sstevel@tonic-gate  * if the lock isn't still owned by the same thread.
134*7c478bd9Sstevel@tonic-gate  *
135*7c478bd9Sstevel@tonic-gate  * The only requirements of code outside the mutex implementation are
136*7c478bd9Sstevel@tonic-gate  * (1) mutex_exit() preemption fixup in interrupt handlers or trap return,
137*7c478bd9Sstevel@tonic-gate  * and (2) a membar #StoreLoad after setting CPU_THREAD in resume().
138*7c478bd9Sstevel@tonic-gate  * Note: idle threads cannot grab adaptive locks (since they cannot block),
139*7c478bd9Sstevel@tonic-gate  * so the membar may be safely omitted when resuming an idle thread.
140*7c478bd9Sstevel@tonic-gate  *
141*7c478bd9Sstevel@tonic-gate  * When a mutex has waiters, mutex_vector_exit() has several options:
142*7c478bd9Sstevel@tonic-gate  *
143*7c478bd9Sstevel@tonic-gate  * (1) Choose a waiter and make that thread the owner before waking it;
144*7c478bd9Sstevel@tonic-gate  *     this is known as "direct handoff" of ownership.
145*7c478bd9Sstevel@tonic-gate  *
146*7c478bd9Sstevel@tonic-gate  * (2) Drop the lock and wake one waiter.
147*7c478bd9Sstevel@tonic-gate  *
148*7c478bd9Sstevel@tonic-gate  * (3) Drop the lock, clear the waiters bit, and wake all waiters.
149*7c478bd9Sstevel@tonic-gate  *
150*7c478bd9Sstevel@tonic-gate  * In many ways (1) is the cleanest solution, but if a lock is moderately
151*7c478bd9Sstevel@tonic-gate  * contended it defeats the adaptive spin logic.  If we make some other
152*7c478bd9Sstevel@tonic-gate  * thread the owner, but he's not ONPROC yet, then all other threads on
153*7c478bd9Sstevel@tonic-gate  * other cpus that try to get the lock will conclude that the owner is
154*7c478bd9Sstevel@tonic-gate  * blocked, so they'll block too.  And so on -- it escalates quickly,
155*7c478bd9Sstevel@tonic-gate  * with every thread taking the blocking path rather than the spin path.
156*7c478bd9Sstevel@tonic-gate  * Thus, direct handoff is *not* a good idea for adaptive mutexes.
157*7c478bd9Sstevel@tonic-gate  *
158*7c478bd9Sstevel@tonic-gate  * Option (2) is the next most natural-seeming option, but it has several
159*7c478bd9Sstevel@tonic-gate  * annoying properties.  If there's more than one waiter, we must preserve
160*7c478bd9Sstevel@tonic-gate  * the waiters bit on an unheld lock.  On cas-capable platforms, where
161*7c478bd9Sstevel@tonic-gate  * the waiters bit is part of the lock word, this means that both 0x0
162*7c478bd9Sstevel@tonic-gate  * and 0x1 represent unheld locks, so we have to cas against *both*.
163*7c478bd9Sstevel@tonic-gate  * Priority inheritance also gets more complicated, because a lock can
164*7c478bd9Sstevel@tonic-gate  * have waiters but no owner to whom priority can be willed.  So while
165*7c478bd9Sstevel@tonic-gate  * it is possible to make option (2) work, it's surprisingly vile.
166*7c478bd9Sstevel@tonic-gate  *
167*7c478bd9Sstevel@tonic-gate  * Option (3), the least-intuitive at first glance, is what we actually do.
168*7c478bd9Sstevel@tonic-gate  * It has the advantage that because you always wake all waiters, you
169*7c478bd9Sstevel@tonic-gate  * never have to preserve the waiters bit.  Waking all waiters seems like
170*7c478bd9Sstevel@tonic-gate  * begging for a thundering herd problem, but consider: under option (2),
171*7c478bd9Sstevel@tonic-gate  * every thread that grabs and drops the lock will wake one waiter -- so
172*7c478bd9Sstevel@tonic-gate  * if the lock is fairly active, all waiters will be awakened very quickly
173*7c478bd9Sstevel@tonic-gate  * anyway.  Moreover, this is how adaptive locks are *supposed* to work.
174*7c478bd9Sstevel@tonic-gate  * The blocking case is rare; the more common case (by 3-4 orders of
175*7c478bd9Sstevel@tonic-gate  * magnitude) is that one or more threads spin waiting to get the lock.
176*7c478bd9Sstevel@tonic-gate  * Only direct handoff can prevent the thundering herd problem, but as
177*7c478bd9Sstevel@tonic-gate  * mentioned earlier, that would tend to defeat the adaptive spin logic.
178*7c478bd9Sstevel@tonic-gate  * In practice, option (3) works well because the blocking case is rare.
179*7c478bd9Sstevel@tonic-gate  */
180*7c478bd9Sstevel@tonic-gate 
181*7c478bd9Sstevel@tonic-gate /*
182*7c478bd9Sstevel@tonic-gate  * delayed lock retry with exponential delay for spin locks
183*7c478bd9Sstevel@tonic-gate  *
184*7c478bd9Sstevel@tonic-gate  * It is noted above that for both the spin locks and the adaptive locks,
185*7c478bd9Sstevel@tonic-gate  * spinning is the dominant mode of operation.  So long as there is only
186*7c478bd9Sstevel@tonic-gate  * one thread waiting on a lock, the naive spin loop works very well in
187*7c478bd9Sstevel@tonic-gate  * cache based architectures.  The lock data structure is pulled into the
188*7c478bd9Sstevel@tonic-gate  * cache of the processor with the waiting/spinning thread and no further
189*7c478bd9Sstevel@tonic-gate  * memory traffic is generated until the lock is released.  Unfortunately,
190*7c478bd9Sstevel@tonic-gate  * once two or more threads are waiting on a lock, the naive spin has
191*7c478bd9Sstevel@tonic-gate  * the property of generating maximum memory traffic from each spinning
192*7c478bd9Sstevel@tonic-gate  * thread as the spinning threads contend for the lock data structure.
193*7c478bd9Sstevel@tonic-gate  *
194*7c478bd9Sstevel@tonic-gate  * By executing a delay loop before retrying a lock, a waiting thread
195*7c478bd9Sstevel@tonic-gate  * can reduce its memory traffic by a large factor, depending on the
196*7c478bd9Sstevel@tonic-gate  * size of the delay loop.  A large delay loop greatly reduces the memory
197*7c478bd9Sstevel@tonic-gate  * traffic, but has the drawback of having a period of time when
198*7c478bd9Sstevel@tonic-gate  * no thread is attempting to gain the lock even though several threads
199*7c478bd9Sstevel@tonic-gate  * might be waiting.  A small delay loop has the drawback of not
200*7c478bd9Sstevel@tonic-gate  * much reduction in memory traffic, but reduces the potential idle time.
201*7c478bd9Sstevel@tonic-gate  * The theory of the exponential delay code is to start with a short
202*7c478bd9Sstevel@tonic-gate  * delay loop and double the waiting time on each iteration, up to
203*7c478bd9Sstevel@tonic-gate  * a preselected maximum.  The BACKOFF_BASE provides the equivalent
204*7c478bd9Sstevel@tonic-gate  * of 2 to 3 memory references delay for US-III+ and US-IV architectures.
205*7c478bd9Sstevel@tonic-gate  * The BACKOFF_CAP is the equivalent of 50 to 100 memory references of
206*7c478bd9Sstevel@tonic-gate  * time (less than 12 microseconds for a 1000 MHz system).
207*7c478bd9Sstevel@tonic-gate  *
208*7c478bd9Sstevel@tonic-gate  * To determine appropriate BACKOFF_BASE and BACKOFF_CAP values,
209*7c478bd9Sstevel@tonic-gate  * studies on US-III+ and US-IV systems using 1 to 66 threads were
210*7c478bd9Sstevel@tonic-gate  * done.  A range of possible values were studied.
211*7c478bd9Sstevel@tonic-gate  * Performance differences below 10 threads were not large.  For
212*7c478bd9Sstevel@tonic-gate  * systems with more threads, substantial increases in total lock
213*7c478bd9Sstevel@tonic-gate  * throughput were observed with the given values.  For cases where
214*7c478bd9Sstevel@tonic-gate  * more than 20 threads were waiting on the same lock, lock throughput
215*7c478bd9Sstevel@tonic-gate  * increased by a factor of 5 or more using the backoff algorithm.
216*7c478bd9Sstevel@tonic-gate  */
217*7c478bd9Sstevel@tonic-gate 
218*7c478bd9Sstevel@tonic-gate #include <sys/param.h>
219*7c478bd9Sstevel@tonic-gate #include <sys/time.h>
220*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
221*7c478bd9Sstevel@tonic-gate #include <sys/thread.h>
222*7c478bd9Sstevel@tonic-gate #include <sys/debug.h>
223*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
224*7c478bd9Sstevel@tonic-gate #include <sys/sobject.h>
225*7c478bd9Sstevel@tonic-gate #include <sys/turnstile.h>
226*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
227*7c478bd9Sstevel@tonic-gate #include <sys/mutex_impl.h>
228*7c478bd9Sstevel@tonic-gate #include <sys/spl.h>
229*7c478bd9Sstevel@tonic-gate #include <sys/lockstat.h>
230*7c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
231*7c478bd9Sstevel@tonic-gate #include <sys/cpu.h>
232*7c478bd9Sstevel@tonic-gate #include <sys/stack.h>
233*7c478bd9Sstevel@tonic-gate 
/*
 * Bounds for the exponential backoff in mutex_vector_enter(), expressed as
 * iteration counts of its empty delay loop.  The delay starts at
 * BACKOFF_BASE iterations, is doubled after every pass through the spin
 * loop, and is clamped to BACKOFF_CAP.
 */
234*7c478bd9Sstevel@tonic-gate #define	BACKOFF_BASE	50
235*7c478bd9Sstevel@tonic-gate #define	BACKOFF_CAP 	1600
236*7c478bd9Sstevel@tonic-gate 
237*7c478bd9Sstevel@tonic-gate /*
238*7c478bd9Sstevel@tonic-gate  * The sobj_ops vector exports a set of functions needed when a thread
239*7c478bd9Sstevel@tonic-gate  * is asleep on a synchronization object of this type.
240*7c478bd9Sstevel@tonic-gate  */
/*
 * Handed to turnstile_block() by mutex_vector_enter() when a thread goes
 * to sleep on an adaptive mutex.  The initializer is positional: the
 * SOBJ_MUTEX type tag, then mutex_owner(), then two turnstile callbacks.
 * NOTE(review): the sobj_ops_t field names are not visible in this file;
 * confirm the member order against <sys/sobject.h> before reordering.
 */
241*7c478bd9Sstevel@tonic-gate static sobj_ops_t mutex_sobj_ops = {
242*7c478bd9Sstevel@tonic-gate 	SOBJ_MUTEX, mutex_owner, turnstile_stay_asleep, turnstile_change_pri
243*7c478bd9Sstevel@tonic-gate };
244*7c478bd9Sstevel@tonic-gate 
245*7c478bd9Sstevel@tonic-gate /*
246*7c478bd9Sstevel@tonic-gate  * If the system panics on a mutex, save the address of the offending
247*7c478bd9Sstevel@tonic-gate  * mutex in panic_mutex_addr, and save the contents in panic_mutex.
248*7c478bd9Sstevel@tonic-gate  */
/*
 * Both are written only by mutex_panic().  panic_mutex_addr is claimed
 * with casptr() against NULL (presumably an atomic compare-and-swap), and
 * panic_mutex is filled in only by the caller whose claim succeeded, so
 * the pair describes the first mutex that reached mutex_panic().
 */
249*7c478bd9Sstevel@tonic-gate static mutex_impl_t panic_mutex;
250*7c478bd9Sstevel@tonic-gate static mutex_impl_t *panic_mutex_addr;
251*7c478bd9Sstevel@tonic-gate 
252*7c478bd9Sstevel@tonic-gate static void
253*7c478bd9Sstevel@tonic-gate mutex_panic(char *msg, mutex_impl_t *lp)
254*7c478bd9Sstevel@tonic-gate {
255*7c478bd9Sstevel@tonic-gate 	if (panicstr)
256*7c478bd9Sstevel@tonic-gate 		return;
257*7c478bd9Sstevel@tonic-gate 
258*7c478bd9Sstevel@tonic-gate 	if (casptr(&panic_mutex_addr, NULL, lp) == NULL)
259*7c478bd9Sstevel@tonic-gate 		panic_mutex = *lp;
260*7c478bd9Sstevel@tonic-gate 
261*7c478bd9Sstevel@tonic-gate 	panic("%s, lp=%p owner=%p thread=%p",
262*7c478bd9Sstevel@tonic-gate 	    msg, lp, MUTEX_OWNER(&panic_mutex), curthread);
263*7c478bd9Sstevel@tonic-gate }
264*7c478bd9Sstevel@tonic-gate 
265*7c478bd9Sstevel@tonic-gate /*
266*7c478bd9Sstevel@tonic-gate  * mutex_vector_enter() is called from the assembly mutex_enter() routine
267*7c478bd9Sstevel@tonic-gate  * if the lock is held or is not of type MUTEX_ADAPTIVE.
 *
 * lp is the mutex to acquire.  Outline:
 *
 *   - Spin mutex: hand off to lock_set_spl() and return.
 *   - Neither spin nor adaptive: mutex_panic("bad mutex").
 *   - Adaptive: loop with exponential backoff (BACKOFF_BASE doubling up
 *     to BACKOFF_CAP).  Each pass either grabs a free lock with
 *     mutex_adaptive_tryenter(), keeps spinning because the owner was
 *     found in some CPU's cpu_thread, or sets the waiters bit and sleeps
 *     on the turnstile.  The ordering of the waiters-bit store, the two
 *     memory barriers and the rechecks follows step (3) of the Big Theory
 *     Statement at the top of this file and must not be rearranged.
 *
 * If panicstr becomes set while in the adaptive loop, the function returns
 * without having acquired the lock.
 *
 * On normal return from the adaptive path the caller owns the mutex.  A
 * lockstat event is recorded for either the spin count or the total time
 * spent blocked, followed by an acquire event.
268*7c478bd9Sstevel@tonic-gate  */
269*7c478bd9Sstevel@tonic-gate void
270*7c478bd9Sstevel@tonic-gate mutex_vector_enter(mutex_impl_t *lp)
271*7c478bd9Sstevel@tonic-gate {
272*7c478bd9Sstevel@tonic-gate 	kthread_id_t	owner;
273*7c478bd9Sstevel@tonic-gate 	hrtime_t	sleep_time = 0;	/* how long we slept */
274*7c478bd9Sstevel@tonic-gate 	uint_t		spin_count = 0;	/* how many times we spun */
275*7c478bd9Sstevel@tonic-gate 	cpu_t 		*cpup, *last_cpu;
276*7c478bd9Sstevel@tonic-gate 	extern cpu_t	*cpu_list;
277*7c478bd9Sstevel@tonic-gate 	turnstile_t	*ts;
	/* Owner and waiters bit are always re-read through this volatile alias. */
278*7c478bd9Sstevel@tonic-gate 	volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
279*7c478bd9Sstevel@tonic-gate 	int		backoff;	/* current backoff */
280*7c478bd9Sstevel@tonic-gate 	int		backctr;	/* ctr for backoff */
281*7c478bd9Sstevel@tonic-gate 
282*7c478bd9Sstevel@tonic-gate 	ASSERT_STACK_ALIGNED();
283*7c478bd9Sstevel@tonic-gate 
	/*
	 * Spin mutex: lock_set_spl() is given the lock byte, the mutex's
	 * m_minspl, and m_oldspl as the place to save the previous spl;
	 * mutex_vector_exit() later passes m_oldspl to lock_clear_splx().
	 */
284*7c478bd9Sstevel@tonic-gate 	if (MUTEX_TYPE_SPIN(lp)) {
285*7c478bd9Sstevel@tonic-gate 		lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
286*7c478bd9Sstevel@tonic-gate 		    &lp->m_spin.m_oldspl);
287*7c478bd9Sstevel@tonic-gate 		return;
288*7c478bd9Sstevel@tonic-gate 	}
289*7c478bd9Sstevel@tonic-gate 
	/* mutex_panic() returns only if a panic is already in progress. */
290*7c478bd9Sstevel@tonic-gate 	if (!MUTEX_TYPE_ADAPTIVE(lp)) {
291*7c478bd9Sstevel@tonic-gate 		mutex_panic("mutex_enter: bad mutex", lp);
292*7c478bd9Sstevel@tonic-gate 		return;
293*7c478bd9Sstevel@tonic-gate 	}
294*7c478bd9Sstevel@tonic-gate 
295*7c478bd9Sstevel@tonic-gate 	/*
296*7c478bd9Sstevel@tonic-gate 	 * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
297*7c478bd9Sstevel@tonic-gate 	 * We can migrate after loading CPU but before checking CPU_ON_INTR,
298*7c478bd9Sstevel@tonic-gate 	 * so we must verify by disabling preemption and loading CPU again.
299*7c478bd9Sstevel@tonic-gate 	 */
300*7c478bd9Sstevel@tonic-gate 	cpup = CPU;
301*7c478bd9Sstevel@tonic-gate 	if (CPU_ON_INTR(cpup) && !panicstr) {
302*7c478bd9Sstevel@tonic-gate 		kpreempt_disable();
303*7c478bd9Sstevel@tonic-gate 		if (CPU_ON_INTR(CPU))
304*7c478bd9Sstevel@tonic-gate 			mutex_panic("mutex_enter: adaptive at high PIL", lp);
305*7c478bd9Sstevel@tonic-gate 		kpreempt_enable();
306*7c478bd9Sstevel@tonic-gate 	}
307*7c478bd9Sstevel@tonic-gate 
308*7c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);
309*7c478bd9Sstevel@tonic-gate 
310*7c478bd9Sstevel@tonic-gate 	backoff = BACKOFF_BASE;
311*7c478bd9Sstevel@tonic-gate 
312*7c478bd9Sstevel@tonic-gate 	for (;;) {
		/*
		 * "goto spin" lands at the top of the loop body, so it
		 * behaves like "continue": every retry pays the backoff
		 * delay below and bumps spin_count.
		 */
313*7c478bd9Sstevel@tonic-gate spin:
314*7c478bd9Sstevel@tonic-gate 		spin_count++;
315*7c478bd9Sstevel@tonic-gate 		/*
316*7c478bd9Sstevel@tonic-gate 		 * Add an exponential backoff delay before trying again
317*7c478bd9Sstevel@tonic-gate 		 * to touch the mutex data structure.
318*7c478bd9Sstevel@tonic-gate 		 * the spin_count test and call to nulldev are to prevent
319*7c478bd9Sstevel@tonic-gate 		 * the compiler optimizer from eliminating the delay loop.
320*7c478bd9Sstevel@tonic-gate 		 */
321*7c478bd9Sstevel@tonic-gate 		for (backctr = backoff; backctr; backctr--) {
322*7c478bd9Sstevel@tonic-gate 			if (!spin_count) (void) nulldev();
323*7c478bd9Sstevel@tonic-gate 		};    /* delay */
324*7c478bd9Sstevel@tonic-gate 		backoff = backoff << 1;			/* double it */
325*7c478bd9Sstevel@tonic-gate 		if (backoff > BACKOFF_CAP) {
326*7c478bd9Sstevel@tonic-gate 			backoff = BACKOFF_CAP;
327*7c478bd9Sstevel@tonic-gate 		}
328*7c478bd9Sstevel@tonic-gate 
329*7c478bd9Sstevel@tonic-gate 		SMT_PAUSE();
330*7c478bd9Sstevel@tonic-gate 
		/* Once the system is panicking, return without the lock. */
331*7c478bd9Sstevel@tonic-gate 		if (panicstr)
332*7c478bd9Sstevel@tonic-gate 			return;
333*7c478bd9Sstevel@tonic-gate 
		/*
		 * No owner recorded: try to take the lock.  Success leaves
		 * the loop; failure goes around again.
		 */
334*7c478bd9Sstevel@tonic-gate 		if ((owner = MUTEX_OWNER(vlp)) == NULL) {
335*7c478bd9Sstevel@tonic-gate 			if (mutex_adaptive_tryenter(lp))
336*7c478bd9Sstevel@tonic-gate 				break;
337*7c478bd9Sstevel@tonic-gate 			continue;
338*7c478bd9Sstevel@tonic-gate 		}
339*7c478bd9Sstevel@tonic-gate 
340*7c478bd9Sstevel@tonic-gate 		if (owner == curthread)
341*7c478bd9Sstevel@tonic-gate 			mutex_panic("recursive mutex_enter", lp);
342*7c478bd9Sstevel@tonic-gate 
343*7c478bd9Sstevel@tonic-gate 		/*
344*7c478bd9Sstevel@tonic-gate 		 * If lock is held but owner is not yet set, spin.
345*7c478bd9Sstevel@tonic-gate 		 * (Only relevant for platforms that don't have cas.)
346*7c478bd9Sstevel@tonic-gate 		 */
347*7c478bd9Sstevel@tonic-gate 		if (owner == MUTEX_NO_OWNER)
348*7c478bd9Sstevel@tonic-gate 			continue;
349*7c478bd9Sstevel@tonic-gate 
350*7c478bd9Sstevel@tonic-gate 		/*
351*7c478bd9Sstevel@tonic-gate 		 * When searching the other CPUs, start with the one where
352*7c478bd9Sstevel@tonic-gate 		 * we last saw the owner thread.  If owner is running, spin.
353*7c478bd9Sstevel@tonic-gate 		 *
354*7c478bd9Sstevel@tonic-gate 		 * We must disable preemption at this point to guarantee
355*7c478bd9Sstevel@tonic-gate 		 * that the list doesn't change while we traverse it
356*7c478bd9Sstevel@tonic-gate 		 * without the cpu_lock mutex.  While preemption is
357*7c478bd9Sstevel@tonic-gate 		 * disabled, we must revalidate our cached cpu pointer.
358*7c478bd9Sstevel@tonic-gate 		 */
359*7c478bd9Sstevel@tonic-gate 		kpreempt_disable();
360*7c478bd9Sstevel@tonic-gate 		if (cpup->cpu_next == NULL)
361*7c478bd9Sstevel@tonic-gate 			cpup = cpu_list;
362*7c478bd9Sstevel@tonic-gate 		last_cpu = cpup;	/* mark end of search */
363*7c478bd9Sstevel@tonic-gate 		do {
364*7c478bd9Sstevel@tonic-gate 			if (cpup->cpu_thread == owner) {
365*7c478bd9Sstevel@tonic-gate 				kpreempt_enable();
366*7c478bd9Sstevel@tonic-gate 				goto spin;
367*7c478bd9Sstevel@tonic-gate 			}
368*7c478bd9Sstevel@tonic-gate 		} while ((cpup = cpup->cpu_next) != last_cpu);
369*7c478bd9Sstevel@tonic-gate 		kpreempt_enable();
370*7c478bd9Sstevel@tonic-gate 
371*7c478bd9Sstevel@tonic-gate 		/*
372*7c478bd9Sstevel@tonic-gate 		 * The owner appears not to be running, so block.
373*7c478bd9Sstevel@tonic-gate 		 * See the Big Theory Statement for memory ordering issues.
374*7c478bd9Sstevel@tonic-gate 		 */
375*7c478bd9Sstevel@tonic-gate 		ts = turnstile_lookup(lp);
376*7c478bd9Sstevel@tonic-gate 		MUTEX_SET_WAITERS(lp);
377*7c478bd9Sstevel@tonic-gate 		membar_enter();
378*7c478bd9Sstevel@tonic-gate 
379*7c478bd9Sstevel@tonic-gate 		/*
380*7c478bd9Sstevel@tonic-gate 		 * Recheck whether owner is running after waiters bit hits
381*7c478bd9Sstevel@tonic-gate 		 * global visibility (above).  If owner is running, spin.
382*7c478bd9Sstevel@tonic-gate 		 *
383*7c478bd9Sstevel@tonic-gate 		 * Since we are at ipl DISP_LEVEL, kernel preemption is
384*7c478bd9Sstevel@tonic-gate 		 * disabled, however we still need to revalidate our cached
385*7c478bd9Sstevel@tonic-gate 		 * cpu pointer to make sure the cpu hasn't been deleted.
		 *
		 * The first scan fell through with cpup == last_cpu, so
		 * unless the pointer is reset here this scan walks the
		 * same ring from the same starting CPU.  Every exit from
		 * this point on must release the turnstile, either via
		 * turnstile_exit() or by blocking in turnstile_block().
386*7c478bd9Sstevel@tonic-gate 		 */
387*7c478bd9Sstevel@tonic-gate 		if (cpup->cpu_next == NULL)
388*7c478bd9Sstevel@tonic-gate 			last_cpu = cpup = cpu_list;
389*7c478bd9Sstevel@tonic-gate 		do {
390*7c478bd9Sstevel@tonic-gate 			if (cpup->cpu_thread == owner) {
391*7c478bd9Sstevel@tonic-gate 				turnstile_exit(lp);
392*7c478bd9Sstevel@tonic-gate 				goto spin;
393*7c478bd9Sstevel@tonic-gate 			}
394*7c478bd9Sstevel@tonic-gate 		} while ((cpup = cpup->cpu_next) != last_cpu);
395*7c478bd9Sstevel@tonic-gate 		membar_consumer();
396*7c478bd9Sstevel@tonic-gate 
397*7c478bd9Sstevel@tonic-gate 		/*
398*7c478bd9Sstevel@tonic-gate 		 * If owner and waiters bit are unchanged, block.
		 * sleep_time accumulates the gethrtime() delta across
		 * each call to turnstile_block().
399*7c478bd9Sstevel@tonic-gate 		 */
400*7c478bd9Sstevel@tonic-gate 		if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
401*7c478bd9Sstevel@tonic-gate 			sleep_time -= gethrtime();
402*7c478bd9Sstevel@tonic-gate 			(void) turnstile_block(ts, TS_WRITER_Q, lp,
403*7c478bd9Sstevel@tonic-gate 			    &mutex_sobj_ops, NULL, NULL);
404*7c478bd9Sstevel@tonic-gate 			sleep_time += gethrtime();
405*7c478bd9Sstevel@tonic-gate 		} else {
406*7c478bd9Sstevel@tonic-gate 			turnstile_exit(lp);
407*7c478bd9Sstevel@tonic-gate 		}
408*7c478bd9Sstevel@tonic-gate 	}
409*7c478bd9Sstevel@tonic-gate 
410*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_OWNER(lp) == curthread);
411*7c478bd9Sstevel@tonic-gate 
	/*
	 * sleep_time is modified only around turnstile_block(), so zero
	 * means we never blocked: report the spin count.  Otherwise report
	 * the accumulated time spent blocked.
	 */
412*7c478bd9Sstevel@tonic-gate 	if (sleep_time == 0) {
413*7c478bd9Sstevel@tonic-gate 		LOCKSTAT_RECORD(LS_MUTEX_ENTER_SPIN, lp, spin_count);
414*7c478bd9Sstevel@tonic-gate 	} else {
415*7c478bd9Sstevel@tonic-gate 		LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
416*7c478bd9Sstevel@tonic-gate 	}
417*7c478bd9Sstevel@tonic-gate 
418*7c478bd9Sstevel@tonic-gate 	LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
419*7c478bd9Sstevel@tonic-gate }
420*7c478bd9Sstevel@tonic-gate 
421*7c478bd9Sstevel@tonic-gate /*
422*7c478bd9Sstevel@tonic-gate  * mutex_vector_tryenter() is called from the assembly mutex_tryenter()
423*7c478bd9Sstevel@tonic-gate  * routine if the lock is held or is not of type MUTEX_ADAPTIVE.
424*7c478bd9Sstevel@tonic-gate  */
425*7c478bd9Sstevel@tonic-gate int
426*7c478bd9Sstevel@tonic-gate mutex_vector_tryenter(mutex_impl_t *lp)
427*7c478bd9Sstevel@tonic-gate {
428*7c478bd9Sstevel@tonic-gate 	int s;
429*7c478bd9Sstevel@tonic-gate 
430*7c478bd9Sstevel@tonic-gate 	if (MUTEX_TYPE_ADAPTIVE(lp))
431*7c478bd9Sstevel@tonic-gate 		return (0);		/* we already tried in assembly */
432*7c478bd9Sstevel@tonic-gate 
433*7c478bd9Sstevel@tonic-gate 	if (!MUTEX_TYPE_SPIN(lp)) {
434*7c478bd9Sstevel@tonic-gate 		mutex_panic("mutex_tryenter: bad mutex", lp);
435*7c478bd9Sstevel@tonic-gate 		return (0);
436*7c478bd9Sstevel@tonic-gate 	}
437*7c478bd9Sstevel@tonic-gate 
438*7c478bd9Sstevel@tonic-gate 	s = splr(lp->m_spin.m_minspl);
439*7c478bd9Sstevel@tonic-gate 	if (lock_try(&lp->m_spin.m_spinlock)) {
440*7c478bd9Sstevel@tonic-gate 		lp->m_spin.m_oldspl = (ushort_t)s;
441*7c478bd9Sstevel@tonic-gate 		return (1);
442*7c478bd9Sstevel@tonic-gate 	}
443*7c478bd9Sstevel@tonic-gate 	splx(s);
444*7c478bd9Sstevel@tonic-gate 	return (0);
445*7c478bd9Sstevel@tonic-gate }
446*7c478bd9Sstevel@tonic-gate 
447*7c478bd9Sstevel@tonic-gate /*
448*7c478bd9Sstevel@tonic-gate  * mutex_vector_exit() is called from mutex_exit() if the lock is not
449*7c478bd9Sstevel@tonic-gate  * adaptive, has waiters, or is not owned by the current thread (panic).
450*7c478bd9Sstevel@tonic-gate  */
451*7c478bd9Sstevel@tonic-gate void
452*7c478bd9Sstevel@tonic-gate mutex_vector_exit(mutex_impl_t *lp)
453*7c478bd9Sstevel@tonic-gate {
454*7c478bd9Sstevel@tonic-gate 	turnstile_t *ts;
455*7c478bd9Sstevel@tonic-gate 
456*7c478bd9Sstevel@tonic-gate 	if (MUTEX_TYPE_SPIN(lp)) {
457*7c478bd9Sstevel@tonic-gate 		lock_clear_splx(&lp->m_spin.m_spinlock, lp->m_spin.m_oldspl);
458*7c478bd9Sstevel@tonic-gate 		return;
459*7c478bd9Sstevel@tonic-gate 	}
460*7c478bd9Sstevel@tonic-gate 
461*7c478bd9Sstevel@tonic-gate 	if (MUTEX_OWNER(lp) != curthread) {
462*7c478bd9Sstevel@tonic-gate 		mutex_panic("mutex_exit: not owner", lp);
463*7c478bd9Sstevel@tonic-gate 		return;
464*7c478bd9Sstevel@tonic-gate 	}
465*7c478bd9Sstevel@tonic-gate 
466*7c478bd9Sstevel@tonic-gate 	ts = turnstile_lookup(lp);
467*7c478bd9Sstevel@tonic-gate 	MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
468*7c478bd9Sstevel@tonic-gate 	if (ts == NULL)
469*7c478bd9Sstevel@tonic-gate 		turnstile_exit(lp);
470*7c478bd9Sstevel@tonic-gate 	else
471*7c478bd9Sstevel@tonic-gate 		turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
472*7c478bd9Sstevel@tonic-gate 	LOCKSTAT_RECORD0(LS_MUTEX_EXIT_RELEASE, lp);
473*7c478bd9Sstevel@tonic-gate }
474*7c478bd9Sstevel@tonic-gate 
475*7c478bd9Sstevel@tonic-gate int
476*7c478bd9Sstevel@tonic-gate mutex_owned(kmutex_t *mp)
477*7c478bd9Sstevel@tonic-gate {
478*7c478bd9Sstevel@tonic-gate 	mutex_impl_t *lp = (mutex_impl_t *)mp;
479*7c478bd9Sstevel@tonic-gate 
480*7c478bd9Sstevel@tonic-gate 	if (panicstr)
481*7c478bd9Sstevel@tonic-gate 		return (1);
482*7c478bd9Sstevel@tonic-gate 
483*7c478bd9Sstevel@tonic-gate 	if (MUTEX_TYPE_ADAPTIVE(lp))
484*7c478bd9Sstevel@tonic-gate 		return (MUTEX_OWNER(lp) == curthread);
485*7c478bd9Sstevel@tonic-gate 	return (LOCK_HELD(&lp->m_spin.m_spinlock));
486*7c478bd9Sstevel@tonic-gate }
487*7c478bd9Sstevel@tonic-gate 
488*7c478bd9Sstevel@tonic-gate kthread_t *
489*7c478bd9Sstevel@tonic-gate mutex_owner(kmutex_t *mp)
490*7c478bd9Sstevel@tonic-gate {
491*7c478bd9Sstevel@tonic-gate 	mutex_impl_t *lp = (mutex_impl_t *)mp;
492*7c478bd9Sstevel@tonic-gate 	kthread_id_t t;
493*7c478bd9Sstevel@tonic-gate 
494*7c478bd9Sstevel@tonic-gate 	if (MUTEX_TYPE_ADAPTIVE(lp) && (t = MUTEX_OWNER(lp)) != MUTEX_NO_OWNER)
495*7c478bd9Sstevel@tonic-gate 		return (t);
496*7c478bd9Sstevel@tonic-gate 	return (NULL);
497*7c478bd9Sstevel@tonic-gate }
498*7c478bd9Sstevel@tonic-gate 
499*7c478bd9Sstevel@tonic-gate /*
500*7c478bd9Sstevel@tonic-gate  * The iblock cookie 'ibc' is the spl level associated with the lock;
501*7c478bd9Sstevel@tonic-gate  * this alone determines whether the lock will be ADAPTIVE or SPIN.
502*7c478bd9Sstevel@tonic-gate  *
503*7c478bd9Sstevel@tonic-gate  * Adaptive mutexes created in zeroed memory do not need to call
504*7c478bd9Sstevel@tonic-gate  * mutex_init() as their allocation in this fashion guarantees
505*7c478bd9Sstevel@tonic-gate  * their initialization.
506*7c478bd9Sstevel@tonic-gate  *   eg adaptive mutexes created as static within the BSS or allocated
507*7c478bd9Sstevel@tonic-gate  *      by kmem_zalloc().
508*7c478bd9Sstevel@tonic-gate  */
509*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
510*7c478bd9Sstevel@tonic-gate void
511*7c478bd9Sstevel@tonic-gate mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc)
512*7c478bd9Sstevel@tonic-gate {
513*7c478bd9Sstevel@tonic-gate 	mutex_impl_t *lp = (mutex_impl_t *)mp;
514*7c478bd9Sstevel@tonic-gate 
515*7c478bd9Sstevel@tonic-gate 	ASSERT(ibc < (void *)KERNELBASE);	/* see 1215173 */
516*7c478bd9Sstevel@tonic-gate 
517*7c478bd9Sstevel@tonic-gate 	if ((intptr_t)ibc > ipltospl(LOCK_LEVEL) && ibc < (void *)KERNELBASE) {
518*7c478bd9Sstevel@tonic-gate 		ASSERT(type != MUTEX_ADAPTIVE && type != MUTEX_DEFAULT);
519*7c478bd9Sstevel@tonic-gate 		MUTEX_SET_TYPE(lp, MUTEX_SPIN);
520*7c478bd9Sstevel@tonic-gate 		LOCK_INIT_CLEAR(&lp->m_spin.m_spinlock);
521*7c478bd9Sstevel@tonic-gate 		LOCK_INIT_HELD(&lp->m_spin.m_dummylock);
522*7c478bd9Sstevel@tonic-gate 		lp->m_spin.m_minspl = (int)(intptr_t)ibc;
523*7c478bd9Sstevel@tonic-gate 	} else {
524*7c478bd9Sstevel@tonic-gate 		ASSERT(type != MUTEX_SPIN);
525*7c478bd9Sstevel@tonic-gate 		MUTEX_SET_TYPE(lp, MUTEX_ADAPTIVE);
526*7c478bd9Sstevel@tonic-gate 		MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
527*7c478bd9Sstevel@tonic-gate 	}
528*7c478bd9Sstevel@tonic-gate }
529*7c478bd9Sstevel@tonic-gate 
530*7c478bd9Sstevel@tonic-gate void
531*7c478bd9Sstevel@tonic-gate mutex_destroy(kmutex_t *mp)
532*7c478bd9Sstevel@tonic-gate {
533*7c478bd9Sstevel@tonic-gate 	mutex_impl_t *lp = (mutex_impl_t *)mp;
534*7c478bd9Sstevel@tonic-gate 
535*7c478bd9Sstevel@tonic-gate 	if (lp->m_owner == 0 && !MUTEX_HAS_WAITERS(lp)) {
536*7c478bd9Sstevel@tonic-gate 		MUTEX_DESTROY(lp);
537*7c478bd9Sstevel@tonic-gate 	} else if (MUTEX_TYPE_SPIN(lp)) {
538*7c478bd9Sstevel@tonic-gate 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
539*7c478bd9Sstevel@tonic-gate 		MUTEX_DESTROY(lp);
540*7c478bd9Sstevel@tonic-gate 	} else if (MUTEX_TYPE_ADAPTIVE(lp)) {
541*7c478bd9Sstevel@tonic-gate 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
542*7c478bd9Sstevel@tonic-gate 		if (MUTEX_OWNER(lp) != curthread)
543*7c478bd9Sstevel@tonic-gate 			mutex_panic("mutex_destroy: not owner", lp);
544*7c478bd9Sstevel@tonic-gate 		if (MUTEX_HAS_WAITERS(lp)) {
545*7c478bd9Sstevel@tonic-gate 			turnstile_t *ts = turnstile_lookup(lp);
546*7c478bd9Sstevel@tonic-gate 			turnstile_exit(lp);
547*7c478bd9Sstevel@tonic-gate 			if (ts != NULL)
548*7c478bd9Sstevel@tonic-gate 				mutex_panic("mutex_destroy: has waiters", lp);
549*7c478bd9Sstevel@tonic-gate 		}
550*7c478bd9Sstevel@tonic-gate 		MUTEX_DESTROY(lp);
551*7c478bd9Sstevel@tonic-gate 	} else {
552*7c478bd9Sstevel@tonic-gate 		mutex_panic("mutex_destroy: bad mutex", lp);
553*7c478bd9Sstevel@tonic-gate 	}
554*7c478bd9Sstevel@tonic-gate }
555*7c478bd9Sstevel@tonic-gate 
556*7c478bd9Sstevel@tonic-gate /*
557*7c478bd9Sstevel@tonic-gate  * Simple C support for the cases where spin locks miss on the first try.
558*7c478bd9Sstevel@tonic-gate  */
559*7c478bd9Sstevel@tonic-gate void
560*7c478bd9Sstevel@tonic-gate lock_set_spin(lock_t *lp)
561*7c478bd9Sstevel@tonic-gate {
562*7c478bd9Sstevel@tonic-gate 	int spin_count = 1;
563*7c478bd9Sstevel@tonic-gate 	int backoff;	/* current backoff */
564*7c478bd9Sstevel@tonic-gate 	int backctr;	/* ctr for backoff */
565*7c478bd9Sstevel@tonic-gate 
566*7c478bd9Sstevel@tonic-gate 	if (panicstr)
567*7c478bd9Sstevel@tonic-gate 		return;
568*7c478bd9Sstevel@tonic-gate 
569*7c478bd9Sstevel@tonic-gate 	if (ncpus == 1)
570*7c478bd9Sstevel@tonic-gate 		panic("lock_set: %p lock held and only one CPU", lp);
571*7c478bd9Sstevel@tonic-gate 
572*7c478bd9Sstevel@tonic-gate 	backoff = BACKOFF_BASE;
573*7c478bd9Sstevel@tonic-gate 	while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
574*7c478bd9Sstevel@tonic-gate 		if (panicstr)
575*7c478bd9Sstevel@tonic-gate 			return;
576*7c478bd9Sstevel@tonic-gate 		spin_count++;
577*7c478bd9Sstevel@tonic-gate 		/*
578*7c478bd9Sstevel@tonic-gate 		 * Add an exponential backoff delay before trying again
579*7c478bd9Sstevel@tonic-gate 		 * to touch the mutex data structure.
580*7c478bd9Sstevel@tonic-gate 		 * the spin_count test and call to nulldev are to prevent
581*7c478bd9Sstevel@tonic-gate 		 * the compiler optimizer from eliminating the delay loop.
582*7c478bd9Sstevel@tonic-gate 		 */
583*7c478bd9Sstevel@tonic-gate 		for (backctr = backoff; backctr; backctr--) {	/* delay */
584*7c478bd9Sstevel@tonic-gate 			if (!spin_count) (void) nulldev();
585*7c478bd9Sstevel@tonic-gate 		}
586*7c478bd9Sstevel@tonic-gate 
587*7c478bd9Sstevel@tonic-gate 		backoff = backoff << 1;		/* double it */
588*7c478bd9Sstevel@tonic-gate 		if (backoff > BACKOFF_CAP) {
589*7c478bd9Sstevel@tonic-gate 			backoff = BACKOFF_CAP;
590*7c478bd9Sstevel@tonic-gate 		}
591*7c478bd9Sstevel@tonic-gate 		SMT_PAUSE();
592*7c478bd9Sstevel@tonic-gate 	}
593*7c478bd9Sstevel@tonic-gate 
594*7c478bd9Sstevel@tonic-gate 	if (spin_count) {
595*7c478bd9Sstevel@tonic-gate 		LOCKSTAT_RECORD(LS_LOCK_SET_SPIN, lp, spin_count);
596*7c478bd9Sstevel@tonic-gate 	}
597*7c478bd9Sstevel@tonic-gate 
598*7c478bd9Sstevel@tonic-gate 	LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
599*7c478bd9Sstevel@tonic-gate }
600*7c478bd9Sstevel@tonic-gate 
601*7c478bd9Sstevel@tonic-gate void
602*7c478bd9Sstevel@tonic-gate lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
603*7c478bd9Sstevel@tonic-gate {
604*7c478bd9Sstevel@tonic-gate 	int spin_count = 1;
605*7c478bd9Sstevel@tonic-gate 	int backoff;	/* current backoff */
606*7c478bd9Sstevel@tonic-gate 	int backctr;	/* ctr for backoff */
607*7c478bd9Sstevel@tonic-gate 
608*7c478bd9Sstevel@tonic-gate 	if (panicstr)
609*7c478bd9Sstevel@tonic-gate 		return;
610*7c478bd9Sstevel@tonic-gate 
611*7c478bd9Sstevel@tonic-gate 	if (ncpus == 1)
612*7c478bd9Sstevel@tonic-gate 		panic("lock_set_spl: %p lock held and only one CPU", lp);
613*7c478bd9Sstevel@tonic-gate 
614*7c478bd9Sstevel@tonic-gate 	ASSERT(new_pil > LOCK_LEVEL);
615*7c478bd9Sstevel@tonic-gate 
616*7c478bd9Sstevel@tonic-gate 	backoff = BACKOFF_BASE;
617*7c478bd9Sstevel@tonic-gate 	do {
618*7c478bd9Sstevel@tonic-gate 		splx(old_pil);
619*7c478bd9Sstevel@tonic-gate 		while (LOCK_HELD(lp)) {
620*7c478bd9Sstevel@tonic-gate 			if (panicstr) {
621*7c478bd9Sstevel@tonic-gate 				*old_pil_addr = (ushort_t)splr(new_pil);
622*7c478bd9Sstevel@tonic-gate 				return;
623*7c478bd9Sstevel@tonic-gate 			}
624*7c478bd9Sstevel@tonic-gate 			spin_count++;
625*7c478bd9Sstevel@tonic-gate 			/*
626*7c478bd9Sstevel@tonic-gate 			 * Add an exponential backoff delay before trying again
627*7c478bd9Sstevel@tonic-gate 			 * to touch the mutex data structure.
628*7c478bd9Sstevel@tonic-gate 			 * spin_count test and call to nulldev are to prevent
629*7c478bd9Sstevel@tonic-gate 			 * compiler optimizer from eliminating the delay loop.
630*7c478bd9Sstevel@tonic-gate 			 */
631*7c478bd9Sstevel@tonic-gate 			for (backctr = backoff; backctr; backctr--) {
632*7c478bd9Sstevel@tonic-gate 				if (!spin_count) (void) nulldev();
633*7c478bd9Sstevel@tonic-gate 			}
634*7c478bd9Sstevel@tonic-gate 			backoff = backoff << 1;		/* double it */
635*7c478bd9Sstevel@tonic-gate 			if (backoff > BACKOFF_CAP) {
636*7c478bd9Sstevel@tonic-gate 				backoff = BACKOFF_CAP;
637*7c478bd9Sstevel@tonic-gate 			}
638*7c478bd9Sstevel@tonic-gate 
639*7c478bd9Sstevel@tonic-gate 			SMT_PAUSE();
640*7c478bd9Sstevel@tonic-gate 		}
641*7c478bd9Sstevel@tonic-gate 		old_pil = splr(new_pil);
642*7c478bd9Sstevel@tonic-gate 	} while (!lock_spin_try(lp));
643*7c478bd9Sstevel@tonic-gate 
644*7c478bd9Sstevel@tonic-gate 	*old_pil_addr = (ushort_t)old_pil;
645*7c478bd9Sstevel@tonic-gate 
646*7c478bd9Sstevel@tonic-gate 	if (spin_count) {
647*7c478bd9Sstevel@tonic-gate 		LOCKSTAT_RECORD(LS_LOCK_SET_SPL_SPIN, lp, spin_count);
648*7c478bd9Sstevel@tonic-gate 	}
649*7c478bd9Sstevel@tonic-gate 
650*7c478bd9Sstevel@tonic-gate 	LOCKSTAT_RECORD(LS_LOCK_SET_SPL_ACQUIRE, lp, spin_count);
651*7c478bd9Sstevel@tonic-gate }
652