xref: /illumos-gate/usr/src/uts/common/os/mutex.c (revision 575a7426)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50efe5e54Sdv  * Common Development and Distribution License (the "License").
60efe5e54Sdv  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*575a7426Spt  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate /*
297c478bd9Sstevel@tonic-gate  * Big Theory Statement for mutual exclusion locking primitives.
307c478bd9Sstevel@tonic-gate  *
317c478bd9Sstevel@tonic-gate  * A mutex serializes multiple threads so that only one thread
327c478bd9Sstevel@tonic-gate  * (the "owner" of the mutex) is active at a time.  See mutex(9F)
337c478bd9Sstevel@tonic-gate  * for a full description of the interfaces and programming model.
347c478bd9Sstevel@tonic-gate  * The rest of this comment describes the implementation.
357c478bd9Sstevel@tonic-gate  *
367c478bd9Sstevel@tonic-gate  * Mutexes come in two flavors: adaptive and spin.  mutex_init(9F)
377c478bd9Sstevel@tonic-gate  * determines the type based solely on the iblock cookie (PIL) argument.
387c478bd9Sstevel@tonic-gate  * PIL > LOCK_LEVEL implies a spin lock; everything else is adaptive.
397c478bd9Sstevel@tonic-gate  *
407c478bd9Sstevel@tonic-gate  * Spin mutexes block interrupts and spin until the lock becomes available.
417c478bd9Sstevel@tonic-gate  * A thread may not sleep, or call any function that might sleep, while
427c478bd9Sstevel@tonic-gate  * holding a spin mutex.  With few exceptions, spin mutexes should only
437c478bd9Sstevel@tonic-gate  * be used to synchronize with interrupt handlers.
447c478bd9Sstevel@tonic-gate  *
457c478bd9Sstevel@tonic-gate  * Adaptive mutexes (the default type) spin if the owner is running on
467c478bd9Sstevel@tonic-gate  * another CPU and block otherwise.  This policy is based on the assumption
477c478bd9Sstevel@tonic-gate  * that mutex hold times are typically short enough that the time spent
487c478bd9Sstevel@tonic-gate  * spinning is less than the time it takes to block.  If you need mutual
497c478bd9Sstevel@tonic-gate  * exclusion semantics with long hold times, consider an rwlock(9F) as
507c478bd9Sstevel@tonic-gate  * RW_WRITER.  Better still, reconsider the algorithm: if it requires
517c478bd9Sstevel@tonic-gate  * mutual exclusion for long periods of time, it's probably not scalable.
527c478bd9Sstevel@tonic-gate  *
537c478bd9Sstevel@tonic-gate  * Adaptive mutexes are overwhelmingly more common than spin mutexes,
547c478bd9Sstevel@tonic-gate  * so mutex_enter() assumes that the lock is adaptive.  We get away
557c478bd9Sstevel@tonic-gate  * with this by structuring mutexes so that an attempt to acquire a
567c478bd9Sstevel@tonic-gate  * spin mutex as adaptive always fails.  When mutex_enter() fails
577c478bd9Sstevel@tonic-gate  * it punts to mutex_vector_enter(), which does all the hard stuff.
587c478bd9Sstevel@tonic-gate  *
 * mutex_vector_enter() first checks the type.  If it's a spin mutex,
 * we just call lock_set_spl() and return.  If it's an adaptive mutex,
 * we check to see what the owner is doing.  If the owner is running,
 * we spin until the lock becomes available; if not, we mark the lock
 * as having waiters and block.
647c478bd9Sstevel@tonic-gate  *
 * Blocking on a mutex is a surprisingly delicate dance because, for speed,
 * mutex_exit() doesn't use an atomic instruction.  Thus we have to work
 * a little harder in the (rarely-executed) blocking path to make sure
 * we don't block on a mutex that's just been released -- otherwise we
 * might never be woken up.
707c478bd9Sstevel@tonic-gate  *
717c478bd9Sstevel@tonic-gate  * The logic for synchronizing mutex_vector_enter() with mutex_exit()
727c478bd9Sstevel@tonic-gate  * in the face of preemption and relaxed memory ordering is as follows:
737c478bd9Sstevel@tonic-gate  *
747c478bd9Sstevel@tonic-gate  * (1) Preemption in the middle of mutex_exit() must cause mutex_exit()
757c478bd9Sstevel@tonic-gate  *     to restart.  Each platform must enforce this by checking the
767c478bd9Sstevel@tonic-gate  *     interrupted PC in the interrupt handler (or on return from trap --
777c478bd9Sstevel@tonic-gate  *     whichever is more convenient for the platform).  If the PC
787c478bd9Sstevel@tonic-gate  *     lies within the critical region of mutex_exit(), the interrupt
797c478bd9Sstevel@tonic-gate  *     handler must reset the PC back to the beginning of mutex_exit().
807c478bd9Sstevel@tonic-gate  *     The critical region consists of all instructions up to, but not
817c478bd9Sstevel@tonic-gate  *     including, the store that clears the lock (which, of course,
827c478bd9Sstevel@tonic-gate  *     must never be executed twice.)
837c478bd9Sstevel@tonic-gate  *
847c478bd9Sstevel@tonic-gate  *     This ensures that the owner will always check for waiters after
857c478bd9Sstevel@tonic-gate  *     resuming from a previous preemption.
867c478bd9Sstevel@tonic-gate  *
877c478bd9Sstevel@tonic-gate  * (2) A thread resuming in mutex_exit() does (at least) the following:
887c478bd9Sstevel@tonic-gate  *
897c478bd9Sstevel@tonic-gate  *	when resuming:	set CPU_THREAD = owner
907c478bd9Sstevel@tonic-gate  *			membar #StoreLoad
917c478bd9Sstevel@tonic-gate  *
927c478bd9Sstevel@tonic-gate  *	in mutex_exit:	check waiters bit; do wakeup if set
937c478bd9Sstevel@tonic-gate  *			membar #LoadStore|#StoreStore
947c478bd9Sstevel@tonic-gate  *			clear owner
957c478bd9Sstevel@tonic-gate  *			(at this point, other threads may or may not grab
967c478bd9Sstevel@tonic-gate  *			the lock, and we may or may not reacquire it)
977c478bd9Sstevel@tonic-gate  *
987c478bd9Sstevel@tonic-gate  *	when blocking:	membar #StoreStore (due to disp_lock_enter())
997c478bd9Sstevel@tonic-gate  *			set CPU_THREAD = (possibly) someone else
1007c478bd9Sstevel@tonic-gate  *
1017c478bd9Sstevel@tonic-gate  * (3) A thread blocking in mutex_vector_enter() does the following:
1027c478bd9Sstevel@tonic-gate  *
1037c478bd9Sstevel@tonic-gate  *			set waiters bit
1047c478bd9Sstevel@tonic-gate  *			membar #StoreLoad (via membar_enter())
105*575a7426Spt  *			check CPU_THREAD for owner's t_cpu
106*575a7426Spt  *				continue if owner running
1077c478bd9Sstevel@tonic-gate  *			membar #LoadLoad (via membar_consumer())
1087c478bd9Sstevel@tonic-gate  *			check owner and waiters bit; abort if either changed
1097c478bd9Sstevel@tonic-gate  *			block
1107c478bd9Sstevel@tonic-gate  *
1117c478bd9Sstevel@tonic-gate  * Thus the global memory orderings for (2) and (3) are as follows:
1127c478bd9Sstevel@tonic-gate  *
1137c478bd9Sstevel@tonic-gate  * (2M) mutex_exit() memory order:
1147c478bd9Sstevel@tonic-gate  *
1157c478bd9Sstevel@tonic-gate  *			STORE	CPU_THREAD = owner
1167c478bd9Sstevel@tonic-gate  *			LOAD	waiters bit
1177c478bd9Sstevel@tonic-gate  *			STORE	owner = NULL
1187c478bd9Sstevel@tonic-gate  *			STORE	CPU_THREAD = (possibly) someone else
1197c478bd9Sstevel@tonic-gate  *
1207c478bd9Sstevel@tonic-gate  * (3M) mutex_vector_enter() memory order:
1217c478bd9Sstevel@tonic-gate  *
1227c478bd9Sstevel@tonic-gate  *			STORE	waiters bit = 1
1237c478bd9Sstevel@tonic-gate  *			LOAD	CPU_THREAD for each CPU
1247c478bd9Sstevel@tonic-gate  *			LOAD	owner and waiters bit
1257c478bd9Sstevel@tonic-gate  *
1267c478bd9Sstevel@tonic-gate  * It has been verified by exhaustive simulation that all possible global
1277c478bd9Sstevel@tonic-gate  * memory orderings of (2M) interleaved with (3M) result in correct
1287c478bd9Sstevel@tonic-gate  * behavior.  Moreover, these ordering constraints are minimal: changing
1297c478bd9Sstevel@tonic-gate  * the ordering of anything in (2M) or (3M) breaks the algorithm, creating
1307c478bd9Sstevel@tonic-gate  * windows for missed wakeups.  Note: the possibility that other threads
1317c478bd9Sstevel@tonic-gate  * may grab the lock after the owner drops it can be factored out of the
1327c478bd9Sstevel@tonic-gate  * memory ordering analysis because mutex_vector_enter() won't block
1337c478bd9Sstevel@tonic-gate  * if the lock isn't still owned by the same thread.
1347c478bd9Sstevel@tonic-gate  *
1357c478bd9Sstevel@tonic-gate  * The only requirements of code outside the mutex implementation are
1367c478bd9Sstevel@tonic-gate  * (1) mutex_exit() preemption fixup in interrupt handlers or trap return,
137*575a7426Spt  * (2) a membar #StoreLoad after setting CPU_THREAD in resume(),
138*575a7426Spt  * (3) mutex_owner_running() preemption fixup in interrupt handlers
139*575a7426Spt  * or trap returns.
1407c478bd9Sstevel@tonic-gate  * Note: idle threads cannot grab adaptive locks (since they cannot block),
1417c478bd9Sstevel@tonic-gate  * so the membar may be safely omitted when resuming an idle thread.
1427c478bd9Sstevel@tonic-gate  *
1437c478bd9Sstevel@tonic-gate  * When a mutex has waiters, mutex_vector_exit() has several options:
1447c478bd9Sstevel@tonic-gate  *
1457c478bd9Sstevel@tonic-gate  * (1) Choose a waiter and make that thread the owner before waking it;
1467c478bd9Sstevel@tonic-gate  *     this is known as "direct handoff" of ownership.
1477c478bd9Sstevel@tonic-gate  *
1487c478bd9Sstevel@tonic-gate  * (2) Drop the lock and wake one waiter.
1497c478bd9Sstevel@tonic-gate  *
1507c478bd9Sstevel@tonic-gate  * (3) Drop the lock, clear the waiters bit, and wake all waiters.
1517c478bd9Sstevel@tonic-gate  *
1527c478bd9Sstevel@tonic-gate  * In many ways (1) is the cleanest solution, but if a lock is moderately
1537c478bd9Sstevel@tonic-gate  * contended it defeats the adaptive spin logic.  If we make some other
1547c478bd9Sstevel@tonic-gate  * thread the owner, but he's not ONPROC yet, then all other threads on
1557c478bd9Sstevel@tonic-gate  * other cpus that try to get the lock will conclude that the owner is
1567c478bd9Sstevel@tonic-gate  * blocked, so they'll block too.  And so on -- it escalates quickly,
1577c478bd9Sstevel@tonic-gate  * with every thread taking the blocking path rather than the spin path.
1587c478bd9Sstevel@tonic-gate  * Thus, direct handoff is *not* a good idea for adaptive mutexes.
1597c478bd9Sstevel@tonic-gate  *
1607c478bd9Sstevel@tonic-gate  * Option (2) is the next most natural-seeming option, but it has several
1617c478bd9Sstevel@tonic-gate  * annoying properties.  If there's more than one waiter, we must preserve
1627c478bd9Sstevel@tonic-gate  * the waiters bit on an unheld lock.  On cas-capable platforms, where
1637c478bd9Sstevel@tonic-gate  * the waiters bit is part of the lock word, this means that both 0x0
1647c478bd9Sstevel@tonic-gate  * and 0x1 represent unheld locks, so we have to cas against *both*.
1657c478bd9Sstevel@tonic-gate  * Priority inheritance also gets more complicated, because a lock can
1667c478bd9Sstevel@tonic-gate  * have waiters but no owner to whom priority can be willed.  So while
1677c478bd9Sstevel@tonic-gate  * it is possible to make option (2) work, it's surprisingly vile.
1687c478bd9Sstevel@tonic-gate  *
1697c478bd9Sstevel@tonic-gate  * Option (3), the least-intuitive at first glance, is what we actually do.
1707c478bd9Sstevel@tonic-gate  * It has the advantage that because you always wake all waiters, you
1717c478bd9Sstevel@tonic-gate  * never have to preserve the waiters bit.  Waking all waiters seems like
1727c478bd9Sstevel@tonic-gate  * begging for a thundering herd problem, but consider: under option (2),
1737c478bd9Sstevel@tonic-gate  * every thread that grabs and drops the lock will wake one waiter -- so
1747c478bd9Sstevel@tonic-gate  * if the lock is fairly active, all waiters will be awakened very quickly
1757c478bd9Sstevel@tonic-gate  * anyway.  Moreover, this is how adaptive locks are *supposed* to work.
1767c478bd9Sstevel@tonic-gate  * The blocking case is rare; the more common case (by 3-4 orders of
1777c478bd9Sstevel@tonic-gate  * magnitude) is that one or more threads spin waiting to get the lock.
1787c478bd9Sstevel@tonic-gate  * Only direct handoff can prevent the thundering herd problem, but as
1797c478bd9Sstevel@tonic-gate  * mentioned earlier, that would tend to defeat the adaptive spin logic.
1807c478bd9Sstevel@tonic-gate  * In practice, option (3) works well because the blocking case is rare.
1817c478bd9Sstevel@tonic-gate  */
1827c478bd9Sstevel@tonic-gate 
1837c478bd9Sstevel@tonic-gate /*
1847c478bd9Sstevel@tonic-gate  * delayed lock retry with exponential delay for spin locks
1857c478bd9Sstevel@tonic-gate  *
 * It is noted above that for both the spin locks and the adaptive locks,
 * spinning is the dominant mode of operation.  So long as there is only
 * one thread waiting on a lock, the naive spin loop works very well in
 * cache based architectures.  The lock data structure is pulled into the
 * cache of the processor with the waiting/spinning thread and no further
 * memory traffic is generated until the lock is released.  Unfortunately,
 * once two or more threads are waiting on a lock, the naive spin has
 * the property of generating maximum memory traffic from each spinning
 * thread as the spinning threads contend for the lock data structure.
1957c478bd9Sstevel@tonic-gate  *
 * By executing a delay loop before retrying a lock, a waiting thread
 * can reduce its memory traffic by a large factor, depending on the
 * size of the delay loop.  A large delay loop greatly reduces the memory
 * traffic, but has the drawback of having a period of time when
 * no thread is attempting to gain the lock even though several threads
 * might be waiting.  A small delay loop has the drawback of not
 * much reduction in memory traffic, but reduces the potential idle time.
 * The theory of the exponential delay code is to start with a short
 * delay loop and double the waiting time on each iteration, up to
 * a preselected maximum.
2067c478bd9Sstevel@tonic-gate  */
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate #include <sys/param.h>
2097c478bd9Sstevel@tonic-gate #include <sys/time.h>
2107c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
2117c478bd9Sstevel@tonic-gate #include <sys/thread.h>
2127c478bd9Sstevel@tonic-gate #include <sys/debug.h>
2137c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
2147c478bd9Sstevel@tonic-gate #include <sys/sobject.h>
2157c478bd9Sstevel@tonic-gate #include <sys/turnstile.h>
2167c478bd9Sstevel@tonic-gate #include <sys/systm.h>
2177c478bd9Sstevel@tonic-gate #include <sys/mutex_impl.h>
2187c478bd9Sstevel@tonic-gate #include <sys/spl.h>
2197c478bd9Sstevel@tonic-gate #include <sys/lockstat.h>
2207c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
2217c478bd9Sstevel@tonic-gate #include <sys/cpu.h>
2227c478bd9Sstevel@tonic-gate #include <sys/stack.h>
223843e1988Sjohnlev #include <sys/archsystm.h>
224*575a7426Spt #include <sys/machsystm.h>
225*575a7426Spt #include <sys/x_call.h>
2267c478bd9Sstevel@tonic-gate 
2277c478bd9Sstevel@tonic-gate /*
2287c478bd9Sstevel@tonic-gate  * The sobj_ops vector exports a set of functions needed when a thread
2297c478bd9Sstevel@tonic-gate  * is asleep on a synchronization object of this type.
2307c478bd9Sstevel@tonic-gate  */
2317c478bd9Sstevel@tonic-gate static sobj_ops_t mutex_sobj_ops = {
2327c478bd9Sstevel@tonic-gate 	SOBJ_MUTEX, mutex_owner, turnstile_stay_asleep, turnstile_change_pri
2337c478bd9Sstevel@tonic-gate };
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate /*
2367c478bd9Sstevel@tonic-gate  * If the system panics on a mutex, save the address of the offending
2377c478bd9Sstevel@tonic-gate  * mutex in panic_mutex_addr, and save the contents in panic_mutex.
2387c478bd9Sstevel@tonic-gate  */
2397c478bd9Sstevel@tonic-gate static mutex_impl_t panic_mutex;
2407c478bd9Sstevel@tonic-gate static mutex_impl_t *panic_mutex_addr;
2417c478bd9Sstevel@tonic-gate 
2427c478bd9Sstevel@tonic-gate static void
2437c478bd9Sstevel@tonic-gate mutex_panic(char *msg, mutex_impl_t *lp)
2447c478bd9Sstevel@tonic-gate {
2457c478bd9Sstevel@tonic-gate 	if (panicstr)
2467c478bd9Sstevel@tonic-gate 		return;
2477c478bd9Sstevel@tonic-gate 
2487c478bd9Sstevel@tonic-gate 	if (casptr(&panic_mutex_addr, NULL, lp) == NULL)
2497c478bd9Sstevel@tonic-gate 		panic_mutex = *lp;
2507c478bd9Sstevel@tonic-gate 
2517c478bd9Sstevel@tonic-gate 	panic("%s, lp=%p owner=%p thread=%p",
2527c478bd9Sstevel@tonic-gate 	    msg, lp, MUTEX_OWNER(&panic_mutex), curthread);
2537c478bd9Sstevel@tonic-gate }
2547c478bd9Sstevel@tonic-gate 
255*575a7426Spt /* "tunables" for per-platform backoff constants. */
256*575a7426Spt uint_t mutex_backoff_cap = 0;
257*575a7426Spt ushort_t mutex_backoff_base = MUTEX_BACKOFF_BASE;
258*575a7426Spt ushort_t mutex_cap_factor = MUTEX_CAP_FACTOR;
259*575a7426Spt uchar_t mutex_backoff_shift = MUTEX_BACKOFF_SHIFT;
260*575a7426Spt 
/*
 * Callable wrapper around the platform-supplied MUTEX_SYNC() macro.
 */
void
mutex_sync(void)
{
	MUTEX_SYNC();
}
266*575a7426Spt 
267*575a7426Spt /* calculate the backoff interval */
268*575a7426Spt static uint_t
269*575a7426Spt default_lock_backoff(uint_t backoff)
270*575a7426Spt {
271*575a7426Spt 	uint_t cap;		/* backoff cap calculated */
272*575a7426Spt 
273*575a7426Spt 	if (backoff == 0) {
274*575a7426Spt 		backoff = mutex_backoff_base;
275*575a7426Spt 		/* first call just sets the base */
276*575a7426Spt 		return (backoff);
277*575a7426Spt 	}
278*575a7426Spt 
279*575a7426Spt 	/* set cap */
280*575a7426Spt 	if (mutex_backoff_cap == 0) {
281*575a7426Spt 		/*
282*575a7426Spt 		 * For a contended lock, in the worst case a load + cas may
283*575a7426Spt 		 * be queued  at the controller for each contending CPU.
284*575a7426Spt 		 * Therefore, to avoid queueing, the accesses for all CPUS must
285*575a7426Spt 		 * be spread out in time over an interval of (ncpu *
286*575a7426Spt 		 * cap-factor).  Maximum backoff is set to this value, and
287*575a7426Spt 		 * actual backoff is a random number from 0 to the current max.
288*575a7426Spt 		 */
289*575a7426Spt 		cap = ncpus_online * mutex_cap_factor;
290*575a7426Spt 	} else {
291*575a7426Spt 		cap = mutex_backoff_cap;
292*575a7426Spt 	}
293*575a7426Spt 
294*575a7426Spt 	/* calculate new backoff value */
295*575a7426Spt 	backoff <<= mutex_backoff_shift;	/* increase backoff */
296*575a7426Spt 	if (backoff > cap) {
297*575a7426Spt 		if (cap < mutex_backoff_base)
298*575a7426Spt 			backoff = mutex_backoff_base;
299*575a7426Spt 		else
300*575a7426Spt 			backoff = cap;
301*575a7426Spt 	}
302*575a7426Spt 
303*575a7426Spt 	return (backoff);
304*575a7426Spt }
305*575a7426Spt 
306*575a7426Spt /*
307*575a7426Spt  * default delay function for mutexes.
308*575a7426Spt  */
309*575a7426Spt static void
310*575a7426Spt default_lock_delay(uint_t backoff)
311*575a7426Spt {
312*575a7426Spt 	ulong_t rnd;		/* random factor */
313*575a7426Spt 	uint_t cur_backoff;	/* calculated backoff */
314*575a7426Spt 	uint_t backctr;
315*575a7426Spt 
316*575a7426Spt 	/*
317*575a7426Spt 	 * Modify backoff by a random amount to avoid lockstep, and to
318*575a7426Spt 	 * make it probable that some thread gets a small backoff, and
319*575a7426Spt 	 * re-checks quickly
320*575a7426Spt 	 */
321*575a7426Spt 	rnd = (((long)curthread >> PTR24_LSB) ^ (long)MUTEX_GETTICK());
322*575a7426Spt 	cur_backoff = (uint_t)(rnd % (backoff - mutex_backoff_base + 1)) +
323*575a7426Spt 	    mutex_backoff_base;
324*575a7426Spt 
325*575a7426Spt 	/*
326*575a7426Spt 	 * Delay before trying
327*575a7426Spt 	 * to touch the mutex data structure.
328*575a7426Spt 	 */
329*575a7426Spt 	for (backctr = cur_backoff; backctr; backctr--) {
330*575a7426Spt 		MUTEX_DELAY();
331*575a7426Spt 	};
332*575a7426Spt }
333*575a7426Spt 
334*575a7426Spt uint_t (*mutex_lock_backoff)(uint_t) = default_lock_backoff;
335*575a7426Spt void (*mutex_lock_delay)(uint_t) = default_lock_delay;
336*575a7426Spt void (*mutex_delay)(void) = mutex_delay_default;
337*575a7426Spt 
3387c478bd9Sstevel@tonic-gate /*
3397c478bd9Sstevel@tonic-gate  * mutex_vector_enter() is called from the assembly mutex_enter() routine
3407c478bd9Sstevel@tonic-gate  * if the lock is held or is not of type MUTEX_ADAPTIVE.
3417c478bd9Sstevel@tonic-gate  */
3427c478bd9Sstevel@tonic-gate void
3437c478bd9Sstevel@tonic-gate mutex_vector_enter(mutex_impl_t *lp)
3447c478bd9Sstevel@tonic-gate {
3457c478bd9Sstevel@tonic-gate 	kthread_id_t	owner;
346*575a7426Spt 	kthread_id_t	lastowner = MUTEX_NO_OWNER; /* track owner changes */
3477c478bd9Sstevel@tonic-gate 	hrtime_t	sleep_time = 0;	/* how long we slept */
3487c478bd9Sstevel@tonic-gate 	uint_t		spin_count = 0;	/* how many times we spun */
349*575a7426Spt 	cpu_t 		*cpup;
3507c478bd9Sstevel@tonic-gate 	turnstile_t	*ts;
3517c478bd9Sstevel@tonic-gate 	volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
352*575a7426Spt 	uint_t		backoff = 0;	/* current backoff */
3530efe5e54Sdv 	int		sleep_count = 0;
354*575a7426Spt 	int		changecnt = 0;	/* count of owner changes */
3557c478bd9Sstevel@tonic-gate 
3567c478bd9Sstevel@tonic-gate 	ASSERT_STACK_ALIGNED();
3577c478bd9Sstevel@tonic-gate 
3587c478bd9Sstevel@tonic-gate 	if (MUTEX_TYPE_SPIN(lp)) {
3597c478bd9Sstevel@tonic-gate 		lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
3607c478bd9Sstevel@tonic-gate 		    &lp->m_spin.m_oldspl);
3617c478bd9Sstevel@tonic-gate 		return;
3627c478bd9Sstevel@tonic-gate 	}
3637c478bd9Sstevel@tonic-gate 
3647c478bd9Sstevel@tonic-gate 	if (!MUTEX_TYPE_ADAPTIVE(lp)) {
3657c478bd9Sstevel@tonic-gate 		mutex_panic("mutex_enter: bad mutex", lp);
3667c478bd9Sstevel@tonic-gate 		return;
3677c478bd9Sstevel@tonic-gate 	}
3687c478bd9Sstevel@tonic-gate 
3697c478bd9Sstevel@tonic-gate 	/*
3707c478bd9Sstevel@tonic-gate 	 * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
3717c478bd9Sstevel@tonic-gate 	 * We can migrate after loading CPU but before checking CPU_ON_INTR,
3727c478bd9Sstevel@tonic-gate 	 * so we must verify by disabling preemption and loading CPU again.
3737c478bd9Sstevel@tonic-gate 	 */
3747c478bd9Sstevel@tonic-gate 	cpup = CPU;
3757c478bd9Sstevel@tonic-gate 	if (CPU_ON_INTR(cpup) && !panicstr) {
3767c478bd9Sstevel@tonic-gate 		kpreempt_disable();
3777c478bd9Sstevel@tonic-gate 		if (CPU_ON_INTR(CPU))
3787c478bd9Sstevel@tonic-gate 			mutex_panic("mutex_enter: adaptive at high PIL", lp);
3797c478bd9Sstevel@tonic-gate 		kpreempt_enable();
3807c478bd9Sstevel@tonic-gate 	}
3817c478bd9Sstevel@tonic-gate 
3827c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);
3837c478bd9Sstevel@tonic-gate 
384*575a7426Spt 	backoff = mutex_lock_backoff(0);	/* set base backoff */
3857c478bd9Sstevel@tonic-gate 	for (;;) {
3867c478bd9Sstevel@tonic-gate 		spin_count++;
387*575a7426Spt 		mutex_lock_delay(backoff); /* backoff delay */
3887c478bd9Sstevel@tonic-gate 
3897c478bd9Sstevel@tonic-gate 		if (panicstr)
3907c478bd9Sstevel@tonic-gate 			return;
3917c478bd9Sstevel@tonic-gate 
3927c478bd9Sstevel@tonic-gate 		if ((owner = MUTEX_OWNER(vlp)) == NULL) {
393*575a7426Spt 			if (mutex_adaptive_tryenter(lp)) {
3947c478bd9Sstevel@tonic-gate 				break;
395*575a7426Spt 			}
396*575a7426Spt 			/* increase backoff only on failed attempt. */
397*575a7426Spt 			backoff = mutex_lock_backoff(backoff);
398*575a7426Spt 			changecnt++;
3997c478bd9Sstevel@tonic-gate 			continue;
400*575a7426Spt 		} else if (lastowner != owner) {
401*575a7426Spt 			lastowner = owner;
402*575a7426Spt 			backoff = mutex_lock_backoff(backoff);
403*575a7426Spt 			changecnt++;
404*575a7426Spt 		}
405*575a7426Spt 
406*575a7426Spt 		if (changecnt >= ncpus_online) {
407*575a7426Spt 			backoff = mutex_lock_backoff(0);
408*575a7426Spt 			changecnt = 0;
4097c478bd9Sstevel@tonic-gate 		}
4107c478bd9Sstevel@tonic-gate 
4117c478bd9Sstevel@tonic-gate 		if (owner == curthread)
4127c478bd9Sstevel@tonic-gate 			mutex_panic("recursive mutex_enter", lp);
4137c478bd9Sstevel@tonic-gate 
4147c478bd9Sstevel@tonic-gate 		/*
4157c478bd9Sstevel@tonic-gate 		 * If lock is held but owner is not yet set, spin.
4167c478bd9Sstevel@tonic-gate 		 * (Only relevant for platforms that don't have cas.)
4177c478bd9Sstevel@tonic-gate 		 */
4187c478bd9Sstevel@tonic-gate 		if (owner == MUTEX_NO_OWNER)
4197c478bd9Sstevel@tonic-gate 			continue;
4207c478bd9Sstevel@tonic-gate 
421*575a7426Spt 		if (mutex_owner_running(lp) != NULL)  {
422*575a7426Spt 			continue;
423*575a7426Spt 		}
4247c478bd9Sstevel@tonic-gate 
4257c478bd9Sstevel@tonic-gate 		/*
4267c478bd9Sstevel@tonic-gate 		 * The owner appears not to be running, so block.
4277c478bd9Sstevel@tonic-gate 		 * See the Big Theory Statement for memory ordering issues.
4287c478bd9Sstevel@tonic-gate 		 */
4297c478bd9Sstevel@tonic-gate 		ts = turnstile_lookup(lp);
4307c478bd9Sstevel@tonic-gate 		MUTEX_SET_WAITERS(lp);
4317c478bd9Sstevel@tonic-gate 		membar_enter();
4327c478bd9Sstevel@tonic-gate 
4337c478bd9Sstevel@tonic-gate 		/*
4347c478bd9Sstevel@tonic-gate 		 * Recheck whether owner is running after waiters bit hits
4357c478bd9Sstevel@tonic-gate 		 * global visibility (above).  If owner is running, spin.
4367c478bd9Sstevel@tonic-gate 		 */
437*575a7426Spt 		if (mutex_owner_running(lp) != NULL) {
438*575a7426Spt 			turnstile_exit(lp);
439*575a7426Spt 			continue;
440*575a7426Spt 		}
4417c478bd9Sstevel@tonic-gate 		membar_consumer();
4427c478bd9Sstevel@tonic-gate 
4437c478bd9Sstevel@tonic-gate 		/*
4447c478bd9Sstevel@tonic-gate 		 * If owner and waiters bit are unchanged, block.
4457c478bd9Sstevel@tonic-gate 		 */
4467c478bd9Sstevel@tonic-gate 		if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
4477c478bd9Sstevel@tonic-gate 			sleep_time -= gethrtime();
4487c478bd9Sstevel@tonic-gate 			(void) turnstile_block(ts, TS_WRITER_Q, lp,
4497c478bd9Sstevel@tonic-gate 			    &mutex_sobj_ops, NULL, NULL);
4507c478bd9Sstevel@tonic-gate 			sleep_time += gethrtime();
4510efe5e54Sdv 			sleep_count++;
452*575a7426Spt 			/* reset backoff after turnstile */
453*575a7426Spt 			backoff = mutex_lock_backoff(0);
4547c478bd9Sstevel@tonic-gate 		} else {
4557c478bd9Sstevel@tonic-gate 			turnstile_exit(lp);
4567c478bd9Sstevel@tonic-gate 		}
4577c478bd9Sstevel@tonic-gate 	}
4587c478bd9Sstevel@tonic-gate 
4597c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_OWNER(lp) == curthread);
4607c478bd9Sstevel@tonic-gate 
4610efe5e54Sdv 	if (sleep_time != 0) {
4620efe5e54Sdv 		/*
4630efe5e54Sdv 		 * Note, sleep time is the sum of all the sleeping we
4640efe5e54Sdv 		 * did.
4650efe5e54Sdv 		 */
4667c478bd9Sstevel@tonic-gate 		LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
4677c478bd9Sstevel@tonic-gate 	}
4687c478bd9Sstevel@tonic-gate 
4690efe5e54Sdv 	/*
4700efe5e54Sdv 	 * We do not count a sleep as a spin.
4710efe5e54Sdv 	 */
472*575a7426Spt 	if (spin_count > sleep_count) {
4730efe5e54Sdv 		LOCKSTAT_RECORD(LS_MUTEX_ENTER_SPIN, lp,
4740efe5e54Sdv 		    spin_count - sleep_count);
475*575a7426Spt 	}
4760efe5e54Sdv 
4777c478bd9Sstevel@tonic-gate 	LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
4787c478bd9Sstevel@tonic-gate }
4797c478bd9Sstevel@tonic-gate 
4807c478bd9Sstevel@tonic-gate /*
4817c478bd9Sstevel@tonic-gate  * mutex_vector_tryenter() is called from the assembly mutex_tryenter()
4827c478bd9Sstevel@tonic-gate  * routine if the lock is held or is not of type MUTEX_ADAPTIVE.
4837c478bd9Sstevel@tonic-gate  */
4847c478bd9Sstevel@tonic-gate int
4857c478bd9Sstevel@tonic-gate mutex_vector_tryenter(mutex_impl_t *lp)
4867c478bd9Sstevel@tonic-gate {
4877c478bd9Sstevel@tonic-gate 	int s;
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate 	if (MUTEX_TYPE_ADAPTIVE(lp))
4907c478bd9Sstevel@tonic-gate 		return (0);		/* we already tried in assembly */
4917c478bd9Sstevel@tonic-gate 
4927c478bd9Sstevel@tonic-gate 	if (!MUTEX_TYPE_SPIN(lp)) {
4937c478bd9Sstevel@tonic-gate 		mutex_panic("mutex_tryenter: bad mutex", lp);
4947c478bd9Sstevel@tonic-gate 		return (0);
4957c478bd9Sstevel@tonic-gate 	}
4967c478bd9Sstevel@tonic-gate 
4977c478bd9Sstevel@tonic-gate 	s = splr(lp->m_spin.m_minspl);
4987c478bd9Sstevel@tonic-gate 	if (lock_try(&lp->m_spin.m_spinlock)) {
4997c478bd9Sstevel@tonic-gate 		lp->m_spin.m_oldspl = (ushort_t)s;
5007c478bd9Sstevel@tonic-gate 		return (1);
5017c478bd9Sstevel@tonic-gate 	}
5027c478bd9Sstevel@tonic-gate 	splx(s);
5037c478bd9Sstevel@tonic-gate 	return (0);
5047c478bd9Sstevel@tonic-gate }
5057c478bd9Sstevel@tonic-gate 
5067c478bd9Sstevel@tonic-gate /*
5077c478bd9Sstevel@tonic-gate  * mutex_vector_exit() is called from mutex_exit() if the lock is not
5087c478bd9Sstevel@tonic-gate  * adaptive, has waiters, or is not owned by the current thread (panic).
5097c478bd9Sstevel@tonic-gate  */
5107c478bd9Sstevel@tonic-gate void
5117c478bd9Sstevel@tonic-gate mutex_vector_exit(mutex_impl_t *lp)
5127c478bd9Sstevel@tonic-gate {
5137c478bd9Sstevel@tonic-gate 	turnstile_t *ts;
5147c478bd9Sstevel@tonic-gate 
5157c478bd9Sstevel@tonic-gate 	if (MUTEX_TYPE_SPIN(lp)) {
5167c478bd9Sstevel@tonic-gate 		lock_clear_splx(&lp->m_spin.m_spinlock, lp->m_spin.m_oldspl);
5177c478bd9Sstevel@tonic-gate 		return;
5187c478bd9Sstevel@tonic-gate 	}
5197c478bd9Sstevel@tonic-gate 
5207c478bd9Sstevel@tonic-gate 	if (MUTEX_OWNER(lp) != curthread) {
5217c478bd9Sstevel@tonic-gate 		mutex_panic("mutex_exit: not owner", lp);
5227c478bd9Sstevel@tonic-gate 		return;
5237c478bd9Sstevel@tonic-gate 	}
5247c478bd9Sstevel@tonic-gate 
5257c478bd9Sstevel@tonic-gate 	ts = turnstile_lookup(lp);
5267c478bd9Sstevel@tonic-gate 	MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
5277c478bd9Sstevel@tonic-gate 	if (ts == NULL)
5287c478bd9Sstevel@tonic-gate 		turnstile_exit(lp);
5297c478bd9Sstevel@tonic-gate 	else
5307c478bd9Sstevel@tonic-gate 		turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
5317c478bd9Sstevel@tonic-gate 	LOCKSTAT_RECORD0(LS_MUTEX_EXIT_RELEASE, lp);
5327c478bd9Sstevel@tonic-gate }
5337c478bd9Sstevel@tonic-gate 
5347c478bd9Sstevel@tonic-gate int
5357c478bd9Sstevel@tonic-gate mutex_owned(kmutex_t *mp)
5367c478bd9Sstevel@tonic-gate {
5377c478bd9Sstevel@tonic-gate 	mutex_impl_t *lp = (mutex_impl_t *)mp;
5387c478bd9Sstevel@tonic-gate 
5397c478bd9Sstevel@tonic-gate 	if (panicstr)
5407c478bd9Sstevel@tonic-gate 		return (1);
5417c478bd9Sstevel@tonic-gate 
5427c478bd9Sstevel@tonic-gate 	if (MUTEX_TYPE_ADAPTIVE(lp))
5437c478bd9Sstevel@tonic-gate 		return (MUTEX_OWNER(lp) == curthread);
5447c478bd9Sstevel@tonic-gate 	return (LOCK_HELD(&lp->m_spin.m_spinlock));
5457c478bd9Sstevel@tonic-gate }
5467c478bd9Sstevel@tonic-gate 
5477c478bd9Sstevel@tonic-gate kthread_t *
5487c478bd9Sstevel@tonic-gate mutex_owner(kmutex_t *mp)
5497c478bd9Sstevel@tonic-gate {
5507c478bd9Sstevel@tonic-gate 	mutex_impl_t *lp = (mutex_impl_t *)mp;
5517c478bd9Sstevel@tonic-gate 	kthread_id_t t;
5527c478bd9Sstevel@tonic-gate 
5537c478bd9Sstevel@tonic-gate 	if (MUTEX_TYPE_ADAPTIVE(lp) && (t = MUTEX_OWNER(lp)) != MUTEX_NO_OWNER)
5547c478bd9Sstevel@tonic-gate 		return (t);
5557c478bd9Sstevel@tonic-gate 	return (NULL);
5567c478bd9Sstevel@tonic-gate }
5577c478bd9Sstevel@tonic-gate 
5587c478bd9Sstevel@tonic-gate /*
5597c478bd9Sstevel@tonic-gate  * The iblock cookie 'ibc' is the spl level associated with the lock;
5607c478bd9Sstevel@tonic-gate  * this alone determines whether the lock will be ADAPTIVE or SPIN.
5617c478bd9Sstevel@tonic-gate  *
5627c478bd9Sstevel@tonic-gate  * Adaptive mutexes created in zeroed memory do not need to call
5637c478bd9Sstevel@tonic-gate  * mutex_init() as their allocation in this fashion guarantees
5647c478bd9Sstevel@tonic-gate  * their initialization.
5657c478bd9Sstevel@tonic-gate  *   eg adaptive mutexes created as static within the BSS or allocated
5667c478bd9Sstevel@tonic-gate  *      by kmem_zalloc().
5677c478bd9Sstevel@tonic-gate  */
5687c478bd9Sstevel@tonic-gate /* ARGSUSED */
5697c478bd9Sstevel@tonic-gate void
5707c478bd9Sstevel@tonic-gate mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc)
5717c478bd9Sstevel@tonic-gate {
5727c478bd9Sstevel@tonic-gate 	mutex_impl_t *lp = (mutex_impl_t *)mp;
5737c478bd9Sstevel@tonic-gate 
5747c478bd9Sstevel@tonic-gate 	ASSERT(ibc < (void *)KERNELBASE);	/* see 1215173 */
5757c478bd9Sstevel@tonic-gate 
5767c478bd9Sstevel@tonic-gate 	if ((intptr_t)ibc > ipltospl(LOCK_LEVEL) && ibc < (void *)KERNELBASE) {
5777c478bd9Sstevel@tonic-gate 		ASSERT(type != MUTEX_ADAPTIVE && type != MUTEX_DEFAULT);
5787c478bd9Sstevel@tonic-gate 		MUTEX_SET_TYPE(lp, MUTEX_SPIN);
5797c478bd9Sstevel@tonic-gate 		LOCK_INIT_CLEAR(&lp->m_spin.m_spinlock);
5807c478bd9Sstevel@tonic-gate 		LOCK_INIT_HELD(&lp->m_spin.m_dummylock);
5817c478bd9Sstevel@tonic-gate 		lp->m_spin.m_minspl = (int)(intptr_t)ibc;
5827c478bd9Sstevel@tonic-gate 	} else {
5837c478bd9Sstevel@tonic-gate 		ASSERT(type != MUTEX_SPIN);
5847c478bd9Sstevel@tonic-gate 		MUTEX_SET_TYPE(lp, MUTEX_ADAPTIVE);
5857c478bd9Sstevel@tonic-gate 		MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
5867c478bd9Sstevel@tonic-gate 	}
5877c478bd9Sstevel@tonic-gate }
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate void
5907c478bd9Sstevel@tonic-gate mutex_destroy(kmutex_t *mp)
5917c478bd9Sstevel@tonic-gate {
5927c478bd9Sstevel@tonic-gate 	mutex_impl_t *lp = (mutex_impl_t *)mp;
5937c478bd9Sstevel@tonic-gate 
5947c478bd9Sstevel@tonic-gate 	if (lp->m_owner == 0 && !MUTEX_HAS_WAITERS(lp)) {
5957c478bd9Sstevel@tonic-gate 		MUTEX_DESTROY(lp);
5967c478bd9Sstevel@tonic-gate 	} else if (MUTEX_TYPE_SPIN(lp)) {
5977c478bd9Sstevel@tonic-gate 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
5987c478bd9Sstevel@tonic-gate 		MUTEX_DESTROY(lp);
5997c478bd9Sstevel@tonic-gate 	} else if (MUTEX_TYPE_ADAPTIVE(lp)) {
6007c478bd9Sstevel@tonic-gate 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
6017c478bd9Sstevel@tonic-gate 		if (MUTEX_OWNER(lp) != curthread)
6027c478bd9Sstevel@tonic-gate 			mutex_panic("mutex_destroy: not owner", lp);
6037c478bd9Sstevel@tonic-gate 		if (MUTEX_HAS_WAITERS(lp)) {
6047c478bd9Sstevel@tonic-gate 			turnstile_t *ts = turnstile_lookup(lp);
6057c478bd9Sstevel@tonic-gate 			turnstile_exit(lp);
6067c478bd9Sstevel@tonic-gate 			if (ts != NULL)
6077c478bd9Sstevel@tonic-gate 				mutex_panic("mutex_destroy: has waiters", lp);
6087c478bd9Sstevel@tonic-gate 		}
6097c478bd9Sstevel@tonic-gate 		MUTEX_DESTROY(lp);
6107c478bd9Sstevel@tonic-gate 	} else {
6117c478bd9Sstevel@tonic-gate 		mutex_panic("mutex_destroy: bad mutex", lp);
6127c478bd9Sstevel@tonic-gate 	}
6137c478bd9Sstevel@tonic-gate }
6147c478bd9Sstevel@tonic-gate 
6157c478bd9Sstevel@tonic-gate /*
6167c478bd9Sstevel@tonic-gate  * Simple C support for the cases where spin locks miss on the first try.
6177c478bd9Sstevel@tonic-gate  */
6187c478bd9Sstevel@tonic-gate void
6197c478bd9Sstevel@tonic-gate lock_set_spin(lock_t *lp)
6207c478bd9Sstevel@tonic-gate {
6217c478bd9Sstevel@tonic-gate 	int spin_count = 1;
622*575a7426Spt 	int loop_count = 0;
623*575a7426Spt 	uint_t backoff = 0;	/* current backoff */
6247c478bd9Sstevel@tonic-gate 
6257c478bd9Sstevel@tonic-gate 	if (panicstr)
6267c478bd9Sstevel@tonic-gate 		return;
6277c478bd9Sstevel@tonic-gate 
6287c478bd9Sstevel@tonic-gate 	if (ncpus == 1)
6297c478bd9Sstevel@tonic-gate 		panic("lock_set: %p lock held and only one CPU", lp);
6307c478bd9Sstevel@tonic-gate 
6317c478bd9Sstevel@tonic-gate 	while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
6327c478bd9Sstevel@tonic-gate 		if (panicstr)
6337c478bd9Sstevel@tonic-gate 			return;
6347c478bd9Sstevel@tonic-gate 		spin_count++;
635*575a7426Spt 		loop_count++;
6367c478bd9Sstevel@tonic-gate 
637*575a7426Spt 		if (ncpus_online == loop_count) {
638*575a7426Spt 			backoff = mutex_lock_backoff(0);
639*575a7426Spt 			loop_count = 0;
640*575a7426Spt 		} else {
641*575a7426Spt 			backoff = mutex_lock_backoff(backoff);
6427c478bd9Sstevel@tonic-gate 		}
643*575a7426Spt 		mutex_lock_delay(backoff);
6447c478bd9Sstevel@tonic-gate 	}
6457c478bd9Sstevel@tonic-gate 
6467c478bd9Sstevel@tonic-gate 	if (spin_count) {
6477c478bd9Sstevel@tonic-gate 		LOCKSTAT_RECORD(LS_LOCK_SET_SPIN, lp, spin_count);
6487c478bd9Sstevel@tonic-gate 	}
6497c478bd9Sstevel@tonic-gate 
6507c478bd9Sstevel@tonic-gate 	LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
6517c478bd9Sstevel@tonic-gate }
6527c478bd9Sstevel@tonic-gate 
6537c478bd9Sstevel@tonic-gate void
6547c478bd9Sstevel@tonic-gate lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
6557c478bd9Sstevel@tonic-gate {
6567c478bd9Sstevel@tonic-gate 	int spin_count = 1;
657*575a7426Spt 	int loop_count = 0;
658*575a7426Spt 	uint_t backoff = 0;	/* current backoff */
6597c478bd9Sstevel@tonic-gate 
6607c478bd9Sstevel@tonic-gate 	if (panicstr)
6617c478bd9Sstevel@tonic-gate 		return;
6627c478bd9Sstevel@tonic-gate 
6637c478bd9Sstevel@tonic-gate 	if (ncpus == 1)
6647c478bd9Sstevel@tonic-gate 		panic("lock_set_spl: %p lock held and only one CPU", lp);
6657c478bd9Sstevel@tonic-gate 
6667c478bd9Sstevel@tonic-gate 	ASSERT(new_pil > LOCK_LEVEL);
6677c478bd9Sstevel@tonic-gate 
6687c478bd9Sstevel@tonic-gate 	do {
6697c478bd9Sstevel@tonic-gate 		splx(old_pil);
6707c478bd9Sstevel@tonic-gate 		while (LOCK_HELD(lp)) {
671*575a7426Spt 			spin_count++;
672*575a7426Spt 			loop_count++;
673*575a7426Spt 
6747c478bd9Sstevel@tonic-gate 			if (panicstr) {
6757c478bd9Sstevel@tonic-gate 				*old_pil_addr = (ushort_t)splr(new_pil);
6767c478bd9Sstevel@tonic-gate 				return;
6777c478bd9Sstevel@tonic-gate 			}
678*575a7426Spt 			if (ncpus_online == loop_count) {
679*575a7426Spt 				backoff = mutex_lock_backoff(0);
680*575a7426Spt 				loop_count = 0;
681e603b7d4Spm 			} else {
682*575a7426Spt 				backoff = mutex_lock_backoff(backoff);
6837c478bd9Sstevel@tonic-gate 			}
684*575a7426Spt 			mutex_lock_delay(backoff);
6857c478bd9Sstevel@tonic-gate 		}
6867c478bd9Sstevel@tonic-gate 		old_pil = splr(new_pil);
6877c478bd9Sstevel@tonic-gate 	} while (!lock_spin_try(lp));
6887c478bd9Sstevel@tonic-gate 
6897c478bd9Sstevel@tonic-gate 	*old_pil_addr = (ushort_t)old_pil;
6907c478bd9Sstevel@tonic-gate 
6917c478bd9Sstevel@tonic-gate 	if (spin_count) {
6927c478bd9Sstevel@tonic-gate 		LOCKSTAT_RECORD(LS_LOCK_SET_SPL_SPIN, lp, spin_count);
6937c478bd9Sstevel@tonic-gate 	}
6947c478bd9Sstevel@tonic-gate 
6957c478bd9Sstevel@tonic-gate 	LOCKSTAT_RECORD(LS_LOCK_SET_SPL_ACQUIRE, lp, spin_count);
6967c478bd9Sstevel@tonic-gate }
697