xref: /illumos-gate/usr/src/uts/common/os/mutex.c (revision e603b7d4a53c0b52084ca06218e6eed01ae7d6f1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Big Theory Statement for mutual exclusion locking primitives.
30  *
31  * A mutex serializes multiple threads so that only one thread
32  * (the "owner" of the mutex) is active at a time.  See mutex(9F)
33  * for a full description of the interfaces and programming model.
34  * The rest of this comment describes the implementation.
35  *
36  * Mutexes come in two flavors: adaptive and spin.  mutex_init(9F)
37  * determines the type based solely on the iblock cookie (PIL) argument.
38  * PIL > LOCK_LEVEL implies a spin lock; everything else is adaptive.
39  *
40  * Spin mutexes block interrupts and spin until the lock becomes available.
41  * A thread may not sleep, or call any function that might sleep, while
42  * holding a spin mutex.  With few exceptions, spin mutexes should only
43  * be used to synchronize with interrupt handlers.
44  *
45  * Adaptive mutexes (the default type) spin if the owner is running on
46  * another CPU and block otherwise.  This policy is based on the assumption
47  * that mutex hold times are typically short enough that the time spent
48  * spinning is less than the time it takes to block.  If you need mutual
49  * exclusion semantics with long hold times, consider an rwlock(9F) as
50  * RW_WRITER.  Better still, reconsider the algorithm: if it requires
51  * mutual exclusion for long periods of time, it's probably not scalable.
52  *
53  * Adaptive mutexes are overwhelmingly more common than spin mutexes,
54  * so mutex_enter() assumes that the lock is adaptive.  We get away
55  * with this by structuring mutexes so that an attempt to acquire a
56  * spin mutex as adaptive always fails.  When mutex_enter() fails
57  * it punts to mutex_vector_enter(), which does all the hard stuff.
58  *
59  * mutex_vector_enter() first checks the type.  If it's a spin mutex,
60  * we just call lock_set_spl() and return.  If it's an adaptive mutex,
61  * we check to see what the owner is doing.  If the owner is running,
62  * we spin until the lock becomes available; if not, we mark the lock
63  * as having waiters and block.
64  *
65  * Blocking on a mutex is a surprisingly delicate dance because, for speed,
66  * mutex_exit() doesn't use an atomic instruction.  Thus we have to work
67  * a little harder in the (rarely-executed) blocking path to make sure
68  * we don't block on a mutex that's just been released -- otherwise we
69  * might never be woken up.
70  *
71  * The logic for synchronizing mutex_vector_enter() with mutex_exit()
72  * in the face of preemption and relaxed memory ordering is as follows:
73  *
74  * (1) Preemption in the middle of mutex_exit() must cause mutex_exit()
75  *     to restart.  Each platform must enforce this by checking the
76  *     interrupted PC in the interrupt handler (or on return from trap --
77  *     whichever is more convenient for the platform).  If the PC
78  *     lies within the critical region of mutex_exit(), the interrupt
79  *     handler must reset the PC back to the beginning of mutex_exit().
80  *     The critical region consists of all instructions up to, but not
81  *     including, the store that clears the lock (which, of course,
82  *     must never be executed twice.)
83  *
84  *     This ensures that the owner will always check for waiters after
85  *     resuming from a previous preemption.
86  *
87  * (2) A thread resuming in mutex_exit() does (at least) the following:
88  *
89  *	when resuming:	set CPU_THREAD = owner
90  *			membar #StoreLoad
91  *
92  *	in mutex_exit:	check waiters bit; do wakeup if set
93  *			membar #LoadStore|#StoreStore
94  *			clear owner
95  *			(at this point, other threads may or may not grab
96  *			the lock, and we may or may not reacquire it)
97  *
98  *	when blocking:	membar #StoreStore (due to disp_lock_enter())
99  *			set CPU_THREAD = (possibly) someone else
100  *
101  * (3) A thread blocking in mutex_vector_enter() does the following:
102  *
103  *			set waiters bit
104  *			membar #StoreLoad (via membar_enter())
105  *			check CPU_THREAD for each CPU; abort if owner running
106  *			membar #LoadLoad (via membar_consumer())
107  *			check owner and waiters bit; abort if either changed
108  *			block
109  *
110  * Thus the global memory orderings for (2) and (3) are as follows:
111  *
112  * (2M) mutex_exit() memory order:
113  *
114  *			STORE	CPU_THREAD = owner
115  *			LOAD	waiters bit
116  *			STORE	owner = NULL
117  *			STORE	CPU_THREAD = (possibly) someone else
118  *
119  * (3M) mutex_vector_enter() memory order:
120  *
121  *			STORE	waiters bit = 1
122  *			LOAD	CPU_THREAD for each CPU
123  *			LOAD	owner and waiters bit
124  *
125  * It has been verified by exhaustive simulation that all possible global
126  * memory orderings of (2M) interleaved with (3M) result in correct
127  * behavior.  Moreover, these ordering constraints are minimal: changing
128  * the ordering of anything in (2M) or (3M) breaks the algorithm, creating
129  * windows for missed wakeups.  Note: the possibility that other threads
130  * may grab the lock after the owner drops it can be factored out of the
131  * memory ordering analysis because mutex_vector_enter() won't block
132  * if the lock isn't still owned by the same thread.
133  *
134  * The only requirements of code outside the mutex implementation are
135  * (1) mutex_exit() preemption fixup in interrupt handlers or trap return,
136  * and (2) a membar #StoreLoad after setting CPU_THREAD in resume().
137  * Note: idle threads cannot grab adaptive locks (since they cannot block),
138  * so the membar may be safely omitted when resuming an idle thread.
139  *
140  * When a mutex has waiters, mutex_vector_exit() has several options:
141  *
142  * (1) Choose a waiter and make that thread the owner before waking it;
143  *     this is known as "direct handoff" of ownership.
144  *
145  * (2) Drop the lock and wake one waiter.
146  *
147  * (3) Drop the lock, clear the waiters bit, and wake all waiters.
148  *
149  * In many ways (1) is the cleanest solution, but if a lock is moderately
150  * contended it defeats the adaptive spin logic.  If we make some other
151  * thread the owner, but he's not ONPROC yet, then all other threads on
152  * other cpus that try to get the lock will conclude that the owner is
153  * blocked, so they'll block too.  And so on -- it escalates quickly,
154  * with every thread taking the blocking path rather than the spin path.
155  * Thus, direct handoff is *not* a good idea for adaptive mutexes.
156  *
157  * Option (2) is the next most natural-seeming option, but it has several
158  * annoying properties.  If there's more than one waiter, we must preserve
159  * the waiters bit on an unheld lock.  On cas-capable platforms, where
160  * the waiters bit is part of the lock word, this means that both 0x0
161  * and 0x1 represent unheld locks, so we have to cas against *both*.
162  * Priority inheritance also gets more complicated, because a lock can
163  * have waiters but no owner to whom priority can be willed.  So while
164  * it is possible to make option (2) work, it's surprisingly vile.
165  *
166  * Option (3), the least-intuitive at first glance, is what we actually do.
167  * It has the advantage that because you always wake all waiters, you
168  * never have to preserve the waiters bit.  Waking all waiters seems like
169  * begging for a thundering herd problem, but consider: under option (2),
170  * every thread that grabs and drops the lock will wake one waiter -- so
171  * if the lock is fairly active, all waiters will be awakened very quickly
172  * anyway.  Moreover, this is how adaptive locks are *supposed* to work.
173  * The blocking case is rare; the more common case (by 3-4 orders of
174  * magnitude) is that one or more threads spin waiting to get the lock.
175  * Only direct handoff can prevent the thundering herd problem, but as
176  * mentioned earlier, that would tend to defeat the adaptive spin logic.
177  * In practice, option (3) works well because the blocking case is rare.
178  */
179 
180 /*
181  * delayed lock retry with exponential delay for spin locks
182  *
183  * It is noted above that for both the spin locks and the adaptive locks,
184  * spinning is the dominant mode of operation.  So long as there is only
185  * one thread waiting on a lock, the naive spin loop works very well in
186  * cache based architectures.  The lock data structure is pulled into the
187  * cache of the processor with the waiting/spinning thread and no further
188  * memory traffic is generated until the lock is released.  Unfortunately,
189  * once two or more threads are waiting on a lock, the naive spin has
190  * the property of generating maximum memory traffic from each spinning
191  * thread as the spinning threads contend for the lock data structure.
192  *
193  * By executing a delay loop before retrying a lock, a waiting thread
194  * can reduce its memory traffic by a large factor, depending on the
195  * size of the delay loop.  A large delay loop greatly reduces the memory
196  * traffic, but has the drawback of having a period of time when
197  * no thread is attempting to gain the lock even though several threads
198  * might be waiting.  A small delay loop has the drawback of not
199  * much reduction in memory traffic, but reduces the potential idle time.
200  * The theory of the exponential delay code is to start with a short
201  * delay loop and double the waiting time on each iteration, up to
202  * a preselected maximum.  The BACKOFF_BASE provides the equivalent
203  * of 2 to 3 memory references delay for US-III+ and US-IV architectures.
204  * The BACKOFF_CAP is the equivalent of 50 to 100 memory references of
205  * time (less than 12 microseconds for a 1000 MHz system).
206  *
207  * To determine appropriate BACKOFF_BASE and BACKOFF_CAP values,
208  * studies on US-III+ and US-IV systems using 1 to 66 threads were
209  * done.  A range of possible values were studied.
210  * Performance differences below 10 threads were not large.  For
211  * systems with more threads, substantial increases in total lock
212  * throughput were observed with the given values.  For cases where
213  * more than 20 threads were waiting on the same lock, lock throughput
214  * increased by a factor of 5 or more using the backoff algorithm.
215  *
216  * Some platforms may provide their own platform specific delay code,
217  * using plat_lock_delay(backoff).  If it is available, plat_lock_delay
218  * is executed instead of the default delay code.
219  */
220 
221 #pragma weak plat_lock_delay
222 
223 #include <sys/param.h>
224 #include <sys/time.h>
225 #include <sys/cpuvar.h>
226 #include <sys/thread.h>
227 #include <sys/debug.h>
228 #include <sys/cmn_err.h>
229 #include <sys/sobject.h>
230 #include <sys/turnstile.h>
231 #include <sys/systm.h>
232 #include <sys/mutex_impl.h>
233 #include <sys/spl.h>
234 #include <sys/lockstat.h>
235 #include <sys/atomic.h>
236 #include <sys/cpu.h>
237 #include <sys/stack.h>
238 
/*
 * Tunables for the default delay loop used when no plat_lock_delay()
 * is provided: BACKOFF_BASE is the initial delay-loop iteration count,
 * and BACKOFF_CAP is the ceiling applied after each doubling.
 */
#define	BACKOFF_BASE	50
#define	BACKOFF_CAP 	1600
241 
/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.  It is handed to
 * turnstile_block() when a thread blocks in mutex_vector_enter().
 *
 * The entries are, in order: the object type (SOBJ_MUTEX), the routine
 * that reports the current owner (mutex_owner), and the two turnstile
 * routines turnstile_stay_asleep and turnstile_change_pri.
 */
static sobj_ops_t mutex_sobj_ops = {
	SOBJ_MUTEX, mutex_owner, turnstile_stay_asleep, turnstile_change_pri
};
249 
/*
 * If the system panics on a mutex, save the address of the offending
 * mutex in panic_mutex_addr, and save the contents in panic_mutex.
 *
 * panic_mutex_addr starts out NULL and is claimed with a compare-and-swap
 * in mutex_panic(); only the caller that wins that swap copies its lock
 * into panic_mutex.
 */
static mutex_impl_t panic_mutex;
static mutex_impl_t *panic_mutex_addr;
256 
257 static void
258 mutex_panic(char *msg, mutex_impl_t *lp)
259 {
260 	if (panicstr)
261 		return;
262 
263 	if (casptr(&panic_mutex_addr, NULL, lp) == NULL)
264 		panic_mutex = *lp;
265 
266 	panic("%s, lp=%p owner=%p thread=%p",
267 	    msg, lp, MUTEX_OWNER(&panic_mutex), curthread);
268 }
269 
/*
 * mutex_vector_enter() is called from the assembly mutex_enter() routine
 * if the lock is held or is not of type MUTEX_ADAPTIVE.
 *
 * Outline:
 *   - A spin mutex is handed to lock_set_spl() and we return.
 *   - Anything that is neither spin nor adaptive is reported through
 *     mutex_panic().
 *   - For an adaptive mutex we loop: delay (exponential backoff or
 *     plat_lock_delay()), then look at the owner.  If there is no owner
 *     we try to take the lock; if the owner is found running on some CPU
 *     we keep spinning; otherwise we set the waiters bit and block on the
 *     lock's turnstile, after rechecking that nothing changed.
 *   - If panicstr becomes set while we are looping, we return without
 *     having acquired the lock.
 *   - On success, lockstat events are recorded for the time blocked, the
 *     number of spins, and the acquisition itself.
 */
void
mutex_vector_enter(mutex_impl_t *lp)
{
	kthread_id_t	owner;
	hrtime_t	sleep_time = 0;	/* how long we slept */
	uint_t		spin_count = 0;	/* how many times we spun */
	cpu_t 		*cpup, *last_cpu;
	extern cpu_t	*cpu_list;
	turnstile_t	*ts;
	/* volatile alias so the owner/waiters fields are re-read each time */
	volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
	int		backoff;	/* current backoff */
	int		backctr;	/* ctr for backoff */
	int		sleep_count = 0;	/* how many times we blocked */

	ASSERT_STACK_ALIGNED();

	if (MUTEX_TYPE_SPIN(lp)) {
		lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
		    &lp->m_spin.m_oldspl);
		return;
	}

	if (!MUTEX_TYPE_ADAPTIVE(lp)) {
		mutex_panic("mutex_enter: bad mutex", lp);
		return;
	}

	/*
	 * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
	 * We can migrate after loading CPU but before checking CPU_ON_INTR,
	 * so we must verify by disabling preemption and loading CPU again.
	 */
	cpup = CPU;
	if (CPU_ON_INTR(cpup) && !panicstr) {
		kpreempt_disable();
		if (CPU_ON_INTR(CPU))
			mutex_panic("mutex_enter: adaptive at high PIL", lp);
		kpreempt_enable();
	}

	CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);

	/*
	 * plat_lock_delay is a weak symbol; its address is non-zero only
	 * when the platform supplies it.  In that case the platform code
	 * manages the backoff value, which starts at zero.
	 */
	if (&plat_lock_delay) {
		backoff = 0;
	} else {
		backoff = BACKOFF_BASE;
	}

	for (;;) {
spin:
		spin_count++;
		/*
		 * Add an exponential backoff delay before trying again
		 * to touch the mutex data structure.
		 * the spin_count test and call to nulldev are to prevent
		 * the compiler optimizer from eliminating the delay loop.
		 */
		if (&plat_lock_delay) {
			plat_lock_delay(&backoff);
		} else {
			for (backctr = backoff; backctr; backctr--) {
				if (!spin_count) (void) nulldev();
			};    /* delay */
			backoff = backoff << 1;			/* double it */
			if (backoff > BACKOFF_CAP) {
				backoff = BACKOFF_CAP;
			}

			SMT_PAUSE();
		}

		/* Give up (without the lock) once the system is panicking. */
		if (panicstr)
			return;

		/* No owner: try to take the lock, otherwise go around again. */
		if ((owner = MUTEX_OWNER(vlp)) == NULL) {
			if (mutex_adaptive_tryenter(lp))
				break;
			continue;
		}

		if (owner == curthread)
			mutex_panic("recursive mutex_enter", lp);

		/*
		 * If lock is held but owner is not yet set, spin.
		 * (Only relevant for platforms that don't have cas.)
		 */
		if (owner == MUTEX_NO_OWNER)
			continue;

		/*
		 * When searching the other CPUs, start with the one where
		 * we last saw the owner thread.  If owner is running, spin.
		 *
		 * We must disable preemption at this point to guarantee
		 * that the list doesn't change while we traverse it
		 * without the cpu_lock mutex.  While preemption is
		 * disabled, we must revalidate our cached cpu pointer.
		 */
		kpreempt_disable();
		if (cpup->cpu_next == NULL)
			cpup = cpu_list;
		last_cpu = cpup;	/* mark end of search */
		do {
			if (cpup->cpu_thread == owner) {
				kpreempt_enable();
				goto spin;
			}
		} while ((cpup = cpup->cpu_next) != last_cpu);
		kpreempt_enable();

		/*
		 * The owner appears not to be running, so block.
		 * See the Big Theory Statement for memory ordering issues.
		 */
		ts = turnstile_lookup(lp);
		MUTEX_SET_WAITERS(lp);
		membar_enter();

		/*
		 * Recheck whether owner is running after waiters bit hits
		 * global visibility (above).  If owner is running, spin.
		 *
		 * Since we are at ipl DISP_LEVEL, kernel preemption is
		 * disabled, however we still need to revalidate our cached
		 * cpu pointer to make sure the cpu hasn't been deleted.
		 */
		if (cpup->cpu_next == NULL)
			last_cpu = cpup = cpu_list;
		do {
			if (cpup->cpu_thread == owner) {
				turnstile_exit(lp);
				goto spin;
			}
		} while ((cpup = cpup->cpu_next) != last_cpu);
		membar_consumer();

		/*
		 * If owner and waiters bit are unchanged, block.
		 * sleep_time accumulates the elapsed gethrtime() interval
		 * around each turnstile_block() call.
		 */
		if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
			sleep_time -= gethrtime();
			(void) turnstile_block(ts, TS_WRITER_Q, lp,
			    &mutex_sobj_ops, NULL, NULL);
			sleep_time += gethrtime();
			sleep_count++;
		} else {
			turnstile_exit(lp);
		}
	}

	ASSERT(MUTEX_OWNER(lp) == curthread);

	if (sleep_time != 0) {
		/*
		 * Note, sleep time is the sum of all the sleeping we
		 * did.
		 */
		LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
	}

	/*
	 * We do not count a sleep as a spin.
	 */
	if (spin_count > sleep_count)
		LOCKSTAT_RECORD(LS_MUTEX_ENTER_SPIN, lp,
		    spin_count - sleep_count);

	LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
}
444 
445 /*
446  * mutex_vector_tryenter() is called from the assembly mutex_tryenter()
447  * routine if the lock is held or is not of type MUTEX_ADAPTIVE.
448  */
449 int
450 mutex_vector_tryenter(mutex_impl_t *lp)
451 {
452 	int s;
453 
454 	if (MUTEX_TYPE_ADAPTIVE(lp))
455 		return (0);		/* we already tried in assembly */
456 
457 	if (!MUTEX_TYPE_SPIN(lp)) {
458 		mutex_panic("mutex_tryenter: bad mutex", lp);
459 		return (0);
460 	}
461 
462 	s = splr(lp->m_spin.m_minspl);
463 	if (lock_try(&lp->m_spin.m_spinlock)) {
464 		lp->m_spin.m_oldspl = (ushort_t)s;
465 		return (1);
466 	}
467 	splx(s);
468 	return (0);
469 }
470 
471 /*
472  * mutex_vector_exit() is called from mutex_exit() if the lock is not
473  * adaptive, has waiters, or is not owned by the current thread (panic).
474  */
475 void
476 mutex_vector_exit(mutex_impl_t *lp)
477 {
478 	turnstile_t *ts;
479 
480 	if (MUTEX_TYPE_SPIN(lp)) {
481 		lock_clear_splx(&lp->m_spin.m_spinlock, lp->m_spin.m_oldspl);
482 		return;
483 	}
484 
485 	if (MUTEX_OWNER(lp) != curthread) {
486 		mutex_panic("mutex_exit: not owner", lp);
487 		return;
488 	}
489 
490 	ts = turnstile_lookup(lp);
491 	MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
492 	if (ts == NULL)
493 		turnstile_exit(lp);
494 	else
495 		turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
496 	LOCKSTAT_RECORD0(LS_MUTEX_EXIT_RELEASE, lp);
497 }
498 
499 int
500 mutex_owned(kmutex_t *mp)
501 {
502 	mutex_impl_t *lp = (mutex_impl_t *)mp;
503 
504 	if (panicstr)
505 		return (1);
506 
507 	if (MUTEX_TYPE_ADAPTIVE(lp))
508 		return (MUTEX_OWNER(lp) == curthread);
509 	return (LOCK_HELD(&lp->m_spin.m_spinlock));
510 }
511 
512 kthread_t *
513 mutex_owner(kmutex_t *mp)
514 {
515 	mutex_impl_t *lp = (mutex_impl_t *)mp;
516 	kthread_id_t t;
517 
518 	if (MUTEX_TYPE_ADAPTIVE(lp) && (t = MUTEX_OWNER(lp)) != MUTEX_NO_OWNER)
519 		return (t);
520 	return (NULL);
521 }
522 
523 /*
524  * The iblock cookie 'ibc' is the spl level associated with the lock;
525  * this alone determines whether the lock will be ADAPTIVE or SPIN.
526  *
527  * Adaptive mutexes created in zeroed memory do not need to call
528  * mutex_init() as their allocation in this fashion guarantees
529  * their initialization.
530  *   eg adaptive mutexes created as static within the BSS or allocated
531  *      by kmem_zalloc().
532  */
533 /* ARGSUSED */
534 void
535 mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc)
536 {
537 	mutex_impl_t *lp = (mutex_impl_t *)mp;
538 
539 	ASSERT(ibc < (void *)KERNELBASE);	/* see 1215173 */
540 
541 	if ((intptr_t)ibc > ipltospl(LOCK_LEVEL) && ibc < (void *)KERNELBASE) {
542 		ASSERT(type != MUTEX_ADAPTIVE && type != MUTEX_DEFAULT);
543 		MUTEX_SET_TYPE(lp, MUTEX_SPIN);
544 		LOCK_INIT_CLEAR(&lp->m_spin.m_spinlock);
545 		LOCK_INIT_HELD(&lp->m_spin.m_dummylock);
546 		lp->m_spin.m_minspl = (int)(intptr_t)ibc;
547 	} else {
548 		ASSERT(type != MUTEX_SPIN);
549 		MUTEX_SET_TYPE(lp, MUTEX_ADAPTIVE);
550 		MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
551 	}
552 }
553 
554 void
555 mutex_destroy(kmutex_t *mp)
556 {
557 	mutex_impl_t *lp = (mutex_impl_t *)mp;
558 
559 	if (lp->m_owner == 0 && !MUTEX_HAS_WAITERS(lp)) {
560 		MUTEX_DESTROY(lp);
561 	} else if (MUTEX_TYPE_SPIN(lp)) {
562 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
563 		MUTEX_DESTROY(lp);
564 	} else if (MUTEX_TYPE_ADAPTIVE(lp)) {
565 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
566 		if (MUTEX_OWNER(lp) != curthread)
567 			mutex_panic("mutex_destroy: not owner", lp);
568 		if (MUTEX_HAS_WAITERS(lp)) {
569 			turnstile_t *ts = turnstile_lookup(lp);
570 			turnstile_exit(lp);
571 			if (ts != NULL)
572 				mutex_panic("mutex_destroy: has waiters", lp);
573 		}
574 		MUTEX_DESTROY(lp);
575 	} else {
576 		mutex_panic("mutex_destroy: bad mutex", lp);
577 	}
578 }
579 
580 /*
581  * Simple C support for the cases where spin locks miss on the first try.
582  */
583 void
584 lock_set_spin(lock_t *lp)
585 {
586 	int spin_count = 1;
587 	int backoff;	/* current backoff */
588 	int backctr;	/* ctr for backoff */
589 
590 	if (panicstr)
591 		return;
592 
593 	if (ncpus == 1)
594 		panic("lock_set: %p lock held and only one CPU", lp);
595 
596 	if (&plat_lock_delay) {
597 		backoff = 0;
598 	} else {
599 		backoff = BACKOFF_BASE;
600 	}
601 
602 	while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
603 		if (panicstr)
604 			return;
605 		spin_count++;
606 		/*
607 		 * Add an exponential backoff delay before trying again
608 		 * to touch the mutex data structure.
609 		 * the spin_count test and call to nulldev are to prevent
610 		 * the compiler optimizer from eliminating the delay loop.
611 		 */
612 		if (&plat_lock_delay) {
613 			plat_lock_delay(&backoff);
614 		} else {
615 			/* delay */
616 			for (backctr = backoff; backctr; backctr--) {
617 				if (!spin_count) (void) nulldev();
618 			}
619 
620 			backoff = backoff << 1;		/* double it */
621 			if (backoff > BACKOFF_CAP) {
622 				backoff = BACKOFF_CAP;
623 			}
624 			SMT_PAUSE();
625 		}
626 	}
627 
628 	if (spin_count) {
629 		LOCKSTAT_RECORD(LS_LOCK_SET_SPIN, lp, spin_count);
630 	}
631 
632 	LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
633 }
634 
635 void
636 lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
637 {
638 	int spin_count = 1;
639 	int backoff;	/* current backoff */
640 	int backctr;	/* ctr for backoff */
641 
642 	if (panicstr)
643 		return;
644 
645 	if (ncpus == 1)
646 		panic("lock_set_spl: %p lock held and only one CPU", lp);
647 
648 	ASSERT(new_pil > LOCK_LEVEL);
649 
650 	if (&plat_lock_delay) {
651 		backoff = 0;
652 	} else {
653 		backoff = BACKOFF_BASE;
654 	}
655 	do {
656 		splx(old_pil);
657 		while (LOCK_HELD(lp)) {
658 			if (panicstr) {
659 				*old_pil_addr = (ushort_t)splr(new_pil);
660 				return;
661 			}
662 			spin_count++;
663 			/*
664 			 * Add an exponential backoff delay before trying again
665 			 * to touch the mutex data structure.
666 			 * spin_count test and call to nulldev are to prevent
667 			 * compiler optimizer from eliminating the delay loop.
668 			 */
669 			if (&plat_lock_delay) {
670 				plat_lock_delay(&backoff);
671 			} else {
672 				for (backctr = backoff; backctr; backctr--) {
673 					if (!spin_count) (void) nulldev();
674 				}
675 				backoff = backoff << 1;		/* double it */
676 				if (backoff > BACKOFF_CAP) {
677 					backoff = BACKOFF_CAP;
678 				}
679 
680 				SMT_PAUSE();
681 			}
682 		}
683 		old_pil = splr(new_pil);
684 	} while (!lock_spin_try(lp));
685 
686 	*old_pil_addr = (ushort_t)old_pil;
687 
688 	if (spin_count) {
689 		LOCKSTAT_RECORD(LS_LOCK_SET_SPL_SPIN, lp, spin_count);
690 	}
691 
692 	LOCKSTAT_RECORD(LS_LOCK_SET_SPL_ACQUIRE, lp, spin_count);
693 }
694