xref: /illumos-gate/usr/src/lib/libc/port/threads/synch.c (revision 31db3c26)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #define	atomic_cas_64	_atomic_cas_64
30 
31 #include "lint.h"
32 #include "thr_uberdata.h"
33 #include <sys/sdt.h>
34 #include <atomic.h>
35 
36 /*
37  * This mutex is initialized to be held by lwp#1.
38  * It is used to block a thread that has returned from a mutex_lock()
39  * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error.
40  */
41 mutex_t	stall_mutex = DEFAULTMUTEX;
42 
43 static int shared_mutex_held(mutex_t *);
44 static int mutex_queuelock_adaptive(mutex_t *);
45 static void mutex_wakeup_all(mutex_t *);
46 
47 /*
48  * Lock statistics support functions.
49  */
50 void
51 record_begin_hold(tdb_mutex_stats_t *msp)
52 {
53 	tdb_incr(msp->mutex_lock);
54 	msp->mutex_begin_hold = gethrtime();
55 }
56 
57 hrtime_t
58 record_hold_time(tdb_mutex_stats_t *msp)
59 {
60 	hrtime_t now = gethrtime();
61 
62 	if (msp->mutex_begin_hold)
63 		msp->mutex_hold_time += now - msp->mutex_begin_hold;
64 	msp->mutex_begin_hold = 0;
65 	return (now);
66 }
67 
68 /*
69  * Called once at library initialization.
70  */
71 void
72 mutex_setup(void)
73 {
74 	if (set_lock_byte(&stall_mutex.mutex_lockw))
75 		thr_panic("mutex_setup() cannot acquire stall_mutex");
76 	stall_mutex.mutex_owner = (uintptr_t)curthread;
77 }
78 
79 /*
80  * The default spin count of 1000 is experimentally determined.
81  * On sun4u machines with any number of processors it could be raised
82  * to 10,000 but that (experimentally) makes almost no difference.
83  * The environment variable:
84  *	_THREAD_ADAPTIVE_SPIN=count
85  * can be used to override and set the count in the range [0 .. 1,000,000].
86  */
87 int	thread_adaptive_spin = 1000;
88 uint_t	thread_max_spinners = 100;
89 int	thread_queue_verify = 0;
90 static	int	ncpus;
91 
92 /*
93  * Distinguish spinning for queue locks from spinning for regular locks.
94  * We try harder to acquire queue locks by spinning.
95  * The environment variable:
96  *	_THREAD_QUEUE_SPIN=count
97  * can be used to override and set the count in the range [0 .. 1,000,000].
98  */
99 int	thread_queue_spin = 10000;
100 
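/*
 * Illustrative example (not part of this file): both spin tunables are taken
 * from the environment, so they are normally set before the program starts,
 * e.g. from a shell:
 *
 *	_THREAD_ADAPTIVE_SPIN=5000 _THREAD_QUEUE_SPIN=20000 ./app
 */
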
101 #define	ALL_ATTRIBUTES				\
102 	(LOCK_RECURSIVE | LOCK_ERRORCHECK |	\
103 	LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT |	\
104 	LOCK_ROBUST)
105 
106 /*
107  * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST,
108  * augmented by zero or more of the flags:
109  *	LOCK_RECURSIVE
110  *	LOCK_ERRORCHECK
111  *	LOCK_PRIO_INHERIT
112  *	LOCK_PRIO_PROTECT
113  *	LOCK_ROBUST
114  */
115 #pragma weak _private_mutex_init = __mutex_init
116 #pragma weak mutex_init = __mutex_init
117 #pragma weak _mutex_init = __mutex_init
118 /* ARGSUSED2 */
119 int
120 __mutex_init(mutex_t *mp, int type, void *arg)
121 {
122 	int basetype = (type & ~ALL_ATTRIBUTES);
123 	int error = 0;
124 
125 	if (basetype == USYNC_PROCESS_ROBUST) {
126 		/*
127 		 * USYNC_PROCESS_ROBUST is a deprecated historical type.
128 		 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but
129 		 * retain the USYNC_PROCESS_ROBUST flag so we can return
130 		 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST
131 		 * mutexes will ever draw ELOCKUNMAPPED).
132 		 */
133 		type |= (USYNC_PROCESS | LOCK_ROBUST);
134 		basetype = USYNC_PROCESS;
135 	}
136 
137 	if (!(basetype == USYNC_THREAD || basetype == USYNC_PROCESS) ||
138 	    (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT))
139 	    == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) {
140 		error = EINVAL;
141 	} else if (type & LOCK_ROBUST) {
142 		/*
143 		 * Callers of mutex_init() with the LOCK_ROBUST attribute
144 		 * are required to pass an initially all-zero mutex.
145 		 * Multiple calls to mutex_init() are allowed; all but
146 		 * the first return EBUSY.  A call to mutex_init() is
147 		 * allowed to make an inconsistent robust lock consistent
148 		 * (for historical usage, even though the proper interface
149 		 * for this is mutex_consistent()).  Note that we use
150 		 * atomic_or_16() to set the LOCK_INITED flag so as
151 		 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc).
152 		 */
153 		extern void _atomic_or_16(volatile uint16_t *, uint16_t);
154 		if (!(mp->mutex_flag & LOCK_INITED)) {
155 			mp->mutex_type = (uint8_t)type;
156 			_atomic_or_16(&mp->mutex_flag, LOCK_INITED);
157 			mp->mutex_magic = MUTEX_MAGIC;
158 		} else if (type != mp->mutex_type ||
159 		    ((type & LOCK_PRIO_PROTECT) &&
160 		    mp->mutex_ceiling != (*(int *)arg))) {
161 			error = EINVAL;
162 		} else if (__mutex_consistent(mp) != 0) {
163 			error = EBUSY;
164 		}
165 		/* register a process robust mutex with the kernel */
166 		if (basetype == USYNC_PROCESS)
167 			register_lock(mp);
168 	} else {
169 		(void) _memset(mp, 0, sizeof (*mp));
170 		mp->mutex_type = (uint8_t)type;
171 		mp->mutex_flag = LOCK_INITED;
172 		mp->mutex_magic = MUTEX_MAGIC;
173 	}
174 
175 	if (error == 0 && (type & LOCK_PRIO_PROTECT))
176 		mp->mutex_ceiling = (uint8_t)(*(int *)arg);
177 
178 	return (error);
179 }
180 
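/*
 * Illustrative sketch (not part of this file): how an application might use
 * the attribute combinations accepted above to initialize and recover a
 * process-shared robust mutex.  It assumes the mutex lives in zero-filled
 * shared memory (as LOCK_ROBUST requires) and uses the public mutex_init(),
 * mutex_lock(), mutex_unlock() and mutex_consistent() interfaces.
 */
#if 0	/* example only; not compiled */
#include <synch.h>
#include <errno.h>

static void
example_robust_usage(mutex_t *shared_mp)	/* hypothetical caller */
{
	int error;

	/* multiple calls to mutex_init() are allowed; all but the first EBUSY */
	(void) mutex_init(shared_mp, USYNC_PROCESS | LOCK_ROBUST, NULL);

	error = mutex_lock(shared_mp);
	if (error == EOWNERDEAD) {
		/* previous owner died; repair state, then mark it consistent */
		(void) mutex_consistent(shared_mp);
		error = 0;
	}
	if (error == 0)
		(void) mutex_unlock(shared_mp);
}
#endif
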
181 /*
182  * Delete mp from list of ceil mutexes owned by curthread.
183  * Return 1 if the head of the chain was updated.
184  */
185 int
186 _ceil_mylist_del(mutex_t *mp)
187 {
188 	ulwp_t *self = curthread;
189 	mxchain_t **mcpp;
190 	mxchain_t *mcp;
191 
192 	mcpp = &self->ul_mxchain;
193 	while ((*mcpp)->mxchain_mx != mp)
194 		mcpp = &(*mcpp)->mxchain_next;
195 	mcp = *mcpp;
196 	*mcpp = mcp->mxchain_next;
197 	lfree(mcp, sizeof (*mcp));
198 	return (mcpp == &self->ul_mxchain);
199 }
200 
201 /*
202  * Add mp to head of list of ceil mutexes owned by curthread.
203  * Return ENOMEM if no memory could be allocated.
204  */
205 int
206 _ceil_mylist_add(mutex_t *mp)
207 {
208 	ulwp_t *self = curthread;
209 	mxchain_t *mcp;
210 
211 	if ((mcp = lmalloc(sizeof (*mcp))) == NULL)
212 		return (ENOMEM);
213 	mcp->mxchain_mx = mp;
214 	mcp->mxchain_next = self->ul_mxchain;
215 	self->ul_mxchain = mcp;
216 	return (0);
217 }
218 
219 /*
220  * Inherit priority from ceiling.  The inheritance impacts the effective
221  * priority, not the assigned priority.  See _thread_setschedparam_main().
222  */
223 void
224 _ceil_prio_inherit(int ceil)
225 {
226 	ulwp_t *self = curthread;
227 	struct sched_param param;
228 
229 	(void) _memset(&param, 0, sizeof (param));
230 	param.sched_priority = ceil;
231 	if (_thread_setschedparam_main(self->ul_lwpid,
232 	    self->ul_policy, &param, PRIO_INHERIT)) {
233 		/*
234 		 * Panic, since it is unclear what error code to return.
235 		 * If we ever do return the error codes returned by the
236 		 * routine called above, update the man page...
237 		 */
238 		thr_panic("_thread_setschedparam_main() fails");
239 	}
240 }
241 
242 /*
243  * Waive inherited ceiling priority.  Inherit from head of owned ceiling locks
244  * if holding at least one ceiling lock.  If no ceiling locks are held at this
245  * point, disinherit completely, reverting back to assigned priority.
246  */
247 void
248 _ceil_prio_waive(void)
249 {
250 	ulwp_t *self = curthread;
251 	struct sched_param param;
252 
253 	(void) _memset(&param, 0, sizeof (param));
254 	if (self->ul_mxchain == NULL) {
255 		/*
256 		 * No ceiling locks are held.  Zero the epri and revert to ul_pri.
257 		 * Since the thread's hash lock is not held, we cannot just
258 		 * read ul_pri here; we do it in the called routine...
259 		 */
260 		param.sched_priority = self->ul_pri;	/* ignored */
261 		if (_thread_setschedparam_main(self->ul_lwpid,
262 		    self->ul_policy, &param, PRIO_DISINHERIT))
263 			thr_panic("_thread_setschedparam_main() fails");
264 	} else {
265 		/*
266 		 * Set priority to that of the mutex at the head
267 		 * of the ceilmutex chain.
268 		 */
269 		param.sched_priority =
270 		    self->ul_mxchain->mxchain_mx->mutex_ceiling;
271 		if (_thread_setschedparam_main(self->ul_lwpid,
272 		    self->ul_policy, &param, PRIO_INHERIT))
273 			thr_panic("_thread_setschedparam_main() fails");
274 	}
275 }
276 
277 /*
278  * Clear the lock byte.  Retain the waiters byte and the spinners byte.
279  * Return the old value of the lock word.
280  */
281 static uint32_t
282 clear_lockbyte(volatile uint32_t *lockword)
283 {
284 	uint32_t old;
285 	uint32_t new;
286 
287 	do {
288 		old = *lockword;
289 		new = old & ~LOCKMASK;
290 	} while (atomic_cas_32(lockword, old, new) != old);
291 
292 	return (old);
293 }
294 
295 /*
296  * Same as clear_lockbyte(), but operates on mutex_lockword64.
297  * The mutex_ownerpid field is cleared along with the lock byte.
298  */
299 static uint64_t
300 clear_lockbyte64(volatile uint64_t *lockword64)
301 {
302 	uint64_t old;
303 	uint64_t new;
304 
305 	do {
306 		old = *lockword64;
307 		new = old & ~LOCKMASK64;
308 	} while (atomic_cas_64(lockword64, old, new) != old);
309 
310 	return (old);
311 }
312 
313 /*
314  * Similar to set_lock_byte(), which only tries to set the lock byte.
315  * Here, we attempt to set the lock byte AND the mutex_ownerpid,
316  * keeping the remaining bytes constant.
317  */
318 static int
319 set_lock_byte64(volatile uint64_t *lockword64, pid_t ownerpid)
320 {
321 	uint64_t old;
322 	uint64_t new;
323 
324 	old = *lockword64 & ~LOCKMASK64;
325 	new = old | ((uint64_t)(uint_t)ownerpid << PIDSHIFT) | LOCKBYTE64;
326 	if (atomic_cas_64(lockword64, old, new) == old)
327 		return (LOCKCLEAR);
328 
329 	return (LOCKSET);
330 }
331 
332 /*
333  * Increment the spinners count in the mutex lock word.
334  * Return 0 on success.  Return -1 if the count would overflow.
335  */
336 static int
337 spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners)
338 {
339 	uint32_t old;
340 	uint32_t new;
341 
342 	do {
343 		old = *lockword;
344 		if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners)
345 			return (-1);
346 		new = old + (1 << SPINNERSHIFT);
347 	} while (atomic_cas_32(lockword, old, new) != old);
348 
349 	return (0);
350 }
351 
352 /*
353  * Decrement the spinners count in the mutex lock word.
354  * Return the new value of the lock word.
355  */
356 static uint32_t
357 spinners_decr(volatile uint32_t *lockword)
358 {
359 	uint32_t old;
360 	uint32_t new;
361 
362 	do {
363 		new = old = *lockword;
364 		if (new & SPINNERMASK)
365 			new -= (1 << SPINNERSHIFT);
366 	} while (atomic_cas_32(lockword, old, new) != old);
367 
368 	return (new);
369 }
370 
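/*
 * Illustrative sketch (not part of this file): clear_lockbyte(),
 * spinners_incr() and spinners_decr() above all follow the same lock-free
 * read-modify-write idiom: read the current word, compute the desired new
 * value, and retry with atomic_cas_32() if another thread changed the word
 * in the meantime.  A generic instance of the pattern:
 */
#if 0	/* example only; not compiled */
#include <atomic.h>

static uint32_t
example_saturating_incr(volatile uint32_t *word, uint32_t limit)
{
	uint32_t old;
	uint32_t new;

	do {
		old = *word;
		new = (old < limit)? old + 1 : old;	/* saturate at limit */
	} while (atomic_cas_32(word, old, new) != old);

	return (new);
}
#endif
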
371 /*
372  * Non-preemptive spin locks.  Used by queue_lock().
373  * No lock statistics are gathered for these locks.
374  * No DTrace probes are provided for these locks.
375  */
376 void
377 spin_lock_set(mutex_t *mp)
378 {
379 	ulwp_t *self = curthread;
380 
381 	no_preempt(self);
382 	if (set_lock_byte(&mp->mutex_lockw) == 0) {
383 		mp->mutex_owner = (uintptr_t)self;
384 		return;
385 	}
386 	/*
387 	 * Spin for a while, attempting to acquire the lock.
388 	 */
389 	if (self->ul_spin_lock_spin != UINT_MAX)
390 		self->ul_spin_lock_spin++;
391 	if (mutex_queuelock_adaptive(mp) == 0 ||
392 	    set_lock_byte(&mp->mutex_lockw) == 0) {
393 		mp->mutex_owner = (uintptr_t)self;
394 		return;
395 	}
396 	/*
397 	 * Try harder if we were previously at a no-preemption level.
398 	 */
399 	if (self->ul_preempt > 1) {
400 		if (self->ul_spin_lock_spin2 != UINT_MAX)
401 			self->ul_spin_lock_spin2++;
402 		if (mutex_queuelock_adaptive(mp) == 0 ||
403 		    set_lock_byte(&mp->mutex_lockw) == 0) {
404 			mp->mutex_owner = (uintptr_t)self;
405 			return;
406 		}
407 	}
408 	/*
409 	 * Give up and block in the kernel for the mutex.
410 	 */
411 	if (self->ul_spin_lock_sleep != UINT_MAX)
412 		self->ul_spin_lock_sleep++;
413 	(void) ___lwp_mutex_timedlock(mp, NULL);
414 	mp->mutex_owner = (uintptr_t)self;
415 }
416 
417 void
418 spin_lock_clear(mutex_t *mp)
419 {
420 	ulwp_t *self = curthread;
421 
422 	mp->mutex_owner = 0;
423 	if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) {
424 		(void) ___lwp_mutex_wakeup(mp, 0);
425 		if (self->ul_spin_lock_wakeup != UINT_MAX)
426 			self->ul_spin_lock_wakeup++;
427 	}
428 	preempt(self);
429 }
430 
431 /*
432  * Allocate the sleep queue hash table.
433  */
434 void
435 queue_alloc(void)
436 {
437 	ulwp_t *self = curthread;
438 	uberdata_t *udp = self->ul_uberdata;
439 	mutex_t *mp;
440 	void *data;
441 	int i;
442 
443 	/*
444 	 * No locks are needed; we call here only when single-threaded.
445 	 */
446 	ASSERT(self == udp->ulwp_one);
447 	ASSERT(!udp->uberflags.uf_mt);
448 	if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t),
449 	    PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0))
450 	    == MAP_FAILED)
451 		thr_panic("cannot allocate thread queue_head table");
452 	udp->queue_head = (queue_head_t *)data;
453 	for (i = 0; i < 2 * QHASHSIZE; i++) {
454 		mp = &udp->queue_head[i].qh_lock;
455 		mp->mutex_flag = LOCK_INITED;
456 		mp->mutex_magic = MUTEX_MAGIC;
457 	}
458 }
459 
460 #if defined(THREAD_DEBUG)
461 
462 /*
463  * Debugging: verify correctness of a sleep queue.
464  */
465 void
466 QVERIFY(queue_head_t *qp)
467 {
468 	ulwp_t *self = curthread;
469 	uberdata_t *udp = self->ul_uberdata;
470 	ulwp_t *ulwp;
471 	ulwp_t *prev;
472 	uint_t index;
473 	uint32_t cnt = 0;
474 	char qtype;
475 	void *wchan;
476 
477 	ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE);
478 	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
479 	ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) ||
480 	    (qp->qh_head == NULL && qp->qh_tail == NULL));
481 	if (!thread_queue_verify)
482 		return;
483 	/* real expensive stuff, only for _THREAD_QUEUE_VERIFY */
484 	qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV;
485 	for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL;
486 	    prev = ulwp, ulwp = ulwp->ul_link, cnt++) {
487 		ASSERT(ulwp->ul_qtype == qtype);
488 		ASSERT(ulwp->ul_wchan != NULL);
489 		ASSERT(ulwp->ul_sleepq == qp);
490 		wchan = ulwp->ul_wchan;
491 		index = QUEUE_HASH(wchan, qtype);
492 		ASSERT(&udp->queue_head[index] == qp);
493 	}
494 	ASSERT(qp->qh_tail == prev);
495 	ASSERT(qp->qh_qlen == cnt);
496 }
497 
498 #else	/* THREAD_DEBUG */
499 
500 #define	QVERIFY(qp)
501 
502 #endif	/* THREAD_DEBUG */
503 
504 /*
505  * Acquire a queue head.
506  */
507 queue_head_t *
508 queue_lock(void *wchan, int qtype)
509 {
510 	uberdata_t *udp = curthread->ul_uberdata;
511 	queue_head_t *qp;
512 
513 	ASSERT(qtype == MX || qtype == CV);
514 
515 	/*
516 	 * It is possible that we could be called while still single-threaded.
517 	 * If so, we call queue_alloc() to allocate the queue_head[] array.
518 	 */
519 	if ((qp = udp->queue_head) == NULL) {
520 		queue_alloc();
521 		qp = udp->queue_head;
522 	}
523 	qp += QUEUE_HASH(wchan, qtype);
524 	spin_lock_set(&qp->qh_lock);
525 	/*
526 	 * At one acquisition per nanosecond, qh_lockcount wraps after 512 years.
527 	 * Were we to care about this, we could peg the value at UINT64_MAX.
528 	 */
529 	qp->qh_lockcount++;
530 	QVERIFY(qp);
531 	return (qp);
532 }
533 
534 /*
535  * Release a queue head.
536  */
537 void
538 queue_unlock(queue_head_t *qp)
539 {
540 	QVERIFY(qp);
541 	spin_lock_clear(&qp->qh_lock);
542 }
543 
544 /*
545  * For rwlock queueing, we must queue writers ahead of readers of the
546  * same priority.  We do this by making writers appear to have a half
547  * point higher priority for purposes of priority comparisons below.
548  */
549 #define	CMP_PRIO(ulwp)	((real_priority(ulwp) << 1) + (ulwp)->ul_writer)
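/*
 * Worked example: under CMP_PRIO, a writer at real priority 10 compares as
 * (10 << 1) + 1 == 21, ahead of a reader at the same priority, which
 * compares as (10 << 1) + 0 == 20, yet still behind any thread whose real
 * priority is 11 or better (at least 22).
 */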
550 
551 void
552 enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype)
553 {
554 	ulwp_t **ulwpp;
555 	ulwp_t *next;
556 	int pri = CMP_PRIO(ulwp);
557 	int force_fifo = (qtype & FIFOQ);
558 	int do_fifo;
559 
560 	qtype &= ~FIFOQ;
561 	ASSERT(qtype == MX || qtype == CV);
562 	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
563 	ASSERT(ulwp->ul_sleepq != qp);
564 
565 	/*
566 	 * LIFO queue ordering is unfair and can lead to starvation,
567 	 * but it gives better performance for heavily contended locks.
568 	 * We use thread_queue_fifo (range is 0..8) to determine
569 	 * the frequency of FIFO vs LIFO queuing:
570 	 *	0 : every 256th time	(almost always LIFO)
571 	 *	1 : every 128th time
572 	 *	2 : every 64th  time
573 	 *	3 : every 32nd  time
574 	 *	4 : every 16th  time	(the default value, mostly LIFO)
575 	 *	5 : every 8th   time
576 	 *	6 : every 4th   time
577 	 *	7 : every 2nd   time
578 	 *	8 : every time		(never LIFO, always FIFO)
579 	 * Note that there is always some degree of FIFO ordering.
580 	 * This breaks livelock conditions that occur in applications
581 	 * that are written assuming (incorrectly) that threads acquire
582 	 * locks fairly, that is, in roughly round-robin order.
583 	 * In any event, the queue is maintained in priority order.
584 	 *
585 	 * If we are given the FIFOQ flag in qtype, fifo queueing is forced.
586 	 * SUSV3 requires this for semaphores.
587 	 */
588 	do_fifo = (force_fifo ||
589 	    ((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0);
590 
591 	if (qp->qh_head == NULL) {
592 		/*
593 		 * The queue is empty.  LIFO/FIFO doesn't matter.
594 		 */
595 		ASSERT(qp->qh_tail == NULL);
596 		ulwpp = &qp->qh_head;
597 	} else if (do_fifo) {
598 		/*
599 		 * Enqueue after the last thread whose priority is greater
600 		 * than or equal to the priority of the thread being queued.
601 		 * Attempt first to go directly onto the tail of the queue.
602 		 */
603 		if (pri <= CMP_PRIO(qp->qh_tail))
604 			ulwpp = &qp->qh_tail->ul_link;
605 		else {
606 			for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL;
607 			    ulwpp = &next->ul_link)
608 				if (pri > CMP_PRIO(next))
609 					break;
610 		}
611 	} else {
612 		/*
613 		 * Enqueue before the first thread whose priority is less
614 		 * than or equal to the priority of the thread being queued.
615 		 * Hopefully we can go directly onto the head of the queue.
616 		 */
617 		for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL;
618 		    ulwpp = &next->ul_link)
619 			if (pri >= CMP_PRIO(next))
620 				break;
621 	}
622 	if ((ulwp->ul_link = *ulwpp) == NULL)
623 		qp->qh_tail = ulwp;
624 	*ulwpp = ulwp;
625 
626 	ulwp->ul_sleepq = qp;
627 	ulwp->ul_wchan = wchan;
628 	ulwp->ul_qtype = qtype;
629 	if (qp->qh_qmax < ++qp->qh_qlen)
630 		qp->qh_qmax = qp->qh_qlen;
631 }
632 
633 /*
634  * Return a pointer to the queue slot of the
635  * highest priority thread on the queue.
636  * On return, prevp, if not NULL, will contain a pointer
637  * to the thread's predecessor on the queue
638  */
639 static ulwp_t **
640 queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp)
641 {
642 	ulwp_t **ulwpp;
643 	ulwp_t *ulwp;
644 	ulwp_t *prev = NULL;
645 	ulwp_t **suspp = NULL;
646 	ulwp_t *susprev;
647 
648 	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
649 
650 	/*
651 	 * Find a waiter on the sleep queue.
652 	 */
653 	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
654 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
655 		if (ulwp->ul_wchan == wchan) {
656 			if (!ulwp->ul_stop)
657 				break;
658 			/*
659 			 * Try not to return a suspended thread.
660 			 * This mimics the old libthread's behavior.
661 			 */
662 			if (suspp == NULL) {
663 				suspp = ulwpp;
664 				susprev = prev;
665 			}
666 		}
667 	}
668 
669 	if (ulwp == NULL && suspp != NULL) {
670 		ulwp = *(ulwpp = suspp);
671 		prev = susprev;
672 		suspp = NULL;
673 	}
674 	if (ulwp == NULL) {
675 		if (more != NULL)
676 			*more = 0;
677 		return (NULL);
678 	}
679 
680 	if (prevp != NULL)
681 		*prevp = prev;
682 	if (more == NULL)
683 		return (ulwpp);
684 
685 	/*
686 	 * Scan the remainder of the queue for another waiter.
687 	 */
688 	if (suspp != NULL) {
689 		*more = 1;
690 		return (ulwpp);
691 	}
692 	for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) {
693 		if (ulwp->ul_wchan == wchan) {
694 			*more = 1;
695 			return (ulwpp);
696 		}
697 	}
698 
699 	*more = 0;
700 	return (ulwpp);
701 }
702 
703 ulwp_t *
704 queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev)
705 {
706 	ulwp_t *ulwp;
707 
708 	ulwp = *ulwpp;
709 	*ulwpp = ulwp->ul_link;
710 	ulwp->ul_link = NULL;
711 	if (qp->qh_tail == ulwp)
712 		qp->qh_tail = prev;
713 	qp->qh_qlen--;
714 	ulwp->ul_sleepq = NULL;
715 	ulwp->ul_wchan = NULL;
716 
717 	return (ulwp);
718 }
719 
720 ulwp_t *
721 dequeue(queue_head_t *qp, void *wchan, int *more)
722 {
723 	ulwp_t **ulwpp;
724 	ulwp_t *prev;
725 
726 	if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL)
727 		return (NULL);
728 	return (queue_unlink(qp, ulwpp, prev));
729 }
730 
731 /*
732  * Return a pointer to the highest priority thread sleeping on wchan.
733  */
734 ulwp_t *
735 queue_waiter(queue_head_t *qp, void *wchan)
736 {
737 	ulwp_t **ulwpp;
738 
739 	if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL)
740 		return (NULL);
741 	return (*ulwpp);
742 }
743 
744 uint8_t
745 dequeue_self(queue_head_t *qp, void *wchan)
746 {
747 	ulwp_t *self = curthread;
748 	ulwp_t **ulwpp;
749 	ulwp_t *ulwp;
750 	ulwp_t *prev = NULL;
751 	int found = 0;
752 	int more = 0;
753 
754 	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
755 
756 	/* find self on the sleep queue */
757 	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
758 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
759 		if (ulwp == self) {
760 			/* dequeue ourself */
761 			ASSERT(self->ul_wchan == wchan);
762 			(void) queue_unlink(qp, ulwpp, prev);
763 			self->ul_cvmutex = NULL;
764 			self->ul_cv_wake = 0;
765 			found = 1;
766 			break;
767 		}
768 		if (ulwp->ul_wchan == wchan)
769 			more = 1;
770 	}
771 
772 	if (!found)
773 		thr_panic("dequeue_self(): curthread not found on queue");
774 
775 	if (more)
776 		return (1);
777 
778 	/* scan the remainder of the queue for another waiter */
779 	for (ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) {
780 		if (ulwp->ul_wchan == wchan)
781 			return (1);
782 	}
783 
784 	return (0);
785 }
786 
787 /*
788  * Called from call_user_handler() and _thrp_suspend() to take
789  * ourself off of our sleep queue so we can grab locks.
790  */
791 void
792 unsleep_self(void)
793 {
794 	ulwp_t *self = curthread;
795 	queue_head_t *qp;
796 
797 	/*
798 	 * Calling enter_critical()/exit_critical() here would lead
799 	 * to recursion.  Just manipulate self->ul_critical directly.
800 	 */
801 	self->ul_critical++;
802 	while (self->ul_sleepq != NULL) {
803 		qp = queue_lock(self->ul_wchan, self->ul_qtype);
804 		/*
805 		 * We may have been moved from a CV queue to a
806 		 * mutex queue while we were attempting queue_lock().
807 		 * If so, just loop around and try again.
808 		 * dequeue_self() clears self->ul_sleepq.
809 		 */
810 		if (qp == self->ul_sleepq) {
811 			(void) dequeue_self(qp, self->ul_wchan);
812 			self->ul_writer = 0;
813 		}
814 		queue_unlock(qp);
815 	}
816 	self->ul_critical--;
817 }
818 
819 /*
820  * Common code for calling the ___lwp_mutex_timedlock() system call.
821  * Returns with mutex_owner and mutex_ownerpid set correctly.
822  */
823 static int
824 mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp)
825 {
826 	ulwp_t *self = curthread;
827 	uberdata_t *udp = self->ul_uberdata;
828 	int mtype = mp->mutex_type;
829 	hrtime_t begin_sleep;
830 	int acquired;
831 	int error;
832 
833 	self->ul_sp = stkptr();
834 	self->ul_wchan = mp;
835 	if (__td_event_report(self, TD_SLEEP, udp)) {
836 		self->ul_td_evbuf.eventnum = TD_SLEEP;
837 		self->ul_td_evbuf.eventdata = mp;
838 		tdb_event(TD_SLEEP, udp);
839 	}
840 	if (msp) {
841 		tdb_incr(msp->mutex_sleep);
842 		begin_sleep = gethrtime();
843 	}
844 
845 	DTRACE_PROBE1(plockstat, mutex__block, mp);
846 
847 	for (;;) {
848 		/*
849 		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
850 		 * means we successfully acquired the lock.
851 		 */
852 		if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 &&
853 		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
854 			acquired = 0;
855 			break;
856 		}
857 
858 		if (mtype & USYNC_PROCESS) {
859 			/*
860 			 * Defend against forkall().  We may be the child,
861 			 * in which case we don't actually own the mutex.
862 			 */
863 			enter_critical(self);
864 			if (mp->mutex_ownerpid == udp->pid) {
865 				mp->mutex_owner = (uintptr_t)self;
866 				exit_critical(self);
867 				acquired = 1;
868 				break;
869 			}
870 			exit_critical(self);
871 		} else {
872 			mp->mutex_owner = (uintptr_t)self;
873 			acquired = 1;
874 			break;
875 		}
876 	}
877 	if (msp)
878 		msp->mutex_sleep_time += gethrtime() - begin_sleep;
879 	self->ul_wchan = NULL;
880 	self->ul_sp = 0;
881 
882 	if (acquired) {
883 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
884 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
885 	} else {
886 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
887 		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
888 	}
889 
890 	return (error);
891 }
892 
893 /*
894  * Common code for calling the ___lwp_mutex_trylock() system call.
895  * Returns with mutex_owner and mutex_ownerpid set correctly.
896  */
897 int
898 mutex_trylock_kernel(mutex_t *mp)
899 {
900 	ulwp_t *self = curthread;
901 	uberdata_t *udp = self->ul_uberdata;
902 	int mtype = mp->mutex_type;
903 	int error;
904 	int acquired;
905 
906 	for (;;) {
907 		/*
908 		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
909 		 * means we successfully acquired the lock.
910 		 */
911 		if ((error = ___lwp_mutex_trylock(mp)) != 0 &&
912 		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
913 			acquired = 0;
914 			break;
915 		}
916 
917 		if (mtype & USYNC_PROCESS) {
918 			/*
919 			 * Defend against forkall().  We may be the child,
920 			 * in which case we don't actually own the mutex.
921 			 */
922 			enter_critical(self);
923 			if (mp->mutex_ownerpid == udp->pid) {
924 				mp->mutex_owner = (uintptr_t)self;
925 				exit_critical(self);
926 				acquired = 1;
927 				break;
928 			}
929 			exit_critical(self);
930 		} else {
931 			mp->mutex_owner = (uintptr_t)self;
932 			acquired = 1;
933 			break;
934 		}
935 	}
936 
937 	if (acquired) {
938 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
939 	} else if (error != EBUSY) {
940 		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
941 	}
942 
943 	return (error);
944 }
945 
946 volatile sc_shared_t *
947 setup_schedctl(void)
948 {
949 	ulwp_t *self = curthread;
950 	volatile sc_shared_t *scp;
951 	sc_shared_t *tmp;
952 
953 	if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */
954 	    !self->ul_vfork &&			/* not a child of vfork() */
955 	    !self->ul_schedctl_called) {	/* haven't been called before */
956 		enter_critical(self);
957 		self->ul_schedctl_called = &self->ul_uberdata->uberflags;
958 		if ((tmp = __schedctl()) != (sc_shared_t *)(-1))
959 			self->ul_schedctl = scp = tmp;
960 		exit_critical(self);
961 	}
962 	/*
963 	 * Unless the call to setup_schedctl() is surrounded
964 	 * by enter_critical()/exit_critical(), the address
965 	 * we are returning could be invalid due to a forkall()
966 	 * having occurred in another thread.
967 	 */
968 	return (scp);
969 }
970 
971 /*
972  * Interfaces from libsched, incorporated into libc.
973  * libsched.so.1 is now a filter library onto libc.
974  */
975 #pragma weak schedctl_lookup = _schedctl_init
976 #pragma weak _schedctl_lookup = _schedctl_init
977 #pragma weak schedctl_init = _schedctl_init
978 schedctl_t *
979 _schedctl_init(void)
980 {
981 	volatile sc_shared_t *scp = setup_schedctl();
982 	return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl);
983 }
984 
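/*
 * Illustrative sketch (not part of this file): how an application uses the
 * schedctl(3C) interfaces that resolve to _schedctl_init() above.  The
 * schedctl_start()/schedctl_stop() hints are assumed to come from
 * <schedctl.h>.
 */
#if 0	/* example only; not compiled */
#include <schedctl.h>

static void
example_preemption_hint(void)
{
	schedctl_t *sc = schedctl_init();

	if (sc != NULL) {
		schedctl_start(sc);	/* hint: avoid preempting this LWP */
		/* ... short critical section ... */
		schedctl_stop(sc);	/* withdraw the hint */
	}
}
#endif
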
985 #pragma weak schedctl_exit = _schedctl_exit
986 void
987 _schedctl_exit(void)
988 {
989 }
990 
991 /*
992  * Contract private interface for java.
993  * Set up the schedctl data if it doesn't exist yet.
994  * Return a pointer to the pointer to the schedctl data.
995  */
996 volatile sc_shared_t *volatile *
997 _thr_schedctl(void)
998 {
999 	ulwp_t *self = curthread;
1000 	volatile sc_shared_t *volatile *ptr;
1001 
1002 	if (self->ul_vfork)
1003 		return (NULL);
1004 	if (*(ptr = &self->ul_schedctl) == NULL)
1005 		(void) setup_schedctl();
1006 	return (ptr);
1007 }
1008 
1009 /*
1010  * Block signals and attempt to block preemption.
1011  * no_preempt()/preempt() must be used in pairs but can be nested.
1012  */
1013 void
1014 no_preempt(ulwp_t *self)
1015 {
1016 	volatile sc_shared_t *scp;
1017 
1018 	if (self->ul_preempt++ == 0) {
1019 		enter_critical(self);
1020 		if ((scp = self->ul_schedctl) != NULL ||
1021 		    (scp = setup_schedctl()) != NULL) {
1022 			/*
1023 			 * Save the pre-existing preempt value.
1024 			 */
1025 			self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt;
1026 			scp->sc_preemptctl.sc_nopreempt = 1;
1027 		}
1028 	}
1029 }
1030 
1031 /*
1032  * Undo the effects of no_preempt().
1033  */
1034 void
1035 preempt(ulwp_t *self)
1036 {
1037 	volatile sc_shared_t *scp;
1038 
1039 	ASSERT(self->ul_preempt > 0);
1040 	if (--self->ul_preempt == 0) {
1041 		if ((scp = self->ul_schedctl) != NULL) {
1042 			/*
1043 			 * Restore the pre-existing preempt value.
1044 			 */
1045 			scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt;
1046 			if (scp->sc_preemptctl.sc_yield &&
1047 			    scp->sc_preemptctl.sc_nopreempt == 0) {
1048 				lwp_yield();
1049 				if (scp->sc_preemptctl.sc_yield) {
1050 					/*
1051 					 * Shouldn't happen.  This is either
1052 					 * a race condition or the thread
1053 					 * just entered the real-time class.
1054 					 */
1055 					lwp_yield();
1056 					scp->sc_preemptctl.sc_yield = 0;
1057 				}
1058 			}
1059 		}
1060 		exit_critical(self);
1061 	}
1062 }
1063 
1064 /*
1065  * If a call to preempt() would cause the current thread to yield or to
1066  * take deferred actions in exit_critical(), then unpark the specified
1067  * lwp so it can run while we delay.  Return the original lwpid if the
1068  * unpark was not performed, else return zero.  The tests are a repeat
1069  * of some of the tests in preempt(), above.  This is a statistical
1070  * optimization solely for cond_sleep_queue(), below.
1071  */
1072 static lwpid_t
1073 preempt_unpark(ulwp_t *self, lwpid_t lwpid)
1074 {
1075 	volatile sc_shared_t *scp = self->ul_schedctl;
1076 
1077 	ASSERT(self->ul_preempt == 1 && self->ul_critical > 0);
1078 	if ((scp != NULL && scp->sc_preemptctl.sc_yield) ||
1079 	    (self->ul_curplease && self->ul_critical == 1)) {
1080 		(void) __lwp_unpark(lwpid);
1081 		lwpid = 0;
1082 	}
1083 	return (lwpid);
1084 }
1085 
1086 /*
1087  * Spin for a while (if 'tryhard' is true), trying to grab the lock.
1088  * If this fails, return EBUSY and let the caller deal with it.
1089  * If this succeeds, return 0 with mutex_owner set to curthread.
1090  */
1091 static int
1092 mutex_trylock_adaptive(mutex_t *mp, int tryhard)
1093 {
1094 	ulwp_t *self = curthread;
1095 	int error = EBUSY;
1096 	ulwp_t *ulwp;
1097 	volatile sc_shared_t *scp;
1098 	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
1099 	volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner;
1100 	uint32_t new_lockword;
1101 	int count = 0;
1102 	int max_count;
1103 	uint8_t max_spinners;
1104 
1105 	ASSERT(!(mp->mutex_type & USYNC_PROCESS));
1106 
1107 	if (MUTEX_OWNER(mp) == self)
1108 		return (EBUSY);
1109 
1110 	/* short-cut, not definitive (see below) */
1111 	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
1112 		ASSERT(mp->mutex_type & LOCK_ROBUST);
1113 		error = ENOTRECOVERABLE;
1114 		goto done;
1115 	}
1116 
1117 	/*
1118 	 * Make one attempt to acquire the lock before
1119 	 * incurring the overhead of the spin loop.
1120 	 */
1121 	if (set_lock_byte(lockp) == 0) {
1122 		*ownerp = (uintptr_t)self;
1123 		error = 0;
1124 		goto done;
1125 	}
1126 	if (!tryhard)
1127 		goto done;
1128 	if (ncpus == 0)
1129 		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
1130 	if ((max_spinners = self->ul_max_spinners) >= ncpus)
1131 		max_spinners = ncpus - 1;
1132 	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
1133 	if (max_count == 0)
1134 		goto done;
1135 
1136 	/*
1137 	 * This spin loop is unfair to lwps that have already dropped into
1138 	 * the kernel to sleep.  They will starve on a highly-contended mutex.
1139 	 * This is just too bad.  The adaptive spin algorithm is intended
1140 	 * to allow programs with highly-contended locks (that is, broken
1141 	 * programs) to execute with reasonable speed despite their contention.
1142 	 * Being fair would reduce the speed of such programs and well-written
1143 	 * programs will not suffer in any case.
1144 	 */
1145 	enter_critical(self);
1146 	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) {
1147 		exit_critical(self);
1148 		goto done;
1149 	}
1150 	DTRACE_PROBE1(plockstat, mutex__spin, mp);
1151 	for (count = 1; ; count++) {
1152 		if (*lockp == 0 && set_lock_byte(lockp) == 0) {
1153 			*ownerp = (uintptr_t)self;
1154 			error = 0;
1155 			break;
1156 		}
1157 		if (count == max_count)
1158 			break;
1159 		SMT_PAUSE();
1160 		/*
1161 		 * Stop spinning if the mutex owner is not running on
1162 		 * a processor; it will not drop the lock any time soon
1163 		 * and we would just be wasting time to keep spinning.
1164 		 *
1165 		 * Note that we are looking at another thread (ulwp_t)
1166 		 * without ensuring that the other thread does not exit.
1167 		 * The scheme relies on ulwp_t structures never being
1168 		 * deallocated by the library (the library employs a free
1169 		 * list of ulwp_t structs that are reused when new threads
1170 		 * are created) and on schedctl shared memory never being
1171 		 * deallocated once created via __schedctl().
1172 		 *
1173 		 * Thus, the worst that can happen when the spinning thread
1174 		 * looks at the owner's schedctl data is that it is looking
1175 		 * at some other thread's schedctl data.  This almost never
1176 		 * happens and is benign when it does.
1177 		 */
1178 		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
1179 		    ((scp = ulwp->ul_schedctl) == NULL ||
1180 		    scp->sc_state != SC_ONPROC))
1181 			break;
1182 	}
1183 	new_lockword = spinners_decr(&mp->mutex_lockword);
1184 	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
1185 		/*
1186 		 * We haven't yet acquired the lock, the lock
1187 		 * is free, and there are no other spinners.
1188 		 * Make one final attempt to acquire the lock.
1189 		 *
1190 		 * This isn't strictly necessary since mutex_lock_queue()
1191 		 * (the next action this thread will take if it doesn't
1192 		 * acquire the lock here) makes one attempt to acquire
1193 		 * the lock before putting the thread to sleep.
1194 		 *
1195 		 * If the next action for this thread (on failure here)
1196 		 * were not to call mutex_lock_queue(), this would be
1197 		 * necessary for correctness, to avoid ending up with an
1198 		 * unheld mutex with waiters but no one to wake them up.
1199 		 */
1200 		if (set_lock_byte(lockp) == 0) {
1201 			*ownerp = (uintptr_t)self;
1202 			error = 0;
1203 		}
1204 		count++;
1205 	}
1206 	exit_critical(self);
1207 
1208 done:
1209 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
1210 		ASSERT(mp->mutex_type & LOCK_ROBUST);
1211 		/*
1212 		 * We shouldn't own the mutex.
1213 		 * Just clear the lock; everyone has already been waked up.
1214 		 */
1215 		mp->mutex_owner = 0;
1216 		(void) clear_lockbyte(&mp->mutex_lockword);
1217 		error = ENOTRECOVERABLE;
1218 	}
1219 
1220 	if (error) {
1221 		if (count) {
1222 			DTRACE_PROBE2(plockstat, mutex__spun, 0, count);
1223 		}
1224 		if (error != EBUSY) {
1225 			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
1226 		}
1227 	} else {
1228 		if (count) {
1229 			DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
1230 		}
1231 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
1232 		if (mp->mutex_flag & LOCK_OWNERDEAD) {
1233 			ASSERT(mp->mutex_type & LOCK_ROBUST);
1234 			error = EOWNERDEAD;
1235 		}
1236 	}
1237 
1238 	return (error);
1239 }
1240 
1241 /*
1242  * Same as mutex_trylock_adaptive(), except specifically for queue locks.
1243  * The owner field is not set here; the caller (spin_lock_set()) sets it.
1244  */
1245 static int
1246 mutex_queuelock_adaptive(mutex_t *mp)
1247 {
1248 	ulwp_t *ulwp;
1249 	volatile sc_shared_t *scp;
1250 	volatile uint8_t *lockp;
1251 	volatile uint64_t *ownerp;
1252 	int count = curthread->ul_queue_spin;
1253 
1254 	ASSERT(mp->mutex_type == USYNC_THREAD);
1255 
1256 	if (count == 0)
1257 		return (EBUSY);
1258 
1259 	lockp = (volatile uint8_t *)&mp->mutex_lockw;
1260 	ownerp = (volatile uint64_t *)&mp->mutex_owner;
1261 	while (--count >= 0) {
1262 		if (*lockp == 0 && set_lock_byte(lockp) == 0)
1263 			return (0);
1264 		SMT_PAUSE();
1265 		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
1266 		    ((scp = ulwp->ul_schedctl) == NULL ||
1267 		    scp->sc_state != SC_ONPROC))
1268 			break;
1269 	}
1270 
1271 	return (EBUSY);
1272 }
1273 
1274 /*
1275  * Like mutex_trylock_adaptive(), but for process-shared mutexes.
1276  * Spin for a while (if 'tryhard' is true), trying to grab the lock.
1277  * If this fails, return EBUSY and let the caller deal with it.
1278  * If this succeeds, return 0 with mutex_owner set to curthread
1279  * and mutex_ownerpid set to the current pid.
1280  */
1281 static int
1282 mutex_trylock_process(mutex_t *mp, int tryhard)
1283 {
1284 	ulwp_t *self = curthread;
1285 	uberdata_t *udp = self->ul_uberdata;
1286 	int error = EBUSY;
1287 	volatile uint64_t *lockp = (volatile uint64_t *)&mp->mutex_lockword64;
1288 	uint32_t new_lockword;
1289 	int count = 0;
1290 	int max_count;
1291 	uint8_t max_spinners;
1292 
1293 	ASSERT(mp->mutex_type & USYNC_PROCESS);
1294 
1295 	if (shared_mutex_held(mp))
1296 		return (EBUSY);
1297 
1298 	/* short-cut, not definitive (see below) */
1299 	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
1300 		ASSERT(mp->mutex_type & LOCK_ROBUST);
1301 		error = ENOTRECOVERABLE;
1302 		goto done;
1303 	}
1304 
1305 	/*
1306 	 * Make one attempt to acquire the lock before
1307 	 * incurring the overhead of the spin loop.
1308 	 */
1309 	enter_critical(self);
1310 	if (set_lock_byte64(lockp, udp->pid) == 0) {
1311 		mp->mutex_owner = (uintptr_t)self;
1312 		/* mp->mutex_ownerpid was set by set_lock_byte64() */
1313 		exit_critical(self);
1314 		error = 0;
1315 		goto done;
1316 	}
1317 	exit_critical(self);
1318 	if (!tryhard)
1319 		goto done;
1320 	if (ncpus == 0)
1321 		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
1322 	if ((max_spinners = self->ul_max_spinners) >= ncpus)
1323 		max_spinners = ncpus - 1;
1324 	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
1325 	if (max_count == 0)
1326 		goto done;
1327 
1328 	/*
1329 	 * This is a process-shared mutex.
1330 	 * We cannot know if the owner is running on a processor.
1331 	 * We just spin and hope that it is on a processor.
1332 	 */
1333 	enter_critical(self);
1334 	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) {
1335 		exit_critical(self);
1336 		goto done;
1337 	}
1338 	DTRACE_PROBE1(plockstat, mutex__spin, mp);
1339 	for (count = 1; ; count++) {
1340 		if ((*lockp & LOCKMASK64) == 0 &&
1341 		    set_lock_byte64(lockp, udp->pid) == 0) {
1342 			mp->mutex_owner = (uintptr_t)self;
1343 			/* mp->mutex_ownerpid was set by set_lock_byte64() */
1344 			error = 0;
1345 			break;
1346 		}
1347 		if (count == max_count)
1348 			break;
1349 		SMT_PAUSE();
1350 	}
1351 	new_lockword = spinners_decr(&mp->mutex_lockword);
1352 	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
1353 		/*
1354 		 * We haven't yet acquired the lock, the lock
1355 		 * is free, and there are no other spinners.
1356 		 * Make one final attempt to acquire the lock.
1357 		 *
1358 		 * This isn't strictly necessary since mutex_lock_kernel()
1359 		 * (the next action this thread will take if it doesn't
1360 		 * acquire the lock here) makes one attempt to acquire
1361 		 * the lock before putting the thread to sleep.
1362 		 *
1363 		 * If the next action for this thread (on failure here)
1364 		 * were not to call mutex_lock_kernel(), this would be
1365 		 * necessary for correctness, to avoid ending up with an
1366 		 * unheld mutex with waiters but no one to wake them up.
1367 		 */
1368 		if (set_lock_byte64(lockp, udp->pid) == 0) {
1369 			mp->mutex_owner = (uintptr_t)self;
1370 			/* mp->mutex_ownerpid was set by set_lock_byte64() */
1371 			error = 0;
1372 		}
1373 		count++;
1374 	}
1375 	exit_critical(self);
1376 
1377 done:
1378 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
1379 		ASSERT(mp->mutex_type & LOCK_ROBUST);
1380 		/*
1381 		 * We shouldn't own the mutex.
1382 		 * Just clear the lock; everyone has already been waked up.
1383 		 */
1384 		mp->mutex_owner = 0;
1385 		/* mp->mutex_ownerpid is cleared by clear_lockbyte64() */
1386 		(void) clear_lockbyte64(&mp->mutex_lockword64);
1387 		error = ENOTRECOVERABLE;
1388 	}
1389 
1390 	if (error) {
1391 		if (count) {
1392 			DTRACE_PROBE2(plockstat, mutex__spun, 0, count);
1393 		}
1394 		if (error != EBUSY) {
1395 			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
1396 		}
1397 	} else {
1398 		if (count) {
1399 			DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
1400 		}
1401 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
1402 		if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
1403 			ASSERT(mp->mutex_type & LOCK_ROBUST);
1404 			if (mp->mutex_flag & LOCK_OWNERDEAD)
1405 				error = EOWNERDEAD;
1406 			else if (mp->mutex_type & USYNC_PROCESS_ROBUST)
1407 				error = ELOCKUNMAPPED;
1408 			else
1409 				error = EOWNERDEAD;
1410 		}
1411 	}
1412 
1413 	return (error);
1414 }
1415 
1416 /*
1417  * Mutex wakeup code for releasing a USYNC_THREAD mutex.
1418  * Returns the lwpid of the thread that was dequeued, if any.
1419  * The caller of mutex_wakeup() must call __lwp_unpark(lwpid)
1420  * to wake up the specified lwp.
1421  */
1422 static lwpid_t
1423 mutex_wakeup(mutex_t *mp)
1424 {
1425 	lwpid_t lwpid = 0;
1426 	queue_head_t *qp;
1427 	ulwp_t *ulwp;
1428 	int more;
1429 
1430 	/*
1431 	 * Dequeue a waiter from the sleep queue.  Don't touch the mutex
1432 	 * waiters bit if no one was found on the queue because the mutex
1433 	 * might have been deallocated or reallocated for another purpose.
1434 	 */
1435 	qp = queue_lock(mp, MX);
1436 	if ((ulwp = dequeue(qp, mp, &more)) != NULL) {
1437 		lwpid = ulwp->ul_lwpid;
1438 		mp->mutex_waiters = (more? 1 : 0);
1439 	}
1440 	queue_unlock(qp);
1441 	return (lwpid);
1442 }
1443 
1444 /*
1445  * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex.
1446  */
1447 static void
1448 mutex_wakeup_all(mutex_t *mp)
1449 {
1450 	queue_head_t *qp;
1451 	int nlwpid = 0;
1452 	int maxlwps = MAXLWPS;
1453 	ulwp_t **ulwpp;
1454 	ulwp_t *ulwp;
1455 	ulwp_t *prev = NULL;
1456 	lwpid_t buffer[MAXLWPS];
1457 	lwpid_t *lwpid = buffer;
1458 
1459 	/*
1460 	 * Walk the list of waiters and prepare to wake up all of them.
1461 	 * The waiters flag has already been cleared from the mutex.
1462 	 *
1463 	 * We keep track of lwpids that are to be unparked in lwpid[].
1464 	 * __lwp_unpark_all() is called to unpark all of them after
1465 	 * they have been removed from the sleep queue and the sleep
1466 	 * queue lock has been dropped.  If we run out of space in our
1467 	 * on-stack buffer, we need to allocate more but we can't call
1468 	 * lmalloc() because we are holding a queue lock when the overflow
1469 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
1470 	 * either because the application may have allocated a small
1471 	 * stack and we don't want to overrun the stack.  So we call
1472 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
1473 	 * system call directly since that path acquires no locks.
1474 	 */
1475 	qp = queue_lock(mp, MX);
1476 	ulwpp = &qp->qh_head;
1477 	while ((ulwp = *ulwpp) != NULL) {
1478 		if (ulwp->ul_wchan != mp) {
1479 			prev = ulwp;
1480 			ulwpp = &ulwp->ul_link;
1481 		} else {
1482 			if (nlwpid == maxlwps)
1483 				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
1484 			(void) queue_unlink(qp, ulwpp, prev);
1485 			lwpid[nlwpid++] = ulwp->ul_lwpid;
1486 		}
1487 	}
1488 
1489 	if (nlwpid == 0) {
1490 		queue_unlock(qp);
1491 	} else {
1492 		mp->mutex_waiters = 0;
1493 		no_preempt(curthread);
1494 		queue_unlock(qp);
1495 		if (nlwpid == 1)
1496 			(void) __lwp_unpark(lwpid[0]);
1497 		else
1498 			(void) __lwp_unpark_all(lwpid, nlwpid);
1499 		preempt(curthread);
1500 	}
1501 
1502 	if (lwpid != buffer)
1503 		(void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
1504 }
1505 
1506 /*
1507  * Release a process-private mutex.
1508  * As an optimization, if there are waiters but there are also spinners
1509  * attempting to acquire the mutex, then don't bother waking up a waiter;
1510  * one of the spinners will acquire the mutex soon and it would be a waste
1511  * of resources to wake up some thread just to have it spin for a while
1512  * and then possibly go back to sleep.  See mutex_trylock_adaptive().
1513  */
1514 static lwpid_t
1515 mutex_unlock_queue(mutex_t *mp, int release_all)
1516 {
1517 	lwpid_t lwpid = 0;
1518 	uint32_t old_lockword;
1519 
1520 	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
1521 	mp->mutex_owner = 0;
1522 	old_lockword = clear_lockbyte(&mp->mutex_lockword);
1523 	if ((old_lockword & WAITERMASK) &&
1524 	    (release_all || (old_lockword & SPINNERMASK) == 0)) {
1525 		ulwp_t *self = curthread;
1526 		no_preempt(self);	/* ensure a prompt wakeup */
1527 		if (release_all)
1528 			mutex_wakeup_all(mp);
1529 		else
1530 			lwpid = mutex_wakeup(mp);
1531 		if (lwpid == 0)
1532 			preempt(self);
1533 	}
1534 	return (lwpid);
1535 }
1536 
1537 /*
1538  * Like mutex_unlock_queue(), but for process-shared mutexes.
1539  */
1540 static void
1541 mutex_unlock_process(mutex_t *mp, int release_all)
1542 {
1543 	uint64_t old_lockword64;
1544 
1545 	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
1546 	mp->mutex_owner = 0;
1547 	/* mp->mutex_ownerpid is cleared by clear_lockbyte64() */
1548 	old_lockword64 = clear_lockbyte64(&mp->mutex_lockword64);
1549 	if ((old_lockword64 & WAITERMASK64) &&
1550 	    (release_all || (old_lockword64 & SPINNERMASK64) == 0)) {
1551 		ulwp_t *self = curthread;
1552 		no_preempt(self);	/* ensure a prompt wakeup */
1553 		(void) ___lwp_mutex_wakeup(mp, release_all);
1554 		preempt(self);
1555 	}
1556 }
1557 
1558 /*
1559  * Return the real priority of a thread.
1560  */
1561 int
1562 real_priority(ulwp_t *ulwp)
1563 {
1564 	if (ulwp->ul_epri == 0)
1565 		return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri);
1566 	return (ulwp->ul_emappedpri? ulwp->ul_emappedpri : ulwp->ul_epri);
1567 }
1568 
1569 void
1570 stall(void)
1571 {
1572 	for (;;)
1573 		(void) mutex_lock_kernel(&stall_mutex, NULL, NULL);
1574 }
1575 
1576 /*
1577  * Acquire a USYNC_THREAD mutex via user-level sleep queues.
1578  * We failed set_lock_byte(&mp->mutex_lockw) before coming here.
1579  * If successful, returns with mutex_owner set correctly.
1580  */
1581 int
1582 mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp,
1583 	timespec_t *tsp)
1584 {
1585 	uberdata_t *udp = curthread->ul_uberdata;
1586 	queue_head_t *qp;
1587 	hrtime_t begin_sleep;
1588 	int error = 0;
1589 
1590 	self->ul_sp = stkptr();
1591 	if (__td_event_report(self, TD_SLEEP, udp)) {
1592 		self->ul_wchan = mp;
1593 		self->ul_td_evbuf.eventnum = TD_SLEEP;
1594 		self->ul_td_evbuf.eventdata = mp;
1595 		tdb_event(TD_SLEEP, udp);
1596 	}
1597 	if (msp) {
1598 		tdb_incr(msp->mutex_sleep);
1599 		begin_sleep = gethrtime();
1600 	}
1601 
1602 	DTRACE_PROBE1(plockstat, mutex__block, mp);
1603 
1604 	/*
1605 	 * Put ourself on the sleep queue, and while we are
1606 	 * unable to grab the lock, go park in the kernel.
1607 	 * Take ourself off the sleep queue after we acquire the lock.
1608 	 * The waiter bit can be set/cleared only while holding the queue lock.
1609 	 */
1610 	qp = queue_lock(mp, MX);
1611 	enqueue(qp, self, mp, MX);
1612 	mp->mutex_waiters = 1;
1613 	for (;;) {
1614 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
1615 			mp->mutex_owner = (uintptr_t)self;
1616 			mp->mutex_waiters = dequeue_self(qp, mp);
1617 			break;
1618 		}
1619 		set_parking_flag(self, 1);
1620 		queue_unlock(qp);
1621 		/*
1622 		 * __lwp_park() will return the residual time in tsp
1623 		 * if we are unparked before the timeout expires.
1624 		 */
1625 		error = __lwp_park(tsp, 0);
1626 		set_parking_flag(self, 0);
1627 		/*
1628 		 * We could have taken a signal or suspended ourself.
1629 		 * If we did, then we removed ourself from the queue.
1630 		 * Someone else may have removed us from the queue
1631 		 * as a consequence of mutex_unlock().  We may have
1632 		 * gotten a timeout from __lwp_park().  Or we may still
1633 		 * be on the queue and this is just a spurious wakeup.
1634 		 */
1635 		qp = queue_lock(mp, MX);
1636 		if (self->ul_sleepq == NULL) {
1637 			if (error) {
1638 				mp->mutex_waiters = queue_waiter(qp, mp)? 1 : 0;
1639 				if (error != EINTR)
1640 					break;
1641 				error = 0;
1642 			}
1643 			if (set_lock_byte(&mp->mutex_lockw) == 0) {
1644 				mp->mutex_owner = (uintptr_t)self;
1645 				break;
1646 			}
1647 			enqueue(qp, self, mp, MX);
1648 			mp->mutex_waiters = 1;
1649 		}
1650 		ASSERT(self->ul_sleepq == qp &&
1651 		    self->ul_qtype == MX &&
1652 		    self->ul_wchan == mp);
1653 		if (error) {
1654 			if (error != EINTR) {
1655 				mp->mutex_waiters = dequeue_self(qp, mp);
1656 				break;
1657 			}
1658 			error = 0;
1659 		}
1660 	}
1661 	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
1662 	    self->ul_wchan == NULL);
1663 	self->ul_sp = 0;
1664 	queue_unlock(qp);
1665 
1666 	if (msp)
1667 		msp->mutex_sleep_time += gethrtime() - begin_sleep;
1668 
1669 	ASSERT(error == 0 || error == EINVAL || error == ETIME);
1670 
1671 	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
1672 		ASSERT(mp->mutex_type & LOCK_ROBUST);
1673 		/*
1674 		 * We shouldn't own the mutex.
1675 		 * Just clear the lock; everyone has already been waked up.
1676 		 */
1677 		mp->mutex_owner = 0;
1678 		(void) clear_lockbyte(&mp->mutex_lockword);
1679 		error = ENOTRECOVERABLE;
1680 	}
1681 
1682 	if (error) {
1683 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
1684 		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
1685 	} else {
1686 		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
1687 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1688 		if (mp->mutex_flag & LOCK_OWNERDEAD) {
1689 			ASSERT(mp->mutex_type & LOCK_ROBUST);
1690 			error = EOWNERDEAD;
1691 		}
1692 	}
1693 
1694 	return (error);
1695 }
1696 
1697 static int
1698 mutex_recursion(mutex_t *mp, int mtype, int try)
1699 {
1700 	ASSERT(mutex_is_held(mp));
1701 	ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK));
1702 	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);
1703 
1704 	if (mtype & LOCK_RECURSIVE) {
1705 		if (mp->mutex_rcount == RECURSION_MAX) {
1706 			DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN);
1707 			return (EAGAIN);
1708 		}
1709 		mp->mutex_rcount++;
1710 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0);
1711 		return (0);
1712 	}
1713 	if (try == MUTEX_LOCK) {
1714 		DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
1715 		return (EDEADLK);
1716 	}
1717 	return (EBUSY);
1718 }
1719 
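/*
 * Illustrative sketch (not part of this file): the recursion count maintained
 * above is what lets a thread relock a LOCK_RECURSIVE mutex it already owns,
 * provided it later unlocks it the same number of times.
 */
#if 0	/* example only; not compiled */
#include <synch.h>

static mutex_t recursive_mp;

static void
example_recursive_usage(void)
{
	(void) mutex_init(&recursive_mp,
	    USYNC_THREAD | LOCK_RECURSIVE | LOCK_ERRORCHECK, NULL);

	(void) mutex_lock(&recursive_mp);
	(void) mutex_lock(&recursive_mp);	/* mutex_rcount becomes 1 */
	(void) mutex_unlock(&recursive_mp);	/* mutex_rcount back to 0 */
	(void) mutex_unlock(&recursive_mp);	/* lock actually released */
}
#endif
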
1720 /*
1721  * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so
1722  * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary.
1723  * We use tdb_hash_lock here and in the synch object tracking code in
1724  * the tdb_agent.c file.  There is no conflict between these two usages.
1725  */
1726 void
1727 register_lock(mutex_t *mp)
1728 {
1729 	uberdata_t *udp = curthread->ul_uberdata;
1730 	uint_t hash = LOCK_HASH(mp);
1731 	robust_t *rlp;
1732 	robust_t **rlpp;
1733 	robust_t **table;
1734 
1735 	if ((table = udp->robustlocks) == NULL) {
1736 		lmutex_lock(&udp->tdb_hash_lock);
1737 		if ((table = udp->robustlocks) == NULL) {
1738 			table = lmalloc(LOCKHASHSZ * sizeof (robust_t *));
1739 			_membar_producer();
1740 			udp->robustlocks = table;
1741 		}
1742 		lmutex_unlock(&udp->tdb_hash_lock);
1743 	}
1744 	_membar_consumer();
1745 
1746 	/*
1747 	 * First search the registered table with no locks held.
1748 	 * This is safe because the table never shrinks
1749 	 * and we can only get a false negative.
1750 	 */
1751 	for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) {
1752 		if (rlp->robust_lock == mp)	/* already registered */
1753 			return;
1754 	}
1755 
1756 	/*
1757 	 * The lock was not found.
1758 	 * Repeat the operation with tdb_hash_lock held.
1759 	 */
1760 	lmutex_lock(&udp->tdb_hash_lock);
1761 
1762 	for (rlpp = &table[hash];
1763 	    (rlp = *rlpp) != NULL;
1764 	    rlpp = &rlp->robust_next) {
1765 		if (rlp->robust_lock == mp) {	/* already registered */
1766 			lmutex_unlock(&udp->tdb_hash_lock);
1767 			return;
1768 		}
1769 	}
1770 
1771 	/*
1772 	 * The lock has never been registered.
1773 	 * Register it now and add it to the table.
1774 	 */
1775 	(void) ___lwp_mutex_register(mp);
1776 	rlp = lmalloc(sizeof (*rlp));
1777 	rlp->robust_lock = mp;
1778 	_membar_producer();
1779 	*rlpp = rlp;
1780 
1781 	lmutex_unlock(&udp->tdb_hash_lock);
1782 }
1783 
1784 /*
1785  * This is called in the child of fork()/forkall() to start over
1786  * with a clean slate.  (Each process must register its own locks.)
1787  * No locks are needed because all other threads are suspended or gone.
1788  */
1789 void
1790 unregister_locks(void)
1791 {
1792 	uberdata_t *udp = curthread->ul_uberdata;
1793 	uint_t hash;
1794 	robust_t **table;
1795 	robust_t *rlp;
1796 	robust_t *next;
1797 
1798 	if ((table = udp->robustlocks) != NULL) {
1799 		for (hash = 0; hash < LOCKHASHSZ; hash++) {
1800 			rlp = table[hash];
1801 			while (rlp != NULL) {
1802 				next = rlp->robust_next;
1803 				lfree(rlp, sizeof (*rlp));
1804 				rlp = next;
1805 			}
1806 		}
1807 		lfree(table, LOCKHASHSZ * sizeof (robust_t *));
1808 		udp->robustlocks = NULL;
1809 	}
1810 }
1811 
1812 /*
1813  * Returns with mutex_owner set correctly.
1814  */
1815 static int
1816 mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
1817 {
1818 	ulwp_t *self = curthread;
1819 	uberdata_t *udp = self->ul_uberdata;
1820 	int mtype = mp->mutex_type;
1821 	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
1822 	int error = 0;
1823 	uint8_t ceil;
1824 	int myprio;
1825 
1826 	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);
1827 
1828 	if (!self->ul_schedctl_called)
1829 		(void) setup_schedctl();
1830 
1831 	if (msp && try == MUTEX_TRY)
1832 		tdb_incr(msp->mutex_try);
1833 
1834 	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp))
1835 		return (mutex_recursion(mp, mtype, try));
1836 
1837 	if (self->ul_error_detection && try == MUTEX_LOCK &&
1838 	    tsp == NULL && mutex_is_held(mp))
1839 		lock_error(mp, "mutex_lock", NULL, NULL);
1840 
1841 	if (mtype & LOCK_PRIO_PROTECT) {
1842 		ceil = mp->mutex_ceiling;
1843 		ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0);
1844 		myprio = real_priority(self);
1845 		if (myprio > ceil) {
1846 			DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL);
1847 			return (EINVAL);
1848 		}
1849 		if ((error = _ceil_mylist_add(mp)) != 0) {
1850 			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
1851 			return (error);
1852 		}
1853 		if (myprio < ceil)
1854 			_ceil_prio_inherit(ceil);
1855 	}
1856 
1857 	if ((mtype & (USYNC_PROCESS | LOCK_ROBUST))
1858 	    == (USYNC_PROCESS | LOCK_ROBUST))
1859 		register_lock(mp);
1860 
1861 	if (mtype & LOCK_PRIO_INHERIT) {
1862 		/* go straight to the kernel */
1863 		if (try == MUTEX_TRY)
1864 			error = mutex_trylock_kernel(mp);
1865 		else	/* MUTEX_LOCK */
1866 			error = mutex_lock_kernel(mp, tsp, msp);
1867 		/*
1868 		 * The kernel never sets or clears the lock byte
1869 		 * for LOCK_PRIO_INHERIT mutexes.
1870 		 * Set it here for consistency.
1871 		 */
1872 		switch (error) {
1873 		case 0:
1874 			mp->mutex_lockw = LOCKSET;
1875 			break;
1876 		case EOWNERDEAD:
1877 		case ELOCKUNMAPPED:
1878 			mp->mutex_lockw = LOCKSET;
1879 			/* FALLTHROUGH */
1880 		case ENOTRECOVERABLE:
1881 			ASSERT(mtype & LOCK_ROBUST);
1882 			break;
1883 		case EDEADLK:
1884 			if (try == MUTEX_LOCK)
1885 				stall();
1886 			error = EBUSY;
1887 			break;
1888 		}
1889 	} else if (mtype & USYNC_PROCESS) {
1890 		error = mutex_trylock_process(mp, try == MUTEX_LOCK);
1891 		if (error == EBUSY && try == MUTEX_LOCK)
1892 			error = mutex_lock_kernel(mp, tsp, msp);
1893 	} else {	/* USYNC_THREAD */
1894 		error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK);
1895 		if (error == EBUSY && try == MUTEX_LOCK)
1896 			error = mutex_lock_queue(self, msp, mp, tsp);
1897 	}
1898 
1899 	switch (error) {
1900 	case 0:
1901 	case EOWNERDEAD:
1902 	case ELOCKUNMAPPED:
1903 		if (mtype & LOCK_ROBUST)
1904 			remember_lock(mp);
1905 		if (msp)
1906 			record_begin_hold(msp);
1907 		break;
1908 	default:
1909 		if (mtype & LOCK_PRIO_PROTECT) {
1910 			(void) _ceil_mylist_del(mp);
1911 			if (myprio < ceil)
1912 				_ceil_prio_waive();
1913 		}
1914 		if (try == MUTEX_TRY) {
1915 			if (msp)
1916 				tdb_incr(msp->mutex_try_fail);
1917 			if (__td_event_report(self, TD_LOCK_TRY, udp)) {
1918 				self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
1919 				tdb_event(TD_LOCK_TRY, udp);
1920 			}
1921 		}
1922 		break;
1923 	}
1924 
1925 	return (error);
1926 }
1927 
1928 int
1929 fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try)
1930 {
1931 	ulwp_t *self = curthread;
1932 	uberdata_t *udp = self->ul_uberdata;
1933 
1934 	/*
1935 	 * We know that USYNC_PROCESS is set in mtype and that
1936 	 * zero, one, or both of the flags LOCK_RECURSIVE and
1937 	 * LOCK_ERRORCHECK are set, and that no other flags are set.
1938 	 */
1939 	ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0);
1940 	enter_critical(self);
1941 	if (set_lock_byte64(&mp->mutex_lockword64, udp->pid) == 0) {
1942 		mp->mutex_owner = (uintptr_t)self;
1943 		/* mp->mutex_ownerpid was set by set_lock_byte64() */
1944 		exit_critical(self);
1945 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1946 		return (0);
1947 	}
1948 	exit_critical(self);
1949 
1950 	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp))
1951 		return (mutex_recursion(mp, mtype, try));
1952 
1953 	if (try == MUTEX_LOCK) {
1954 		if (mutex_trylock_process(mp, 1) == 0)
1955 			return (0);
1956 		return (mutex_lock_kernel(mp, tsp, NULL));
1957 	}
1958 
1959 	if (__td_event_report(self, TD_LOCK_TRY, udp)) {
1960 		self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
1961 		tdb_event(TD_LOCK_TRY, udp);
1962 	}
1963 	return (EBUSY);
1964 }
1965 
1966 static int
1967 mutex_lock_impl(mutex_t *mp, timespec_t *tsp)
1968 {
1969 	ulwp_t *self = curthread;
1970 	uberdata_t *udp = self->ul_uberdata;
1971 	uberflags_t *gflags;
1972 	int mtype;
1973 
1974 	/*
1975 	 * Optimize the case of USYNC_THREAD, including
1976 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
1977 	 * no error detection, no lock statistics,
1978 	 * and the process has only a single thread.
1979 	 * (Most likely a traditional single-threaded application.)
1980 	 */
1981 	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
1982 	    udp->uberflags.uf_all) == 0) {
1983 		/*
1984 		 * Only one thread exists so we don't need an atomic operation.
1985 		 */
1986 		if (mp->mutex_lockw == 0) {
1987 			mp->mutex_lockw = LOCKSET;
1988 			mp->mutex_owner = (uintptr_t)self;
1989 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1990 			return (0);
1991 		}
1992 		if (mtype && MUTEX_OWNER(mp) == self)
1993 			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
1994 		/*
1995 		 * We have reached a deadlock, probably because the
1996 		 * process is executing non-async-signal-safe code in
1997 		 * a signal handler and is attempting to acquire a lock
1998 		 * that it already owns.  This is not surprising, given
1999 		 * bad programming practices over the years that have
2000 		 * resulted in applications calling printf() and such
2001 		 * in their signal handlers.  Unless the user has told
2002 		 * us that the signal handlers are safe by setting:
2003 		 *	export _THREAD_ASYNC_SAFE=1
2004 		 * we return EDEADLK rather than actually deadlocking.
2005 		 */
2006 		if (tsp == NULL &&
2007 		    MUTEX_OWNER(mp) == self && !self->ul_async_safe) {
2008 			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
2009 			return (EDEADLK);
2010 		}
2011 	}
2012 
2013 	/*
2014 	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
2015 	 * no error detection, and no lock statistics.
2016 	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
2017 	 */
2018 	if ((gflags = self->ul_schedctl_called) != NULL &&
2019 	    (gflags->uf_trs_ted |
2020 	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
2021 		if (mtype & USYNC_PROCESS)
2022 			return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK));
2023 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
2024 			mp->mutex_owner = (uintptr_t)self;
2025 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2026 			return (0);
2027 		}
2028 		if (mtype && MUTEX_OWNER(mp) == self)
2029 			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
2030 		if (mutex_trylock_adaptive(mp, 1) != 0)
2031 			return (mutex_lock_queue(self, NULL, mp, tsp));
2032 		return (0);
2033 	}
2034 
2035 	/* else do it the long way */
2036 	return (mutex_lock_internal(mp, tsp, MUTEX_LOCK));
2037 }
2038 
2039 /*
2040  * Of the following function names (all the same function, of course),
2041  * only _private_mutex_lock() is not exported from libc.  This means
2042  * that calling _private_mutex_lock() within libc will not invoke the
2043  * dynamic linker.  This is critical for any code called in the child
2044  * of vfork() (via posix_spawn()) because invoking the dynamic linker
2045  * in such a case would corrupt the parent's address space.  There are
2046  * other places in libc where avoiding the dynamic linker is necessary.
2047  * Of course, _private_mutex_lock() can also be called in cases that do
2048  * not require avoiding the dynamic linker, and often is.
2049  */
2050 #pragma weak _private_mutex_lock = __mutex_lock
2051 #pragma weak mutex_lock = __mutex_lock
2052 #pragma weak _mutex_lock = __mutex_lock
2053 #pragma weak pthread_mutex_lock = __mutex_lock
2054 #pragma weak _pthread_mutex_lock = __mutex_lock
2055 int
2056 __mutex_lock(mutex_t *mp)
2057 {
2058 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2059 	return (mutex_lock_impl(mp, NULL));
2060 }
2061 
2062 #pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock
2063 int
2064 _pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime)
2065 {
2066 	timespec_t tslocal;
2067 	int error;
2068 
2069 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2070 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
2071 	error = mutex_lock_impl(mp, &tslocal);
2072 	if (error == ETIME)
2073 		error = ETIMEDOUT;
2074 	return (error);
2075 }
2076 
2077 #pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np
2078 int
2079 _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime)
2080 {
2081 	timespec_t tslocal;
2082 	int error;
2083 
2084 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2085 	tslocal = *reltime;
2086 	error = mutex_lock_impl(mp, &tslocal);
2087 	if (error == ETIME)
2088 		error = ETIMEDOUT;
2089 	return (error);
2090 }
2091 
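/*
 * An illustrative sketch, not part of libc: pthread_mutex_timedlock()
 * takes an absolute CLOCK_REALTIME deadline, while the _np variant above
 * takes a relative interval; both report an expired timeout as ETIMEDOUT.
 * The mutex m and the recovery function handle_timeout() are hypothetical,
 * and <time.h> supplies clock_gettime().
 *
 *	timespec_t deadline;
 *
 *	(void) clock_gettime(CLOCK_REALTIME, &deadline);
 *	deadline.tv_sec += 5;
 *	if (pthread_mutex_timedlock(&m, &deadline) == ETIMEDOUT)
 *		handle_timeout();
 */
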
2092 #pragma weak _private_mutex_trylock = __mutex_trylock
2093 #pragma weak mutex_trylock = __mutex_trylock
2094 #pragma weak _mutex_trylock = __mutex_trylock
2095 #pragma weak pthread_mutex_trylock = __mutex_trylock
2096 #pragma weak _pthread_mutex_trylock = __mutex_trylock
2097 int
2098 __mutex_trylock(mutex_t *mp)
2099 {
2100 	ulwp_t *self = curthread;
2101 	uberdata_t *udp = self->ul_uberdata;
2102 	uberflags_t *gflags;
2103 	int mtype;
2104 
2105 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2106 	/*
2107 	 * Optimize the case of USYNC_THREAD, including
2108 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
2109 	 * no error detection, no lock statistics,
2110 	 * and the process has only a single thread.
2111 	 * (Most likely a traditional single-threaded application.)
2112 	 */
2113 	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
2114 	    udp->uberflags.uf_all) == 0) {
2115 		/*
2116 		 * Only one thread exists so we don't need an atomic operation.
2117 		 */
2118 		if (mp->mutex_lockw == 0) {
2119 			mp->mutex_lockw = LOCKSET;
2120 			mp->mutex_owner = (uintptr_t)self;
2121 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2122 			return (0);
2123 		}
2124 		if (mtype && MUTEX_OWNER(mp) == self)
2125 			return (mutex_recursion(mp, mtype, MUTEX_TRY));
2126 		return (EBUSY);
2127 	}
2128 
2129 	/*
2130 	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
2131 	 * no error detection, and no lock statistics.
2132 	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
2133 	 */
2134 	if ((gflags = self->ul_schedctl_called) != NULL &&
2135 	    (gflags->uf_trs_ted |
2136 	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
2137 		if (mtype & USYNC_PROCESS)
2138 			return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY));
2139 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
2140 			mp->mutex_owner = (uintptr_t)self;
2141 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2142 			return (0);
2143 		}
2144 		if (mtype && MUTEX_OWNER(mp) == self)
2145 			return (mutex_recursion(mp, mtype, MUTEX_TRY));
2146 		if (__td_event_report(self, TD_LOCK_TRY, udp)) {
2147 			self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
2148 			tdb_event(TD_LOCK_TRY, udp);
2149 		}
2150 		return (EBUSY);
2151 	}
2152 
2153 	/* else do it the long way */
2154 	return (mutex_lock_internal(mp, NULL, MUTEX_TRY));
2155 }
2156 
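/*
 * An illustrative sketch, not part of libc: the trylock entry points never
 * block; EBUSY just means the lock is currently held elsewhere and the
 * caller should back off or do other work.  The mutex m and the two helper
 * functions below are hypothetical.
 *
 *	if (pthread_mutex_trylock(&m) == 0) {
 *		update_shared_state();
 *		(void) pthread_mutex_unlock(&m);
 *	} else {
 *		do_something_else();
 *	}
 */
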
2157 int
2158 mutex_unlock_internal(mutex_t *mp, int retain_robust_flags)
2159 {
2160 	ulwp_t *self = curthread;
2161 	uberdata_t *udp = self->ul_uberdata;
2162 	int mtype = mp->mutex_type;
2163 	tdb_mutex_stats_t *msp;
2164 	int error = 0;
2165 	int release_all;
2166 	lwpid_t lwpid;
2167 
2168 	if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp))
2169 		return (EPERM);
2170 
2171 	if (self->ul_error_detection && !mutex_is_held(mp))
2172 		lock_error(mp, "mutex_unlock", NULL, NULL);
2173 
2174 	if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
2175 		mp->mutex_rcount--;
2176 		DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
2177 		return (0);
2178 	}
2179 
2180 	if ((msp = MUTEX_STATS(mp, udp)) != NULL)
2181 		(void) record_hold_time(msp);
2182 
2183 	if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) &&
2184 	    (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) {
2185 		ASSERT(mp->mutex_type & LOCK_ROBUST);
2186 		mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
2187 		mp->mutex_flag |= LOCK_NOTRECOVERABLE;
2188 	}
2189 	release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0);
2190 
2191 	if (mtype & LOCK_PRIO_INHERIT) {
2192 		no_preempt(self);
2193 		mp->mutex_owner = 0;
2194 		/* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */
2195 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
2196 		mp->mutex_lockw = LOCKCLEAR;
2197 		error = ___lwp_mutex_unlock(mp);
2198 		preempt(self);
2199 	} else if (mtype & USYNC_PROCESS) {
2200 		mutex_unlock_process(mp, release_all);
2201 	} else {	/* USYNC_THREAD */
2202 		if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) {
2203 			(void) __lwp_unpark(lwpid);
2204 			preempt(self);
2205 		}
2206 	}
2207 
2208 	if (mtype & LOCK_ROBUST)
2209 		forget_lock(mp);
2210 
2211 	if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp))
2212 		_ceil_prio_waive();
2213 
2214 	return (error);
2215 }
2216 
2217 #pragma weak _private_mutex_unlock = __mutex_unlock
2218 #pragma weak mutex_unlock = __mutex_unlock
2219 #pragma weak _mutex_unlock = __mutex_unlock
2220 #pragma weak pthread_mutex_unlock = __mutex_unlock
2221 #pragma weak _pthread_mutex_unlock = __mutex_unlock
2222 int
2223 __mutex_unlock(mutex_t *mp)
2224 {
2225 	ulwp_t *self = curthread;
2226 	uberdata_t *udp = self->ul_uberdata;
2227 	uberflags_t *gflags;
2228 	lwpid_t lwpid;
2229 	int mtype;
2230 	short el;
2231 
2232 	/*
2233 	 * Optimize the case of USYNC_THREAD, including
2234 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
2235 	 * no error detection, no lock statistics,
2236 	 * and the process has only a single thread.
2237 	 * (Most likely a traditional single-threaded application.)
2238 	 */
2239 	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
2240 	    udp->uberflags.uf_all) == 0) {
2241 		if (mtype) {
2242 			/*
2243 			 * At this point we know that one or both of the
2244 			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
2245 			 */
2246 			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
2247 				return (EPERM);
2248 			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
2249 				mp->mutex_rcount--;
2250 				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
2251 				return (0);
2252 			}
2253 		}
2254 		/*
2255 		 * Only one thread exists so we don't need an atomic operation.
2256 		 * Also, there can be no waiters.
2257 		 */
2258 		mp->mutex_owner = 0;
2259 		mp->mutex_lockword = 0;
2260 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
2261 		return (0);
2262 	}
2263 
2264 	/*
2265 	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
2266 	 * no error detection, and no lock statistics.
2267 	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
2268 	 */
2269 	if ((gflags = self->ul_schedctl_called) != NULL) {
2270 		if (((el = gflags->uf_trs_ted) | mtype) == 0) {
2271 fast_unlock:
2272 			if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) {
2273 				(void) __lwp_unpark(lwpid);
2274 				preempt(self);
2275 			}
2276 			return (0);
2277 		}
2278 		if (el)		/* error detection or lock statistics */
2279 			goto slow_unlock;
2280 		if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
2281 			/*
2282 			 * At this point we know that one or both of the
2283 			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
2284 			 */
2285 			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
2286 				return (EPERM);
2287 			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
2288 				mp->mutex_rcount--;
2289 				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
2290 				return (0);
2291 			}
2292 			goto fast_unlock;
2293 		}
2294 		if ((mtype &
2295 		    ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
2296 			/*
2297 			 * At this point we know that zero, one, or both of the
2298 			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and
2299 			 * that the USYNC_PROCESS flag is set.
2300 			 */
2301 			if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp))
2302 				return (EPERM);
2303 			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
2304 				mp->mutex_rcount--;
2305 				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
2306 				return (0);
2307 			}
2308 			mutex_unlock_process(mp, 0);
2309 			return (0);
2310 		}
2311 	}
2312 
2313 	/* else do it the long way */
2314 slow_unlock:
2315 	return (mutex_unlock_internal(mp, 0));
2316 }
2317 
2318 /*
2319  * Internally to the library, almost all mutex lock/unlock actions
2320  * go through these lmutex_ functions, to protect critical regions.
2321  * We replicate a bit of code from __mutex_lock() and __mutex_unlock()
2322  * to make these functions faster since we know that the mutex type
2323  * of all internal locks is USYNC_THREAD.  We also know that internal
2324  * locking can never fail, so we panic if it does.
2325  */
2326 void
2327 lmutex_lock(mutex_t *mp)
2328 {
2329 	ulwp_t *self = curthread;
2330 	uberdata_t *udp = self->ul_uberdata;
2331 
2332 	ASSERT(mp->mutex_type == USYNC_THREAD);
2333 
2334 	enter_critical(self);
2335 	/*
2336 	 * Optimize the case of no lock statistics and only a single thread.
2337 	 * (Most likely a traditional single-threaded application.)
2338 	 */
2339 	if (udp->uberflags.uf_all == 0) {
2340 		/*
2341 		 * Only one thread exists; the mutex must be free.
2342 		 */
2343 		ASSERT(mp->mutex_lockw == 0);
2344 		mp->mutex_lockw = LOCKSET;
2345 		mp->mutex_owner = (uintptr_t)self;
2346 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2347 	} else {
2348 		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
2349 
2350 		if (!self->ul_schedctl_called)
2351 			(void) setup_schedctl();
2352 
2353 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
2354 			mp->mutex_owner = (uintptr_t)self;
2355 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2356 		} else if (mutex_trylock_adaptive(mp, 1) != 0) {
2357 			(void) mutex_lock_queue(self, msp, mp, NULL);
2358 		}
2359 
2360 		if (msp)
2361 			record_begin_hold(msp);
2362 	}
2363 }
2364 
2365 void
2366 lmutex_unlock(mutex_t *mp)
2367 {
2368 	ulwp_t *self = curthread;
2369 	uberdata_t *udp = self->ul_uberdata;
2370 
2371 	ASSERT(mp->mutex_type == USYNC_THREAD);
2372 
2373 	/*
2374 	 * Optimize the case of no lock statistics and only a single thread.
2375 	 * (Most likely a traditional single-threaded application.)
2376 	 */
2377 	if (udp->uberflags.uf_all == 0) {
2378 		/*
2379 		 * Only one thread exists so there can be no waiters.
2380 		 */
2381 		mp->mutex_owner = 0;
2382 		mp->mutex_lockword = 0;
2383 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
2384 	} else {
2385 		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
2386 		lwpid_t lwpid;
2387 
2388 		if (msp)
2389 			(void) record_hold_time(msp);
2390 		if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) {
2391 			(void) __lwp_unpark(lwpid);
2392 			preempt(self);
2393 		}
2394 	}
2395 	exit_critical(self);
2396 }
2397 
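/*
 * An illustrative sketch of the internal usage pattern: callers bracket a
 * libc critical region with lmutex_lock() and lmutex_unlock(), which also
 * wrap the region in enter_critical()/exit_critical() as seen above.
 * register_lock(), earlier in this file, does exactly this with
 * udp->tdb_hash_lock:
 *
 *	lmutex_lock(&udp->tdb_hash_lock);
 *	...examine or extend the robust-lock hash table...
 *	lmutex_unlock(&udp->tdb_hash_lock);
 */
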
2398 /*
2399  * For specialized code in libc, like the asynchronous i/o code,
2400  * the following sig_*() locking primitives are used in order
2401  * to make the code asynchronous signal safe.  Signals are
2402  * deferred while locks acquired by these functions are held.
2403  */
2404 void
2405 sig_mutex_lock(mutex_t *mp)
2406 {
2407 	sigoff(curthread);
2408 	(void) _private_mutex_lock(mp);
2409 }
2410 
2411 void
2412 sig_mutex_unlock(mutex_t *mp)
2413 {
2414 	(void) _private_mutex_unlock(mp);
2415 	sigon(curthread);
2416 }
2417 
2418 int
2419 sig_mutex_trylock(mutex_t *mp)
2420 {
2421 	int error;
2422 
2423 	sigoff(curthread);
2424 	if ((error = _private_mutex_trylock(mp)) != 0)
2425 		sigon(curthread);
2426 	return (error);
2427 }
2428 
2429 /*
2430  * sig_cond_wait() is a cancellation point.
2431  */
2432 int
2433 sig_cond_wait(cond_t *cv, mutex_t *mp)
2434 {
2435 	int error;
2436 
2437 	ASSERT(curthread->ul_sigdefer != 0);
2438 	_private_testcancel();
2439 	error = __cond_wait(cv, mp);
2440 	if (error == EINTR && curthread->ul_cursig) {
2441 		sig_mutex_unlock(mp);
2442 		/* take the deferred signal here */
2443 		sig_mutex_lock(mp);
2444 	}
2445 	_private_testcancel();
2446 	return (error);
2447 }
2448 
2449 /*
2450  * sig_cond_reltimedwait() is a cancellation point.
2451  */
2452 int
2453 sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts)
2454 {
2455 	int error;
2456 
2457 	ASSERT(curthread->ul_sigdefer != 0);
2458 	_private_testcancel();
2459 	error = __cond_reltimedwait(cv, mp, ts);
2460 	if (error == EINTR && curthread->ul_cursig) {
2461 		sig_mutex_unlock(mp);
2462 		/* take the deferred signal here */
2463 		sig_mutex_lock(mp);
2464 	}
2465 	_private_testcancel();
2466 	return (error);
2467 }
2468 
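/*
 * An illustrative sketch of the intended usage pattern: the sig_*()
 * primitives are used together, since sig_cond_wait() and
 * sig_cond_reltimedwait() assert that signals were already deferred by a
 * prior sig_mutex_lock().  The lock, condvar, and predicate named below
 * are hypothetical.
 *
 *	sig_mutex_lock(&aio_lock);
 *	while (!work_ready)
 *		(void) sig_cond_wait(&aio_cv, &aio_lock);
 *	...consume the work...
 *	sig_mutex_unlock(&aio_lock);
 */
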
2469 /*
2470  * For specialized code in libc, like the stdio code,
2471  * the following cancel_safe_*() locking primitives are used in
2472  * order to make the code cancellation-safe.  Cancellation is
2473  * deferred while locks acquired by these functions are held.
2474  */
2475 void
2476 cancel_safe_mutex_lock(mutex_t *mp)
2477 {
2478 	(void) _private_mutex_lock(mp);
2479 	curthread->ul_libc_locks++;
2480 }
2481 
2482 int
2483 cancel_safe_mutex_trylock(mutex_t *mp)
2484 {
2485 	int error;
2486 
2487 	if ((error = _private_mutex_trylock(mp)) == 0)
2488 		curthread->ul_libc_locks++;
2489 	return (error);
2490 }
2491 
2492 void
2493 cancel_safe_mutex_unlock(mutex_t *mp)
2494 {
2495 	ulwp_t *self = curthread;
2496 
2497 	ASSERT(self->ul_libc_locks != 0);
2498 
2499 	(void) _private_mutex_unlock(mp);
2500 
2501 	/*
2502 	 * Decrement the count of locks held by cancel_safe_mutex_lock().
2503 	 * If we are then in a position to terminate cleanly, and
2504 	 * there is a pending cancellation, and cancellation
2505 	 * is not disabled, and we received EINTR from a recent
2506 	 * system call, then perform the cancellation action now.
2507 	 */
2508 	if (--self->ul_libc_locks == 0 &&
2509 	    !(self->ul_vfork | self->ul_nocancel |
2510 	    self->ul_critical | self->ul_sigdefer) &&
2511 	    cancel_active())
2512 		_pthread_exit(PTHREAD_CANCELED);
2513 }
2514 
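/*
 * An illustrative sketch of the intended usage pattern: stdio-style code
 * pairs these primitives so that a pending cancellation is acted upon only
 * after the last such lock has been dropped.  The stream lock named below
 * is hypothetical.
 *
 *	cancel_safe_mutex_lock(&stream_lock);
 *	...operate on the stream...
 *	cancel_safe_mutex_unlock(&stream_lock);
 */
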
2515 static int
2516 shared_mutex_held(mutex_t *mparg)
2517 {
2518 	/*
2519 	 * The 'volatile' is necessary to make sure the compiler doesn't
2520 	 * reorder the tests of the various components of the mutex.
2521 	 * They must be tested in this order:
2522 	 *	mutex_lockw
2523 	 *	mutex_owner
2524 	 *	mutex_ownerpid
2525 	 * This relies on the fact that everywhere mutex_lockw is cleared,
2526 	 * mutex_owner and mutex_ownerpid are cleared before mutex_lockw
2527 	 * is cleared, and that everywhere mutex_lockw is set, mutex_owner
2528 	 * and mutex_ownerpid are set after mutex_lockw is set, and that
2529 	 * mutex_lockw is set or cleared with a memory barrier.
2530 	 */
2531 	volatile mutex_t *mp = (volatile mutex_t *)mparg;
2532 	ulwp_t *self = curthread;
2533 	uberdata_t *udp = self->ul_uberdata;
2534 
2535 	return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid);
2536 }
2537 
2538 /*
2539  * Some crufty old programs define their own version of _mutex_held()
2540  * to be simply return(1).  This breaks internal libc logic, so we
2541  * define a private version for exclusive use by libc, mutex_is_held(),
2542  * and also a new public function, __mutex_held(), to be used in new
2543  * code to circumvent these crufty old programs.
2544  */
2545 #pragma weak mutex_held = mutex_is_held
2546 #pragma weak _mutex_held = mutex_is_held
2547 #pragma weak __mutex_held = mutex_is_held
2548 int
2549 mutex_is_held(mutex_t *mparg)
2550 {
2551 	volatile mutex_t *mp = (volatile mutex_t *)mparg;
2552 
2553 	if (mparg->mutex_type & USYNC_PROCESS)
2554 		return (shared_mutex_held(mparg));
2555 	return (MUTEX_OWNED(mp, curthread));
2556 }
2557 
2558 #pragma weak _private_mutex_destroy = __mutex_destroy
2559 #pragma weak mutex_destroy = __mutex_destroy
2560 #pragma weak _mutex_destroy = __mutex_destroy
2561 #pragma weak pthread_mutex_destroy = __mutex_destroy
2562 #pragma weak _pthread_mutex_destroy = __mutex_destroy
2563 int
2564 __mutex_destroy(mutex_t *mp)
2565 {
2566 	if (mp->mutex_type & USYNC_PROCESS)
2567 		forget_lock(mp);
2568 	(void) _memset(mp, 0, sizeof (*mp));
2569 	tdb_sync_obj_deregister(mp);
2570 	return (0);
2571 }
2572 
2573 #pragma weak mutex_consistent = __mutex_consistent
2574 #pragma weak _mutex_consistent = __mutex_consistent
2575 #pragma weak pthread_mutex_consistent_np = __mutex_consistent
2576 #pragma weak _pthread_mutex_consistent_np = __mutex_consistent
2577 int
2578 __mutex_consistent(mutex_t *mp)
2579 {
2580 	/*
2581 	 * Do this only for an inconsistent, initialized robust lock
2582 	 * that we hold.  For all other cases, return EINVAL.
2583 	 */
2584 	if (mutex_is_held(mp) &&
2585 	    (mp->mutex_type & LOCK_ROBUST) &&
2586 	    (mp->mutex_flag & LOCK_INITED) &&
2587 	    (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) {
2588 		mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
2589 		mp->mutex_rcount = 0;
2590 		return (0);
2591 	}
2592 	return (EINVAL);
2593 }
2594 
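/*
 * An illustrative sketch, not part of libc, of the application-level
 * recovery sequence for a robust mutex.  The mutex m is hypothetical, and
 * repair_state()/recreate_lock() stand in for whatever consistency repair
 * or reinitialization the application performs.
 *
 *	switch (pthread_mutex_lock(&m)) {
 *	case 0:
 *		break;
 *	case EOWNERDEAD:
 *		repair_state();
 *		(void) pthread_mutex_consistent_np(&m);
 *		break;
 *	case ENOTRECOVERABLE:
 *		return (recreate_lock());
 *	}
 *	...use the protected state...
 *	(void) pthread_mutex_unlock(&m);
 */
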
2595 /*
2596  * Spin locks are separate from ordinary mutexes,
2597  * but we use the same data structure for them.
2598  */
2599 
2600 #pragma weak pthread_spin_init = _pthread_spin_init
2601 int
2602 _pthread_spin_init(pthread_spinlock_t *lock, int pshared)
2603 {
2604 	mutex_t *mp = (mutex_t *)lock;
2605 
2606 	(void) _memset(mp, 0, sizeof (*mp));
2607 	if (pshared == PTHREAD_PROCESS_SHARED)
2608 		mp->mutex_type = USYNC_PROCESS;
2609 	else
2610 		mp->mutex_type = USYNC_THREAD;
2611 	mp->mutex_flag = LOCK_INITED;
2612 	mp->mutex_magic = MUTEX_MAGIC;
2613 	return (0);
2614 }
2615 
2616 #pragma weak pthread_spin_destroy = _pthread_spin_destroy
2617 int
2618 _pthread_spin_destroy(pthread_spinlock_t *lock)
2619 {
2620 	(void) _memset(lock, 0, sizeof (*lock));
2621 	return (0);
2622 }
2623 
2624 #pragma weak pthread_spin_trylock = _pthread_spin_trylock
2625 int
2626 _pthread_spin_trylock(pthread_spinlock_t *lock)
2627 {
2628 	mutex_t *mp = (mutex_t *)lock;
2629 	ulwp_t *self = curthread;
2630 	int error = 0;
2631 
2632 	no_preempt(self);
2633 	if (set_lock_byte(&mp->mutex_lockw) != 0)
2634 		error = EBUSY;
2635 	else {
2636 		mp->mutex_owner = (uintptr_t)self;
2637 		if (mp->mutex_type == USYNC_PROCESS)
2638 			mp->mutex_ownerpid = self->ul_uberdata->pid;
2639 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2640 	}
2641 	preempt(self);
2642 	return (error);
2643 }
2644 
2645 #pragma weak pthread_spin_lock = _pthread_spin_lock
2646 int
2647 _pthread_spin_lock(pthread_spinlock_t *lock)
2648 {
2649 	mutex_t *mp = (mutex_t *)lock;
2650 	ulwp_t *self = curthread;
2651 	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
2652 	int count = 0;
2653 
2654 	ASSERT(!self->ul_critical || self->ul_bindflags);
2655 
2656 	DTRACE_PROBE1(plockstat, mutex__spin, mp);
2657 
2658 	/*
2659 	 * We don't care whether the owner is running on a processor.
2660 	 * We just spin because that's what this interface requires.
2661 	 */
2662 	for (;;) {
2663 		if (*lockp == 0) {	/* lock byte appears to be clear */
2664 			no_preempt(self);
2665 			if (set_lock_byte(lockp) == 0)
2666 				break;
2667 			preempt(self);
2668 		}
2669 		if (count < INT_MAX)
2670 			count++;
2671 		SMT_PAUSE();
2672 	}
2673 	mp->mutex_owner = (uintptr_t)self;
2674 	if (mp->mutex_type == USYNC_PROCESS)
2675 		mp->mutex_ownerpid = self->ul_uberdata->pid;
2676 	preempt(self);
2677 	if (count) {
2678 		DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
2679 	}
2680 	DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
2681 	return (0);
2682 }
2683 
2684 #pragma weak pthread_spin_unlock = _pthread_spin_unlock
2685 int
2686 _pthread_spin_unlock(pthread_spinlock_t *lock)
2687 {
2688 	mutex_t *mp = (mutex_t *)lock;
2689 	ulwp_t *self = curthread;
2690 
2691 	no_preempt(self);
2692 	mp->mutex_owner = 0;
2693 	mp->mutex_ownerpid = 0;
2694 	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
2695 	(void) atomic_swap_32(&mp->mutex_lockword, 0);
2696 	preempt(self);
2697 	return (0);
2698 }
2699 
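/*
 * An illustrative sketch, not part of libc: spin locks are intended for
 * very short critical sections.  The counter below is a hypothetical
 * shared variable.
 *
 *	pthread_spinlock_t lock;
 *
 *	(void) pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
 *	(void) pthread_spin_lock(&lock);
 *	counter++;
 *	(void) pthread_spin_unlock(&lock);
 *	(void) pthread_spin_destroy(&lock);
 */
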
2700 #define	INITIAL_LOCKS	8	/* initial size of ul_heldlocks.array */
2701 
2702 /*
2703  * Find/allocate an entry for 'lock' in our array of held locks.
2704  */
2705 static mutex_t **
2706 find_lock_entry(mutex_t *lock)
2707 {
2708 	ulwp_t *self = curthread;
2709 	mutex_t **remembered = NULL;
2710 	mutex_t **lockptr;
2711 	uint_t nlocks;
2712 
2713 	if ((nlocks = self->ul_heldlockcnt) != 0)
2714 		lockptr = self->ul_heldlocks.array;
2715 	else {
2716 		nlocks = 1;
2717 		lockptr = &self->ul_heldlocks.single;
2718 	}
2719 
2720 	for (; nlocks; nlocks--, lockptr++) {
2721 		if (*lockptr == lock)
2722 			return (lockptr);
2723 		if (*lockptr == NULL && remembered == NULL)
2724 			remembered = lockptr;
2725 	}
2726 	if (remembered != NULL) {
2727 		*remembered = lock;
2728 		return (remembered);
2729 	}
2730 
2731 	/*
2732 	 * No entry available.  Allocate more space, converting
2733 	 * the single entry into an array of entries if necessary.
2734 	 */
2735 	if ((nlocks = self->ul_heldlockcnt) == 0) {
2736 		/*
2737 		 * Initial allocation of the array.
2738 		 * Convert the single entry into an array.
2739 		 */
2740 		self->ul_heldlockcnt = nlocks = INITIAL_LOCKS;
2741 		lockptr = lmalloc(nlocks * sizeof (mutex_t *));
2742 		/*
2743 		 * The single entry becomes the first entry in the array.
2744 		 */
2745 		*lockptr = self->ul_heldlocks.single;
2746 		self->ul_heldlocks.array = lockptr;
2747 		/*
2748 		 * Return the next available entry in the array.
2749 		 */
2750 		*++lockptr = lock;
2751 		return (lockptr);
2752 	}
2753 	/*
2754 	 * Reallocate the array, double the size each time.
2755 	 */
2756 	lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *));
2757 	(void) _memcpy(lockptr, self->ul_heldlocks.array,
2758 	    nlocks * sizeof (mutex_t *));
2759 	lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *));
2760 	self->ul_heldlocks.array = lockptr;
2761 	self->ul_heldlockcnt *= 2;
2762 	/*
2763 	 * Return the next available entry in the newly allocated array.
2764 	 */
2765 	*(lockptr += nlocks) = lock;
2766 	return (lockptr);
2767 }
2768 
2769 /*
2770  * Insert 'lock' into our list of held locks.
2771  * Currently only used for LOCK_ROBUST mutexes.
2772  */
2773 void
2774 remember_lock(mutex_t *lock)
2775 {
2776 	(void) find_lock_entry(lock);
2777 }
2778 
2779 /*
2780  * Remove 'lock' from our list of held locks.
2781  * Currently only used for LOCK_ROBUST mutexes.
2782  */
2783 void
2784 forget_lock(mutex_t *lock)
2785 {
2786 	*find_lock_entry(lock) = NULL;
2787 }
2788 
2789 /*
2790  * Free the array of held locks.
2791  */
2792 void
2793 heldlock_free(ulwp_t *ulwp)
2794 {
2795 	uint_t nlocks;
2796 
2797 	if ((nlocks = ulwp->ul_heldlockcnt) != 0)
2798 		lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *));
2799 	ulwp->ul_heldlockcnt = 0;
2800 	ulwp->ul_heldlocks.array = NULL;
2801 }
2802 
2803 /*
2804  * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD.
2805  * Called from _thrp_exit() to deal with abandoned locks.
2806  */
2807 void
2808 heldlock_exit(void)
2809 {
2810 	ulwp_t *self = curthread;
2811 	mutex_t **lockptr;
2812 	uint_t nlocks;
2813 	mutex_t *mp;
2814 
2815 	if ((nlocks = self->ul_heldlockcnt) != 0)
2816 		lockptr = self->ul_heldlocks.array;
2817 	else {
2818 		nlocks = 1;
2819 		lockptr = &self->ul_heldlocks.single;
2820 	}
2821 
2822 	for (; nlocks; nlocks--, lockptr++) {
2823 		/*
2824 		 * The kernel takes care of transitioning held
2825 		 * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD.
2826 		 * We avoid that case here.
2827 		 */
2828 		if ((mp = *lockptr) != NULL &&
2829 		    mutex_is_held(mp) &&
2830 		    (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) ==
2831 		    LOCK_ROBUST) {
2832 			mp->mutex_rcount = 0;
2833 			if (!(mp->mutex_flag & LOCK_UNMAPPED))
2834 				mp->mutex_flag |= LOCK_OWNERDEAD;
2835 			(void) mutex_unlock_internal(mp, 1);
2836 		}
2837 	}
2838 
2839 	heldlock_free(self);
2840 }
2841 
2842 #pragma weak cond_init = _cond_init
2843 /* ARGSUSED2 */
2844 int
2845 _cond_init(cond_t *cvp, int type, void *arg)
2846 {
2847 	if (type != USYNC_THREAD && type != USYNC_PROCESS)
2848 		return (EINVAL);
2849 	(void) _memset(cvp, 0, sizeof (*cvp));
2850 	cvp->cond_type = (uint16_t)type;
2851 	cvp->cond_magic = COND_MAGIC;
2852 	return (0);
2853 }
2854 
2855 /*
2856  * cond_sleep_queue(): utility function for cond_wait_queue().
2857  *
2858  * Go to sleep on a condvar sleep queue, expect to be waked up
2859  * by someone calling cond_signal() or cond_broadcast() or due
2860  * to receiving a UNIX signal or being cancelled, or just simply
2861  * due to a spurious wakeup (like someome calling forkall()).
2862  * due to a spurious wakeup (like someone calling forkall()).
2863  * The associated mutex is *not* reacquired before returning.
2864  * That must be done by the caller of cond_sleep_queue().
2865  */
2866 static int
2867 cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
2868 {
2869 	ulwp_t *self = curthread;
2870 	queue_head_t *qp;
2871 	queue_head_t *mqp;
2872 	lwpid_t lwpid;
2873 	int signalled;
2874 	int error;
2875 	int release_all;
2876 
2877 	/*
2878 	 * Put ourself on the CV sleep queue, unlock the mutex, then
2879 	 * park ourself and unpark a candidate lwp to grab the mutex.
2880 	 * We must go onto the CV sleep queue before dropping the
2881 	 * mutex in order to guarantee atomicity of the operation.
2882 	 */
2883 	self->ul_sp = stkptr();
2884 	qp = queue_lock(cvp, CV);
2885 	enqueue(qp, self, cvp, CV);
2886 	cvp->cond_waiters_user = 1;
2887 	self->ul_cvmutex = mp;
2888 	self->ul_cv_wake = (tsp != NULL);
2889 	self->ul_signalled = 0;
2890 	if (mp->mutex_flag & LOCK_OWNERDEAD) {
2891 		mp->mutex_flag &= ~LOCK_OWNERDEAD;
2892 		mp->mutex_flag |= LOCK_NOTRECOVERABLE;
2893 	}
2894 	release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0);
2895 	lwpid = mutex_unlock_queue(mp, release_all);
2896 	for (;;) {
2897 		set_parking_flag(self, 1);
2898 		queue_unlock(qp);
2899 		if (lwpid != 0) {
2900 			lwpid = preempt_unpark(self, lwpid);
2901 			preempt(self);
2902 		}
2903 		/*
2904 		 * We may have a deferred signal present,
2905 		 * in which case we should return EINTR.
2906 		 * Also, we may have received a SIGCANCEL; if so
2907 		 * and we are cancelable we should return EINTR.
2908 		 * We force an immediate EINTR return from
2909 		 * __lwp_park() by turning our parking flag off.
2910 		 */
2911 		if (self->ul_cursig != 0 ||
2912 		    (self->ul_cancelable && self->ul_cancel_pending))
2913 			set_parking_flag(self, 0);
2914 		/*
2915 		 * __lwp_park() will return the residual time in tsp
2916 		 * if we are unparked before the timeout expires.
2917 		 */
2918 		error = __lwp_park(tsp, lwpid);
2919 		set_parking_flag(self, 0);
2920 		lwpid = 0;	/* unpark the other lwp only once */
2921 		/*
2922 		 * We were waked up by cond_signal(), cond_broadcast(),
2923 		 * by an interrupt or timeout (EINTR or ETIME),
2924 		 * or we may just have gotten a spurious wakeup.
2925 		 */
2926 		qp = queue_lock(cvp, CV);
2927 		mqp = queue_lock(mp, MX);
2928 		if (self->ul_sleepq == NULL)
2929 			break;
2930 		/*
2931 		 * We are on either the condvar sleep queue or the
2932 		 * mutex sleep queue.  Break out of the sleep if we
2933 		 * were interrupted or we timed out (EINTR or ETIME).
2934 		 * Else this is a spurious wakeup; continue the loop.
2935 		 */
2936 		if (self->ul_sleepq == mqp) {		/* mutex queue */
2937 			if (error) {
2938 				mp->mutex_waiters = dequeue_self(mqp, mp);
2939 				break;
2940 			}
2941 			tsp = NULL;	/* no more timeout */
2942 		} else if (self->ul_sleepq == qp) {	/* condvar queue */
2943 			if (error) {
2944 				cvp->cond_waiters_user = dequeue_self(qp, cvp);
2945 				break;
2946 			}
2947 			/*
2948 			 * Else a spurious wakeup on the condvar queue.
2949 			 * __lwp_park() has already adjusted the timeout.
2950 			 */
2951 		} else {
2952 			thr_panic("cond_sleep_queue(): thread not on queue");
2953 		}
2954 		queue_unlock(mqp);
2955 	}
2956 
2957 	self->ul_sp = 0;
2958 	ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0);
2959 	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
2960 	    self->ul_wchan == NULL);
2961 
2962 	signalled = self->ul_signalled;
2963 	self->ul_signalled = 0;
2964 	queue_unlock(qp);
2965 	queue_unlock(mqp);
2966 
2967 	/*
2968 	 * If we were concurrently cond_signal()d and we also
2969 	 * received a UNIX signal, were cancelled, or got a timeout,
2970 	 * then perform another cond_signal() to avoid consuming it.
2971 	 */
2972 	if (error && signalled)
2973 		(void) cond_signal_internal(cvp);
2974 
2975 	return (error);
2976 }
2977 
2978 int
2979 cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
2980 {
2981 	ulwp_t *self = curthread;
2982 	int error;
2983 	int merror;
2984 
2985 	/*
2986 	 * The old thread library was programmed to defer signals
2987 	 * while in cond_wait() so that the associated mutex would
2988 	 * be guaranteed to be held when the application signal
2989 	 * handler was invoked.
2990 	 *
2991 	 * We do not behave this way by default; the state of the
2992 	 * associated mutex in the signal handler is undefined.
2993 	 *
2994 	 * To accommodate applications that depend on the old
2995 	 * behavior, the _THREAD_COND_WAIT_DEFER environment
2996 	 * variable can be set to 1 and we will behave in the
2997 	 * old way with respect to cond_wait().
2998 	 */
2999 	if (self->ul_cond_wait_defer)
3000 		sigoff(self);
3001 
3002 	error = cond_sleep_queue(cvp, mp, tsp);
3003 
3004 	/*
3005 	 * Reacquire the mutex.
3006 	 */
3007 	if ((merror = mutex_lock_impl(mp, NULL)) != 0)
3008 		error = merror;
3009 
3010 	/*
3011 	 * Take any deferred signal now, after we have reacquired the mutex.
3012 	 */
3013 	if (self->ul_cond_wait_defer)
3014 		sigon(self);
3015 
3016 	return (error);
3017 }
3018 
3019 /*
3020  * cond_sleep_kernel(): utility function for cond_wait_kernel().
3021  * See the comment ahead of cond_sleep_queue(), above.
3022  */
3023 static int
3024 cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
3025 {
3026 	int mtype = mp->mutex_type;
3027 	ulwp_t *self = curthread;
3028 	int error;
3029 
3030 	if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp))
3031 		_ceil_prio_waive();
3032 
3033 	self->ul_sp = stkptr();
3034 	self->ul_wchan = cvp;
3035 	mp->mutex_owner = 0;
3036 	/* mp->mutex_ownerpid is cleared by ___lwp_cond_wait() */
3037 	if (mtype & LOCK_PRIO_INHERIT)
3038 		mp->mutex_lockw = LOCKCLEAR;
3039 	/*
3040 	 * ___lwp_cond_wait() returns immediately with EINTR if
3041 	 * set_parking_flag(self,0) is called on this lwp before it
3042 	 * goes to sleep in the kernel.  sigacthandler() calls this
3043 	 * when a deferred signal is noted.  This assures that we don't
3044 	 * get stuck in ___lwp_cond_wait() with all signals blocked
3045 	 * due to taking a deferred signal before going to sleep.
3046 	 */
3047 	set_parking_flag(self, 1);
3048 	if (self->ul_cursig != 0 ||
3049 	    (self->ul_cancelable && self->ul_cancel_pending))
3050 		set_parking_flag(self, 0);
3051 	error = ___lwp_cond_wait(cvp, mp, tsp, 1);
3052 	set_parking_flag(self, 0);
3053 	self->ul_sp = 0;
3054 	self->ul_wchan = NULL;
3055 	return (error);
3056 }
3057 
3058 int
3059 cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
3060 {
3061 	ulwp_t *self = curthread;
3062 	int error;
3063 	int merror;
3064 
3065 	/*
3066 	 * See the large comment in cond_wait_queue(), above.
3067 	 */
3068 	if (self->ul_cond_wait_defer)
3069 		sigoff(self);
3070 
3071 	error = cond_sleep_kernel(cvp, mp, tsp);
3072 
3073 	/*
3074 	 * Override the return code from ___lwp_cond_wait()
3075 	 * with any non-zero return code from mutex_lock().
3076 	 * This addresses robust lock failures in particular;
3077 	 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE
3078 	 * errors in order to take corrective action.
3079 	 */
3080 	if ((merror = mutex_lock_impl(mp, NULL)) != 0)
3081 		error = merror;
3082 
3083 	/*
3084 	 * Take any deferred signal now, after we have reacquired the mutex.
3085 	 */
3086 	if (self->ul_cond_wait_defer)
3087 		sigon(self);
3088 
3089 	return (error);
3090 }
3091 
3092 /*
3093  * Common code for _cond_wait() and _cond_timedwait()
3094  */
3095 int
3096 cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
3097 {
3098 	int mtype = mp->mutex_type;
3099 	hrtime_t begin_sleep = 0;
3100 	ulwp_t *self = curthread;
3101 	uberdata_t *udp = self->ul_uberdata;
3102 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
3103 	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
3104 	uint8_t rcount;
3105 	int error = 0;
3106 
3107 	/*
3108 	 * The SUSV3 Posix spec for pthread_cond_timedwait() states:
3109 	 *	Except in the case of [ETIMEDOUT], all these error checks
3110 	 *	shall act as if they were performed immediately at the
3111 	 *	beginning of processing for the function and shall cause
3112 	 *	an error return, in effect, prior to modifying the state
3113 	 *	of the mutex specified by mutex or the condition variable
3114 	 *	specified by cond.
3115 	 * Therefore, we must return EINVAL now if the timout is invalid.
3116 	 * Therefore, we must return EINVAL now if the timeout is invalid.
3117 	if (tsp != NULL &&
3118 	    (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC))
3119 		return (EINVAL);
3120 
3121 	if (__td_event_report(self, TD_SLEEP, udp)) {
3122 		self->ul_sp = stkptr();
3123 		self->ul_wchan = cvp;
3124 		self->ul_td_evbuf.eventnum = TD_SLEEP;
3125 		self->ul_td_evbuf.eventdata = cvp;
3126 		tdb_event(TD_SLEEP, udp);
3127 		self->ul_sp = 0;
3128 	}
3129 	if (csp) {
3130 		if (tsp)
3131 			tdb_incr(csp->cond_timedwait);
3132 		else
3133 			tdb_incr(csp->cond_wait);
3134 	}
3135 	if (msp)
3136 		begin_sleep = record_hold_time(msp);
3137 	else if (csp)
3138 		begin_sleep = gethrtime();
3139 
3140 	if (self->ul_error_detection) {
3141 		if (!mutex_is_held(mp))
3142 			lock_error(mp, "cond_wait", cvp, NULL);
3143 		if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0)
3144 			lock_error(mp, "recursive mutex in cond_wait",
3145 			    cvp, NULL);
3146 		if (cvp->cond_type & USYNC_PROCESS) {
3147 			if (!(mtype & USYNC_PROCESS))
3148 				lock_error(mp, "cond_wait", cvp,
3149 				    "condvar process-shared, "
3150 				    "mutex process-private");
3151 		} else {
3152 			if (mtype & USYNC_PROCESS)
3153 				lock_error(mp, "cond_wait", cvp,
3154 				    "condvar process-private, "
3155 				    "mutex process-shared");
3156 		}
3157 	}
3158 
3159 	/*
3160 	 * We deal with recursive mutexes by completely
3161 	 * dropping the lock and restoring the recursion
3162 	 * count after waking up.  This is arguably wrong,
3163 	 * but it obeys the principle of least astonishment.
3164 	 */
3165 	rcount = mp->mutex_rcount;
3166 	mp->mutex_rcount = 0;
3167 	if ((mtype &
3168 	    (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) |
3169 	    (cvp->cond_type & USYNC_PROCESS))
3170 		error = cond_wait_kernel(cvp, mp, tsp);
3171 	else
3172 		error = cond_wait_queue(cvp, mp, tsp);
3173 	mp->mutex_rcount = rcount;
3174 
3175 	if (csp) {
3176 		hrtime_t lapse = gethrtime() - begin_sleep;
3177 		if (tsp == NULL)
3178 			csp->cond_wait_sleep_time += lapse;
3179 		else {
3180 			csp->cond_timedwait_sleep_time += lapse;
3181 			if (error == ETIME)
3182 				tdb_incr(csp->cond_timedwait_timeout);
3183 		}
3184 	}
3185 	return (error);
3186 }
3187 
3188 /*
3189  * cond_wait() and _cond_wait() are cancellation points but __cond_wait()
3190  * is not.  Internally, libc calls the non-cancellation version.
3191  * Other libraries need to use pthread_setcancelstate(), as appropriate,
3192  * since __cond_wait() is not exported from libc.
3193  */
3194 int
3195 __cond_wait(cond_t *cvp, mutex_t *mp)
3196 {
3197 	ulwp_t *self = curthread;
3198 	uberdata_t *udp = self->ul_uberdata;
3199 	uberflags_t *gflags;
3200 
3201 	/*
3202 	 * Optimize the common case of USYNC_THREAD plus
3203 	 * no error detection, no lock statistics, and no event tracing.
3204 	 */
3205 	if ((gflags = self->ul_schedctl_called) != NULL &&
3206 	    (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted |
3207 	    self->ul_td_events_enable |
3208 	    udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0)
3209 		return (cond_wait_queue(cvp, mp, NULL));
3210 
3211 	/*
3212 	 * Else do it the long way.
3213 	 */
3214 	return (cond_wait_common(cvp, mp, NULL));
3215 }
3216 
3217 #pragma weak cond_wait = _cond_wait
3218 int
3219 _cond_wait(cond_t *cvp, mutex_t *mp)
3220 {
3221 	int error;
3222 
3223 	_cancelon();
3224 	error = __cond_wait(cvp, mp);
3225 	if (error == EINTR)
3226 		_canceloff();
3227 	else
3228 		_canceloff_nocancel();
3229 	return (error);
3230 }
3231 
3232 /*
3233  * pthread_cond_wait() is a cancellation point.
3234  */
3235 #pragma weak pthread_cond_wait = _pthread_cond_wait
3236 int
3237 _pthread_cond_wait(cond_t *cvp, mutex_t *mp)
3238 {
3239 	int error;
3240 
3241 	error = _cond_wait(cvp, mp);
3242 	return ((error == EINTR)? 0 : error);
3243 }
3244 
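/*
 * An illustrative sketch, not part of libc: because a waiter can return
 * from the sleep queue on a spurious wakeup (see cond_sleep_queue() above),
 * callers must always re-test their predicate in a loop.  The mutex,
 * condvar, and predicate named below are hypothetical.
 *
 *	(void) pthread_mutex_lock(&m);
 *	while (!data_ready)
 *		(void) pthread_cond_wait(&cv, &m);
 *	...consume the data...
 *	(void) pthread_mutex_unlock(&m);
 */
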
3245 /*
3246  * cond_timedwait() and _cond_timedwait() are cancellation points
3247  * but __cond_timedwait() is not.
3248  */
3249 int
3250 __cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
3251 {
3252 	clockid_t clock_id = cvp->cond_clockid;
3253 	timespec_t reltime;
3254 	int error;
3255 
3256 	if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES)
3257 		clock_id = CLOCK_REALTIME;
3258 	abstime_to_reltime(clock_id, abstime, &reltime);
3259 	error = cond_wait_common(cvp, mp, &reltime);
3260 	if (error == ETIME && clock_id == CLOCK_HIGHRES) {
3261 		/*
3262 		 * Don't return ETIME if we didn't really get a timeout.
3263 		 * This can happen if we return because someone resets
3264 		 * the system clock.  Just return zero in this case,
3265 		 * giving a spurious wakeup but not a timeout.
3266 		 */
3267 		if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC +
3268 		    abstime->tv_nsec > gethrtime())
3269 			error = 0;
3270 	}
3271 	return (error);
3272 }
3273 
3274 #pragma weak cond_timedwait = _cond_timedwait
3275 int
3276 _cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
3277 {
3278 	int error;
3279 
3280 	_cancelon();
3281 	error = __cond_timedwait(cvp, mp, abstime);
3282 	if (error == EINTR)
3283 		_canceloff();
3284 	else
3285 		_canceloff_nocancel();
3286 	return (error);
3287 }
3288 
3289 /*
3290  * pthread_cond_timedwait() is a cancellation point.
3291  */
3292 #pragma weak pthread_cond_timedwait = _pthread_cond_timedwait
3293 int
3294 _pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
3295 {
3296 	int error;
3297 
3298 	error = _cond_timedwait(cvp, mp, abstime);
3299 	if (error == ETIME)
3300 		error = ETIMEDOUT;
3301 	else if (error == EINTR)
3302 		error = 0;
3303 	return (error);
3304 }
3305 
3306 /*
3307  * cond_reltimedwait() and _cond_reltimedwait() are cancellation points
3308  * but __cond_reltimedwait() is not.
3309  */
3310 int
3311 __cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime)
3312 {
3313 	timespec_t tslocal = *reltime;
3314 
3315 	return (cond_wait_common(cvp, mp, &tslocal));
3316 }
3317 
3318 #pragma weak cond_reltimedwait = _cond_reltimedwait
3319 int
3320 _cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime)
3321 {
3322 	int error;
3323 
3324 	_cancelon();
3325 	error = __cond_reltimedwait(cvp, mp, reltime);
3326 	if (error == EINTR)
3327 		_canceloff();
3328 	else
3329 		_canceloff_nocancel();
3330 	return (error);
3331 }
3332 
3333 #pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np
3334 int
3335 _pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp,
3336 	const timespec_t *reltime)
3337 {
3338 	int error;
3339 
3340 	error = _cond_reltimedwait(cvp, mp, reltime);
3341 	if (error == ETIME)
3342 		error = ETIMEDOUT;
3343 	else if (error == EINTR)
3344 		error = 0;
3345 	return (error);
3346 }
3347 
3348 #pragma weak pthread_cond_signal = cond_signal_internal
3349 #pragma weak _pthread_cond_signal = cond_signal_internal
3350 #pragma weak cond_signal = cond_signal_internal
3351 #pragma weak _cond_signal = cond_signal_internal
3352 int
3353 cond_signal_internal(cond_t *cvp)
3354 {
3355 	ulwp_t *self = curthread;
3356 	uberdata_t *udp = self->ul_uberdata;
3357 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
3358 	int error = 0;
3359 	queue_head_t *qp;
3360 	mutex_t *mp;
3361 	queue_head_t *mqp;
3362 	ulwp_t **ulwpp;
3363 	ulwp_t *ulwp;
3364 	ulwp_t *prev = NULL;
3365 	ulwp_t *next;
3366 	ulwp_t **suspp = NULL;
3367 	ulwp_t *susprev;
3368 
3369 	if (csp)
3370 		tdb_incr(csp->cond_signal);
3371 
3372 	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
3373 		error = __lwp_cond_signal(cvp);
3374 
3375 	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
3376 		return (error);
3377 
3378 	/*
3379 	 * Move someone from the condvar sleep queue to the mutex sleep
3380 	 * queue for the mutex that he will acquire on being waked up.
3381 	 * We can do this only if we own the mutex he will acquire.
3382 	 * If we do not own the mutex, or if his ul_cv_wake flag
3383 	 * is set, just dequeue and unpark him.
3384 	 */
3385 	qp = queue_lock(cvp, CV);
3386 	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
3387 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
3388 		if (ulwp->ul_wchan == cvp) {
3389 			if (!ulwp->ul_stop)
3390 				break;
3391 			/*
3392 			 * Try not to dequeue a suspended thread.
3393 			 * This mimics the old libthread's behavior.
3394 			 */
3395 			if (suspp == NULL) {
3396 				suspp = ulwpp;
3397 				susprev = prev;
3398 			}
3399 		}
3400 	}
3401 	if (ulwp == NULL && suspp != NULL) {
3402 		ulwp = *(ulwpp = suspp);
3403 		prev = susprev;
3404 		suspp = NULL;
3405 	}
3406 	if (ulwp == NULL) {	/* no one on the sleep queue */
3407 		cvp->cond_waiters_user = 0;
3408 		queue_unlock(qp);
3409 		return (error);
3410 	}
3411 	/*
3412 	 * Scan the remainder of the CV queue for another waiter.
3413 	 */
3414 	if (suspp != NULL) {
3415 		next = *suspp;
3416 	} else {
3417 		for (next = ulwp->ul_link; next != NULL; next = next->ul_link)
3418 			if (next->ul_wchan == cvp)
3419 				break;
3420 	}
3421 	if (next == NULL)
3422 		cvp->cond_waiters_user = 0;
3423 
3424 	/*
3425 	 * Inform the thread that he was the recipient of a cond_signal().
3426 	 * This lets him deal with cond_signal() and, concurrently,
3427 	 * one or more of a cancellation, a UNIX signal, or a timeout.
3428 	 * These latter conditions must not consume a cond_signal().
3429 	 */
3430 	ulwp->ul_signalled = 1;
3431 
3432 	/*
3433 	 * Dequeue the waiter but leave his ul_sleepq non-NULL
3434 	 * while we move him to the mutex queue so that he can
3435 	 * deal properly with spurious wakeups.
3436 	 */
3437 	*ulwpp = ulwp->ul_link;
3438 	ulwp->ul_link = NULL;
3439 	if (qp->qh_tail == ulwp)
3440 		qp->qh_tail = prev;
3441 	qp->qh_qlen--;
3442 
3443 	mp = ulwp->ul_cvmutex;		/* the mutex he will acquire */
3444 	ulwp->ul_cvmutex = NULL;
3445 	ASSERT(mp != NULL);
3446 
3447 	if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
3448 		lwpid_t lwpid = ulwp->ul_lwpid;
3449 
3450 		no_preempt(self);
3451 		ulwp->ul_sleepq = NULL;
3452 		ulwp->ul_wchan = NULL;
3453 		ulwp->ul_cv_wake = 0;
3454 		queue_unlock(qp);
3455 		(void) __lwp_unpark(lwpid);
3456 		preempt(self);
3457 	} else {
3458 		mqp = queue_lock(mp, MX);
3459 		enqueue(mqp, ulwp, mp, MX);
3460 		mp->mutex_waiters = 1;
3461 		queue_unlock(mqp);
3462 		queue_unlock(qp);
3463 	}
3464 
3465 	return (error);
3466 }
3467 
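/*
 * An illustrative sketch, not part of libc: signalling while holding the
 * associated mutex lets the code above move the waiter directly to the
 * mutex sleep queue instead of unparking it, so the waiter does not wake
 * up only to block again on the mutex.  The mutex, condvar, and predicate
 * named below are hypothetical.
 *
 *	(void) pthread_mutex_lock(&m);
 *	data_ready = 1;
 *	(void) pthread_cond_signal(&cv);
 *	(void) pthread_mutex_unlock(&m);
 */
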
3468 /*
3469  * Utility function called by mutex_wakeup_all(), cond_broadcast(),
3470  * and rw_queue_release() to (re)allocate a big buffer to hold the
3471  * lwpids of all the threads to be set running after they are removed
3472  * from their sleep queues.  Since we are holding a queue lock, we
3473  * cannot call any function that might acquire a lock.  mmap(), munmap(),
3474  * lwp_unpark_all() are simple system calls and are safe in this regard.
3475  * and lwp_unpark_all() are simple system calls and are safe in this regard.
3476 lwpid_t *
3477 alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr)
3478 {
3479 	/*
3480 	 * Allocate NEWLWPS ids on the first overflow.
3481 	 * Double the allocation each time after that.
3482 	 */
3483 	int nlwpid = *nlwpid_ptr;
3484 	int maxlwps = *maxlwps_ptr;
3485 	int first_allocation;
3486 	int newlwps;
3487 	void *vaddr;
3488 
3489 	ASSERT(nlwpid == maxlwps);
3490 
3491 	first_allocation = (maxlwps == MAXLWPS);
3492 	newlwps = first_allocation? NEWLWPS : 2 * maxlwps;
3493 	vaddr = _private_mmap(NULL, newlwps * sizeof (lwpid_t),
3494 	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
3495 
3496 	if (vaddr == MAP_FAILED) {
3497 		/*
3498 		 * Let's hope this never happens.
3499 		 * If it does, then we have a terrible
3500 		 * thundering herd on our hands.
3501 		 */
3502 		(void) __lwp_unpark_all(lwpid, nlwpid);
3503 		*nlwpid_ptr = 0;
3504 	} else {
3505 		(void) _memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t));
3506 		if (!first_allocation)
3507 			(void) _private_munmap(lwpid,
3508 			    maxlwps * sizeof (lwpid_t));
3509 		lwpid = vaddr;
3510 		*maxlwps_ptr = newlwps;
3511 	}
3512 
3513 	return (lwpid);
3514 }
3515 
3516 #pragma weak pthread_cond_broadcast = cond_broadcast_internal
3517 #pragma weak _pthread_cond_broadcast = cond_broadcast_internal
3518 #pragma weak cond_broadcast = cond_broadcast_internal
3519 #pragma weak _cond_broadcast = cond_broadcast_internal
3520 int
3521 cond_broadcast_internal(cond_t *cvp)
3522 {
3523 	ulwp_t *self = curthread;
3524 	uberdata_t *udp = self->ul_uberdata;
3525 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
3526 	int error = 0;
3527 	queue_head_t *qp;
3528 	mutex_t *mp;
3529 	mutex_t *mp_cache = NULL;
3530 	queue_head_t *mqp = NULL;
3531 	ulwp_t **ulwpp;
3532 	ulwp_t *ulwp;
3533 	ulwp_t *prev = NULL;
3534 	int nlwpid = 0;
3535 	int maxlwps = MAXLWPS;
3536 	lwpid_t buffer[MAXLWPS];
3537 	lwpid_t *lwpid = buffer;
3538 
3539 	if (csp)
3540 		tdb_incr(csp->cond_broadcast);
3541 
3542 	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
3543 		error = __lwp_cond_broadcast(cvp);
3544 
3545 	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
3546 		return (error);
3547 
3548 	/*
3549 	 * Move everyone from the condvar sleep queue to the mutex sleep
3550 	 * queue for the mutex that they will acquire on being waked up.
3551 	 * We can do this only if we own the mutex they will acquire.
3552 	 * If we do not own the mutex, or if their ul_cv_wake flag
3553 	 * is set, just dequeue and unpark them.
3554 	 *
3555 	 * We keep track of lwpids that are to be unparked in lwpid[].
3556 	 * __lwp_unpark_all() is called to unpark all of them after
3557 	 * they have been removed from the sleep queue and the sleep
3558 	 * queue lock has been dropped.  If we run out of space in our
3559 	 * on-stack buffer, we need to allocate more but we can't call
3560 	 * lmalloc() because we are holding a queue lock when the overflow
3561 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
3562 	 * either because the application may have allocated a small
3563 	 * stack and we don't want to overrun the stack.  So we call
3564 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
3565 	 * system call directly since that path acquires no locks.
3566 	 */
3567 	qp = queue_lock(cvp, CV);
3568 	cvp->cond_waiters_user = 0;
3569 	ulwpp = &qp->qh_head;
3570 	while ((ulwp = *ulwpp) != NULL) {
3571 		if (ulwp->ul_wchan != cvp) {
3572 			prev = ulwp;
3573 			ulwpp = &ulwp->ul_link;
3574 			continue;
3575 		}
3576 		*ulwpp = ulwp->ul_link;
3577 		ulwp->ul_link = NULL;
3578 		if (qp->qh_tail == ulwp)
3579 			qp->qh_tail = prev;
3580 		qp->qh_qlen--;
3581 		mp = ulwp->ul_cvmutex;		/* his mutex */
3582 		ulwp->ul_cvmutex = NULL;
3583 		ASSERT(mp != NULL);
3584 		if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
3585 			ulwp->ul_sleepq = NULL;
3586 			ulwp->ul_wchan = NULL;
3587 			ulwp->ul_cv_wake = 0;
3588 			if (nlwpid == maxlwps)
3589 				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
3590 			lwpid[nlwpid++] = ulwp->ul_lwpid;
3591 		} else {
3592 			if (mp != mp_cache) {
3593 				mp_cache = mp;
3594 				if (mqp != NULL)
3595 					queue_unlock(mqp);
3596 				mqp = queue_lock(mp, MX);
3597 			}
3598 			enqueue(mqp, ulwp, mp, MX);
3599 			mp->mutex_waiters = 1;
3600 		}
3601 	}
3602 	if (mqp != NULL)
3603 		queue_unlock(mqp);
3604 	if (nlwpid == 0) {
3605 		queue_unlock(qp);
3606 	} else {
3607 		no_preempt(self);
3608 		queue_unlock(qp);
3609 		if (nlwpid == 1)
3610 			(void) __lwp_unpark(lwpid[0]);
3611 		else
3612 			(void) __lwp_unpark_all(lwpid, nlwpid);
3613 		preempt(self);
3614 	}
3615 	if (lwpid != buffer)
3616 		(void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
3617 	return (error);
3618 }
3619 
3620 #pragma weak pthread_cond_destroy = _cond_destroy
3621 #pragma weak _pthread_cond_destroy = _cond_destroy
3622 #pragma weak cond_destroy = _cond_destroy
3623 int
3624 _cond_destroy(cond_t *cvp)
3625 {
3626 	cvp->cond_magic = 0;
3627 	tdb_sync_obj_deregister(cvp);
3628 	return (0);
3629 }
3630 
3631 #if defined(THREAD_DEBUG)
3632 void
3633 assert_no_libc_locks_held(void)
3634 {
3635 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
3636 }
3637 #endif
3638 
3639 /* protected by link_lock */
3640 uint64_t spin_lock_spin;
3641 uint64_t spin_lock_spin2;
3642 uint64_t spin_lock_sleep;
3643 uint64_t spin_lock_wakeup;
3644 
3645 /*
3646  * Record spin lock statistics.
3647  * Called by a thread exiting itself in thrp_exit().
3648  * Also called via atexit() from the thread calling
3649  * exit() to do all the other threads as well.
3650  */
3651 void
3652 record_spin_locks(ulwp_t *ulwp)
3653 {
3654 	spin_lock_spin += ulwp->ul_spin_lock_spin;
3655 	spin_lock_spin2 += ulwp->ul_spin_lock_spin2;
3656 	spin_lock_sleep += ulwp->ul_spin_lock_sleep;
3657 	spin_lock_wakeup += ulwp->ul_spin_lock_wakeup;
3658 	ulwp->ul_spin_lock_spin = 0;
3659 	ulwp->ul_spin_lock_spin2 = 0;
3660 	ulwp->ul_spin_lock_sleep = 0;
3661 	ulwp->ul_spin_lock_wakeup = 0;
3662 }
3663 
3664 /*
3665  * atexit function:  dump the queue statistics to stderr.
3666  */
3667 #if !defined(__lint)
3668 #define	fprintf	_fprintf
3669 #endif
3670 #include <stdio.h>
3671 void
3672 dump_queue_statistics(void)
3673 {
3674 	uberdata_t *udp = curthread->ul_uberdata;
3675 	queue_head_t *qp;
3676 	int qn;
3677 	uint64_t spin_lock_total = 0;
3678 
3679 	if (udp->queue_head == NULL || thread_queue_dump == 0)
3680 		return;
3681 
3682 	if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 ||
3683 	    fprintf(stderr, "queue#   lockcount    max qlen\n") < 0)
3684 		return;
3685 	for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) {
3686 		if (qp->qh_lockcount == 0)
3687 			continue;
3688 		spin_lock_total += qp->qh_lockcount;
3689 		if (fprintf(stderr, "%5d %12llu%12u\n", qn,
3690 		    (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0)
3691 			return;
3692 	}
3693 
3694 	if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 ||
3695 	    fprintf(stderr, "queue#   lockcount    max qlen\n") < 0)
3696 		return;
3697 	for (qn = 0; qn < QHASHSIZE; qn++, qp++) {
3698 		if (qp->qh_lockcount == 0)
3699 			continue;
3700 		spin_lock_total += qp->qh_lockcount;
3701 		if (fprintf(stderr, "%5d %12llu%12u\n", qn,
3702 		    (u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0)
3703 			return;
3704 	}
3705 
3706 	(void) fprintf(stderr, "\n  spin_lock_total  = %10llu\n",
3707 	    (u_longlong_t)spin_lock_total);
3708 	(void) fprintf(stderr, "  spin_lock_spin   = %10llu\n",
3709 	    (u_longlong_t)spin_lock_spin);
3710 	(void) fprintf(stderr, "  spin_lock_spin2  = %10llu\n",
3711 	    (u_longlong_t)spin_lock_spin2);
3712 	(void) fprintf(stderr, "  spin_lock_sleep  = %10llu\n",
3713 	    (u_longlong_t)spin_lock_sleep);
3714 	(void) fprintf(stderr, "  spin_lock_wakeup = %10llu\n",
3715 	    (u_longlong_t)spin_lock_wakeup);
3716 }
3717