17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
541efec22Sraf  * Common Development and Distribution License (the "License").
641efec22Sraf  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate 
227c478bd9Sstevel@tonic-gate /*
238fd04b83SRoger A. Faulkner  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*3ce2fcdcSRobert Mustacchi  * Copyright 2015 Joyent, Inc.
267c478bd9Sstevel@tonic-gate  */
277c478bd9Sstevel@tonic-gate 
2841efec22Sraf /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
2941efec22Sraf /*	  All Rights Reserved	*/
3041efec22Sraf 
317c478bd9Sstevel@tonic-gate #include <sys/param.h>
327c478bd9Sstevel@tonic-gate #include <sys/types.h>
337c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
347c478bd9Sstevel@tonic-gate #include <sys/systm.h>
357c478bd9Sstevel@tonic-gate #include <sys/cred.h>
367c478bd9Sstevel@tonic-gate #include <sys/user.h>
377c478bd9Sstevel@tonic-gate #include <sys/errno.h>
387c478bd9Sstevel@tonic-gate #include <sys/file.h>
397c478bd9Sstevel@tonic-gate #include <sys/proc.h>
407c478bd9Sstevel@tonic-gate #include <sys/prsystm.h>
417c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
427c478bd9Sstevel@tonic-gate #include <sys/sobject.h>
437c478bd9Sstevel@tonic-gate #include <sys/fault.h>
447c478bd9Sstevel@tonic-gate #include <sys/procfs.h>
457c478bd9Sstevel@tonic-gate #include <sys/watchpoint.h>
467c478bd9Sstevel@tonic-gate #include <sys/time.h>
477c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
487c478bd9Sstevel@tonic-gate #include <sys/machlock.h>
497c478bd9Sstevel@tonic-gate #include <sys/debug.h>
507c478bd9Sstevel@tonic-gate #include <sys/synch.h>
517c478bd9Sstevel@tonic-gate #include <sys/synch32.h>
527c478bd9Sstevel@tonic-gate #include <sys/mman.h>
537c478bd9Sstevel@tonic-gate #include <sys/class.h>
547c478bd9Sstevel@tonic-gate #include <sys/schedctl.h>
557c478bd9Sstevel@tonic-gate #include <sys/sleepq.h>
567c478bd9Sstevel@tonic-gate #include <sys/policy.h>
577c478bd9Sstevel@tonic-gate #include <sys/lwpchan_impl.h>
587c478bd9Sstevel@tonic-gate #include <sys/turnstile.h>
597c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
607c478bd9Sstevel@tonic-gate #include <sys/lwp_timer_impl.h>
617c478bd9Sstevel@tonic-gate #include <sys/lwp_upimutex_impl.h>
627c478bd9Sstevel@tonic-gate #include <vm/as.h>
637c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
647c478bd9Sstevel@tonic-gate 
657c478bd9Sstevel@tonic-gate static kthread_t *lwpsobj_owner(caddr_t);
667c478bd9Sstevel@tonic-gate static void lwp_unsleep(kthread_t *t);
677c478bd9Sstevel@tonic-gate static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
687c478bd9Sstevel@tonic-gate static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);
69c242ec1bSRoger A. Faulkner static void lwp_mutex_unregister(void *uaddr);
70db94676fSRoger A. Faulkner static void set_owner_pid(lwp_mutex_t *, uintptr_t, pid_t);
71db94676fSRoger A. Faulkner static int iswanted(kthread_t *, lwpchan_t *);
727c478bd9Sstevel@tonic-gate 
737c478bd9Sstevel@tonic-gate extern int lwp_cond_signal(lwp_cond_t *cv);
747c478bd9Sstevel@tonic-gate 
757c478bd9Sstevel@tonic-gate /*
767c478bd9Sstevel@tonic-gate  * Maximum number of user prio inheritance locks that can be held by a thread.
777c478bd9Sstevel@tonic-gate  * Used to limit kmem for each thread. This is a per-thread limit that
787c478bd9Sstevel@tonic-gate  * can be administered on a system wide basis (using /etc/system).
797c478bd9Sstevel@tonic-gate  *
807c478bd9Sstevel@tonic-gate  * Also, when a limit, say maxlwps is added for numbers of lwps within a
817c478bd9Sstevel@tonic-gate  * process, the per-thread limit automatically becomes a process-wide limit
827c478bd9Sstevel@tonic-gate  * of maximum number of held upi locks within a process:
837c478bd9Sstevel@tonic-gate  *      maxheldupimx = maxnestupimx * maxlwps;
847c478bd9Sstevel@tonic-gate  */
857c478bd9Sstevel@tonic-gate static uint32_t maxnestupimx = 2000;
867c478bd9Sstevel@tonic-gate 
877c478bd9Sstevel@tonic-gate /*
887c478bd9Sstevel@tonic-gate  * The sobj_ops vector exports a set of functions needed when a thread
897c478bd9Sstevel@tonic-gate  * is asleep on a synchronization object of this type.
907c478bd9Sstevel@tonic-gate  */
917c478bd9Sstevel@tonic-gate static sobj_ops_t lwp_sobj_ops = {
927c478bd9Sstevel@tonic-gate 	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
937c478bd9Sstevel@tonic-gate };
947c478bd9Sstevel@tonic-gate 
957c478bd9Sstevel@tonic-gate static kthread_t *lwpsobj_pi_owner(upimutex_t *up);
967c478bd9Sstevel@tonic-gate 
977c478bd9Sstevel@tonic-gate static sobj_ops_t lwp_sobj_pi_ops = {
987c478bd9Sstevel@tonic-gate 	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
997c478bd9Sstevel@tonic-gate 	turnstile_change_pri
1007c478bd9Sstevel@tonic-gate };
1017c478bd9Sstevel@tonic-gate 
1027c478bd9Sstevel@tonic-gate static sleepq_head_t	lwpsleepq[NSLEEPQ];
1037c478bd9Sstevel@tonic-gate upib_t			upimutextab[UPIMUTEX_TABSIZE];
1047c478bd9Sstevel@tonic-gate 
1057c478bd9Sstevel@tonic-gate #define	LWPCHAN_LOCK_SHIFT	10	/* 1024 locks for each pool */
1067c478bd9Sstevel@tonic-gate #define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate /*
1097c478bd9Sstevel@tonic-gate  * We know that both lc_wchan and lc_wchan0 are addresses that most
1107c478bd9Sstevel@tonic-gate  * likely are 8-byte aligned, so we shift off the low-order 3 bits.
1117c478bd9Sstevel@tonic-gate  * 'pool' is either 0 or 1.
1127c478bd9Sstevel@tonic-gate  */
1137c478bd9Sstevel@tonic-gate #define	LWPCHAN_LOCK_HASH(X, pool) \
1147c478bd9Sstevel@tonic-gate 	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
1157c478bd9Sstevel@tonic-gate 	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))
1167c478bd9Sstevel@tonic-gate 
1177c478bd9Sstevel@tonic-gate static kmutex_t		lwpchanlock[2 * LWPCHAN_LOCK_SIZE];
1187c478bd9Sstevel@tonic-gate 
1197c478bd9Sstevel@tonic-gate /*
1207c478bd9Sstevel@tonic-gate  * Is this a POSIX threads user-level lock requiring priority inheritance?
1217c478bd9Sstevel@tonic-gate  */
1227c478bd9Sstevel@tonic-gate #define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)
1237c478bd9Sstevel@tonic-gate 
1247c478bd9Sstevel@tonic-gate static sleepq_head_t *
lwpsqhash(lwpchan_t * lwpchan)1257c478bd9Sstevel@tonic-gate lwpsqhash(lwpchan_t *lwpchan)
1267c478bd9Sstevel@tonic-gate {
1277c478bd9Sstevel@tonic-gate 	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
1287c478bd9Sstevel@tonic-gate 	return (&lwpsleepq[SQHASHINDEX(x)]);
1297c478bd9Sstevel@tonic-gate }
1307c478bd9Sstevel@tonic-gate 
1317c478bd9Sstevel@tonic-gate /*
1327c478bd9Sstevel@tonic-gate  * Lock an lwpchan.
1337c478bd9Sstevel@tonic-gate  * Keep this in sync with lwpchan_unlock(), below.
1347c478bd9Sstevel@tonic-gate  */
1357c478bd9Sstevel@tonic-gate static void
lwpchan_lock(lwpchan_t * lwpchan,int pool)1367c478bd9Sstevel@tonic-gate lwpchan_lock(lwpchan_t *lwpchan, int pool)
1377c478bd9Sstevel@tonic-gate {
1387c478bd9Sstevel@tonic-gate 	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
1397c478bd9Sstevel@tonic-gate 	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
1407c478bd9Sstevel@tonic-gate }
1417c478bd9Sstevel@tonic-gate 
1427c478bd9Sstevel@tonic-gate /*
1437c478bd9Sstevel@tonic-gate  * Unlock an lwpchan.
1447c478bd9Sstevel@tonic-gate  * Keep this in sync with lwpchan_lock(), above.
1457c478bd9Sstevel@tonic-gate  */
1467c478bd9Sstevel@tonic-gate static void
lwpchan_unlock(lwpchan_t * lwpchan,int pool)1477c478bd9Sstevel@tonic-gate lwpchan_unlock(lwpchan_t *lwpchan, int pool)
1487c478bd9Sstevel@tonic-gate {
1497c478bd9Sstevel@tonic-gate 	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
1507c478bd9Sstevel@tonic-gate 	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
1517c478bd9Sstevel@tonic-gate }
1527c478bd9Sstevel@tonic-gate 
/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap().  Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 *
 * Holds p->p_lcp_lock across the whole scan (so the cache cannot be
 * resized underneath us) and each bucket's lwpchan_lock while its
 * chain is edited.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;	/* link slot pointing at current entry */
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		/* cheap test before taking the bucket lock */
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				/* in range: unlink and destroy the entry */
				*prev = ent->lwpchan_next;
				/*
				 * We do this only for the obsolete type
				 * USYNC_PROCESS_ROBUST.  Otherwise robust
				 * locks do not draw ELOCKUNMAPPED or
				 * EOWNERDEAD due to being unmapped.
				 */
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				/*
				 * If there is a user-level robust lock
				 * registration, mark it as invalid.
				 * (Note: 'addr' is deliberately reused here
				 * for the registration address.)
				 */
				if ((addr = ent->lwpchan_uaddr) != NULL)
					lwp_mutex_unregister(addr);
				kmem_free(ent, sizeof (*ent));
				atomic_dec_32(&lcp->lwpchan_entries);
			} else {
				/* entry survives; advance the link slot */
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate /*
2097c478bd9Sstevel@tonic-gate  * Given an lwpchan cache pointer and a process virtual address,
2107c478bd9Sstevel@tonic-gate  * return a pointer to the corresponding lwpchan hash bucket.
2117c478bd9Sstevel@tonic-gate  */
2127c478bd9Sstevel@tonic-gate static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t * lcp,uintptr_t addr)2137c478bd9Sstevel@tonic-gate lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
2147c478bd9Sstevel@tonic-gate {
2157c478bd9Sstevel@tonic-gate 	uint_t i;
2167c478bd9Sstevel@tonic-gate 
2177c478bd9Sstevel@tonic-gate 	/*
2187c478bd9Sstevel@tonic-gate 	 * All user-level sync object addresses are 8-byte aligned.
2197c478bd9Sstevel@tonic-gate 	 * Ignore the lowest 3 bits of the address and use the
2207c478bd9Sstevel@tonic-gate 	 * higher-order 2*lwpchan_bits bits for the hash index.
2217c478bd9Sstevel@tonic-gate 	 */
2227c478bd9Sstevel@tonic-gate 	addr >>= 3;
2237c478bd9Sstevel@tonic-gate 	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
2247c478bd9Sstevel@tonic-gate 	return (lcp->lwpchan_cache + i);
2257c478bd9Sstevel@tonic-gate }
2267c478bd9Sstevel@tonic-gate 
/*
 * (Re)allocate the per-process lwpchan cache.
 *
 * Allocates a table of 2^bits buckets, then, under p->p_lcp_lock,
 * either discards it (if another thread already installed an equal
 * or larger table) or rehashes every existing entry into it and
 * publishes it as p->p_lcp.  The old table is not freed here; it is
 * chained onto the new table's retire list (see comment below).
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	/* build the new table before taking any locks */
	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
	    sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			    sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table.  The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
				    (uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table.  We can't actually kmem_free() it
	 * now because someone may still have a pointer to it.  Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one.  exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it.  Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}
3227c478bd9Sstevel@tonic-gate 
3237c478bd9Sstevel@tonic-gate /*
3247c478bd9Sstevel@tonic-gate  * Deallocate the lwpchan cache, and any dynamically allocated mappings.
3257c478bd9Sstevel@tonic-gate  * Called when the process exits or execs.  All lwps except one have
3267c478bd9Sstevel@tonic-gate  * exited so we need no locks here.
3277c478bd9Sstevel@tonic-gate  */
3287c478bd9Sstevel@tonic-gate void
lwpchan_destroy_cache(int exec)3297c478bd9Sstevel@tonic-gate lwpchan_destroy_cache(int exec)
3307c478bd9Sstevel@tonic-gate {
3317c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
3327c478bd9Sstevel@tonic-gate 	lwpchan_hashbucket_t *hashbucket;
3337c478bd9Sstevel@tonic-gate 	lwpchan_hashbucket_t *endbucket;
3347c478bd9Sstevel@tonic-gate 	lwpchan_data_t *lcp;
3357c478bd9Sstevel@tonic-gate 	lwpchan_entry_t *ent;
3367c478bd9Sstevel@tonic-gate 	lwpchan_entry_t *next;
3377c478bd9Sstevel@tonic-gate 	uint16_t lockflg;
3387c478bd9Sstevel@tonic-gate 
3397c478bd9Sstevel@tonic-gate 	lcp = p->p_lcp;
3407c478bd9Sstevel@tonic-gate 	p->p_lcp = NULL;
3417c478bd9Sstevel@tonic-gate 
3427c478bd9Sstevel@tonic-gate 	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
3437c478bd9Sstevel@tonic-gate 	hashbucket = lcp->lwpchan_cache;
3447c478bd9Sstevel@tonic-gate 	endbucket = hashbucket + lcp->lwpchan_size;
3457c478bd9Sstevel@tonic-gate 	for (; hashbucket < endbucket; hashbucket++) {
3467c478bd9Sstevel@tonic-gate 		ent = hashbucket->lwpchan_chain;
3477c478bd9Sstevel@tonic-gate 		hashbucket->lwpchan_chain = NULL;
3487c478bd9Sstevel@tonic-gate 		while (ent != NULL) {
3497c478bd9Sstevel@tonic-gate 			next = ent->lwpchan_next;
3507c478bd9Sstevel@tonic-gate 			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
351bb88be57SRoger A. Faulkner 			    (ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
352bb88be57SRoger A. Faulkner 			    == (USYNC_PROCESS | LOCK_ROBUST))
3537c478bd9Sstevel@tonic-gate 				lwp_mutex_cleanup(ent, lockflg);
3547c478bd9Sstevel@tonic-gate 			kmem_free(ent, sizeof (*ent));
3557c478bd9Sstevel@tonic-gate 			ent = next;
3567c478bd9Sstevel@tonic-gate 		}
3577c478bd9Sstevel@tonic-gate 	}
3587c478bd9Sstevel@tonic-gate 
3597c478bd9Sstevel@tonic-gate 	while (lcp != NULL) {
3607c478bd9Sstevel@tonic-gate 		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
3617c478bd9Sstevel@tonic-gate 		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
36231db3c26Sraf 		    sizeof (lwpchan_hashbucket_t));
3637c478bd9Sstevel@tonic-gate 		kmem_free(lcp, sizeof (lwpchan_data_t));
3647c478bd9Sstevel@tonic-gate 		lcp = next_lcp;
3657c478bd9Sstevel@tonic-gate 	}
3667c478bd9Sstevel@tonic-gate }
3677c478bd9Sstevel@tonic-gate 
3687c478bd9Sstevel@tonic-gate /*
3697c478bd9Sstevel@tonic-gate  * Return zero when there is an entry in the lwpchan cache for the
3707c478bd9Sstevel@tonic-gate  * given process virtual address and non-zero when there is not.
3717c478bd9Sstevel@tonic-gate  * The returned non-zero value is the current length of the
3727c478bd9Sstevel@tonic-gate  * hash chain plus one.  The caller holds the hash bucket lock.
3737c478bd9Sstevel@tonic-gate  */
3747c478bd9Sstevel@tonic-gate static uint_t
lwpchan_cache_mapping(caddr_t addr,int type,int pool,lwpchan_t * lwpchan,lwpchan_hashbucket_t * hashbucket)3757c478bd9Sstevel@tonic-gate lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
3767c478bd9Sstevel@tonic-gate 	lwpchan_hashbucket_t *hashbucket)
3777c478bd9Sstevel@tonic-gate {
3787c478bd9Sstevel@tonic-gate 	lwpchan_entry_t *ent;
3797c478bd9Sstevel@tonic-gate 	uint_t count = 1;
3807c478bd9Sstevel@tonic-gate 
3817c478bd9Sstevel@tonic-gate 	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
3827c478bd9Sstevel@tonic-gate 		if (ent->lwpchan_addr == addr) {
3837c478bd9Sstevel@tonic-gate 			if (ent->lwpchan_type != type ||
3847c478bd9Sstevel@tonic-gate 			    ent->lwpchan_pool != pool) {
3857c478bd9Sstevel@tonic-gate 				/*
3867c478bd9Sstevel@tonic-gate 				 * This shouldn't happen, but might if the
3877c478bd9Sstevel@tonic-gate 				 * process reuses its memory for different
3887c478bd9Sstevel@tonic-gate 				 * types of sync objects.  We test first
3897c478bd9Sstevel@tonic-gate 				 * to avoid grabbing the memory cache line.
3907c478bd9Sstevel@tonic-gate 				 */
3917c478bd9Sstevel@tonic-gate 				ent->lwpchan_type = (uint16_t)type;
3927c478bd9Sstevel@tonic-gate 				ent->lwpchan_pool = (uint16_t)pool;
3937c478bd9Sstevel@tonic-gate 			}
3947c478bd9Sstevel@tonic-gate 			*lwpchan = ent->lwpchan_lwpchan;
3957c478bd9Sstevel@tonic-gate 			return (0);
3967c478bd9Sstevel@tonic-gate 		}
3977c478bd9Sstevel@tonic-gate 		count++;
3987c478bd9Sstevel@tonic-gate 	}
3997c478bd9Sstevel@tonic-gate 	return (count);
4007c478bd9Sstevel@tonic-gate }
4017c478bd9Sstevel@tonic-gate 
/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
 *
 * Returns 1 on success (with *lwpchan filled in) and 0 when
 * as_getmemid() fails for the address.  Restarts from 'top' whenever
 * the cache is (re)allocated, since the bucket we computed may then
 * belong to a stale table.
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr, caddr_t uaddr,
	int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t	memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	/* drop the bucket lock before the (possibly blocking) calls below */
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	/* re-probe: another thread may have inserted while we slept */
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	/* insert the new entry at the head of the bucket chain */
	ent->lwpchan_addr = addr;
	ent->lwpchan_uaddr = uaddr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_inc_32(&lcp->lwpchan_entries);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}
4757c478bd9Sstevel@tonic-gate 
4767c478bd9Sstevel@tonic-gate /*
4777c478bd9Sstevel@tonic-gate  * Return a unique pair of identifiers that corresponds to a
4787c478bd9Sstevel@tonic-gate  * synchronization object's virtual address.  Process-shared
4797c478bd9Sstevel@tonic-gate  * sync objects usually get vnode/offset from as_getmemid().
4807c478bd9Sstevel@tonic-gate  */
4817c478bd9Sstevel@tonic-gate static int
get_lwpchan(struct as * as,caddr_t addr,int type,lwpchan_t * lwpchan,int pool)4827c478bd9Sstevel@tonic-gate get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
4837c478bd9Sstevel@tonic-gate {
4847c478bd9Sstevel@tonic-gate 	/*
4857c478bd9Sstevel@tonic-gate 	 * If the lwp synch object is defined to be process-private,
4867c478bd9Sstevel@tonic-gate 	 * we just make the first field of the lwpchan be 'as' and
4877c478bd9Sstevel@tonic-gate 	 * the second field be the synch object's virtual address.
4887c478bd9Sstevel@tonic-gate 	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
4897c478bd9Sstevel@tonic-gate 	 * The lwpchan cache is used only for process-shared objects.
4907c478bd9Sstevel@tonic-gate 	 */
491883492d5Sraf 	if (!(type & USYNC_PROCESS)) {
4927c478bd9Sstevel@tonic-gate 		lwpchan->lc_wchan0 = (caddr_t)as;
4937c478bd9Sstevel@tonic-gate 		lwpchan->lc_wchan = addr;
4947c478bd9Sstevel@tonic-gate 		return (1);
4957c478bd9Sstevel@tonic-gate 	}
496883492d5Sraf 
497c242ec1bSRoger A. Faulkner 	return (lwpchan_get_mapping(as, addr, NULL, type, lwpchan, pool));
4987c478bd9Sstevel@tonic-gate }
4997c478bd9Sstevel@tonic-gate 
/*
 * Put the current thread to sleep on the given lwpchan's sleep queue.
 * Marks the thread T_WAKEABLE, installs the lwp sobj ops, and inserts
 * it on the hashed sleep queue.  The caller regains control after the
 * thread is eventually awakened and rescheduled.
 *
 * The thread_lock()/disp_lock_enter_high()/THREAD_SLEEP() sequence
 * must stay in exactly this order: THREAD_SLEEP() transfers the
 * thread's lock to the sleep queue's sq_lock.
 */
static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;	/* signals may interrupt this sleep */
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	t->t_release = 0;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	CL_SLEEP(t);			/* scheduling-class sleep hook */
	DTRACE_SCHED(sleep);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	/* bookkeeping below runs after the thread lock is dropped */
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	lwp->lwp_ru.nvcsw++;
	(void) new_mstate(curthread, LMS_SLEEP);
}
5247c478bd9Sstevel@tonic-gate 
5257c478bd9Sstevel@tonic-gate static kthread_t *
lwpsobj_pi_owner(upimutex_t * up)5267c478bd9Sstevel@tonic-gate lwpsobj_pi_owner(upimutex_t *up)
5277c478bd9Sstevel@tonic-gate {
5287c478bd9Sstevel@tonic-gate 	return (up->upi_owner);
5297c478bd9Sstevel@tonic-gate }
5307c478bd9Sstevel@tonic-gate 
5317c478bd9Sstevel@tonic-gate static struct upimutex *
upi_get(upib_t * upibp,lwpchan_t * lcp)5327c478bd9Sstevel@tonic-gate upi_get(upib_t *upibp, lwpchan_t *lcp)
5337c478bd9Sstevel@tonic-gate {
5347c478bd9Sstevel@tonic-gate 	struct upimutex *upip;
5357c478bd9Sstevel@tonic-gate 
5367c478bd9Sstevel@tonic-gate 	for (upip = upibp->upib_first; upip != NULL;
5377c478bd9Sstevel@tonic-gate 	    upip = upip->upi_nextchain) {
5387c478bd9Sstevel@tonic-gate 		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
5397c478bd9Sstevel@tonic-gate 		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
5407c478bd9Sstevel@tonic-gate 			break;
5417c478bd9Sstevel@tonic-gate 	}
5427c478bd9Sstevel@tonic-gate 	return (upip);
5437c478bd9Sstevel@tonic-gate }
5447c478bd9Sstevel@tonic-gate 
5457c478bd9Sstevel@tonic-gate static void
upi_chain_add(upib_t * upibp,struct upimutex * upimutex)5467c478bd9Sstevel@tonic-gate upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
5477c478bd9Sstevel@tonic-gate {
5487c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&upibp->upib_lock));
5497c478bd9Sstevel@tonic-gate 
5507c478bd9Sstevel@tonic-gate 	/*
5517c478bd9Sstevel@tonic-gate 	 * Insert upimutex at front of list. Maybe a bit unfair
5527c478bd9Sstevel@tonic-gate 	 * but assume that not many lwpchans hash to the same
5537c478bd9Sstevel@tonic-gate 	 * upimutextab bucket, i.e. the list of upimutexes from
5547c478bd9Sstevel@tonic-gate 	 * upib_first is not too long.
5557c478bd9Sstevel@tonic-gate 	 */
5567c478bd9Sstevel@tonic-gate 	upimutex->upi_nextchain = upibp->upib_first;
5577c478bd9Sstevel@tonic-gate 	upibp->upib_first = upimutex;
5587c478bd9Sstevel@tonic-gate }
5597c478bd9Sstevel@tonic-gate 
5607c478bd9Sstevel@tonic-gate static void
upi_chain_del(upib_t * upibp,struct upimutex * upimutex)5617c478bd9Sstevel@tonic-gate upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
5627c478bd9Sstevel@tonic-gate {
5637c478bd9Sstevel@tonic-gate 	struct upimutex **prev;
5647c478bd9Sstevel@tonic-gate 
5657c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&upibp->upib_lock));
5667c478bd9Sstevel@tonic-gate 
5677c478bd9Sstevel@tonic-gate 	prev = &upibp->upib_first;
5687c478bd9Sstevel@tonic-gate 	while (*prev != upimutex) {
5697c478bd9Sstevel@tonic-gate 		prev = &(*prev)->upi_nextchain;
5707c478bd9Sstevel@tonic-gate 	}
5717c478bd9Sstevel@tonic-gate 	*prev = upimutex->upi_nextchain;
5727c478bd9Sstevel@tonic-gate 	upimutex->upi_nextchain = NULL;
5737c478bd9Sstevel@tonic-gate }
5747c478bd9Sstevel@tonic-gate 
5757c478bd9Sstevel@tonic-gate /*
5767c478bd9Sstevel@tonic-gate  * Add upimutex to chain of upimutexes held by curthread.
5777c478bd9Sstevel@tonic-gate  * Returns number of upimutexes held by curthread.
5787c478bd9Sstevel@tonic-gate  */
5797c478bd9Sstevel@tonic-gate static uint32_t
upi_mylist_add(struct upimutex * upimutex)5807c478bd9Sstevel@tonic-gate upi_mylist_add(struct upimutex *upimutex)
5817c478bd9Sstevel@tonic-gate {
5827c478bd9Sstevel@tonic-gate 	kthread_t *t = curthread;
5837c478bd9Sstevel@tonic-gate 
5847c478bd9Sstevel@tonic-gate 	/*
5857c478bd9Sstevel@tonic-gate 	 * Insert upimutex at front of list of upimutexes owned by t. This
5867c478bd9Sstevel@tonic-gate 	 * would match typical LIFO order in which nested locks are acquired
5877c478bd9Sstevel@tonic-gate 	 * and released.
5887c478bd9Sstevel@tonic-gate 	 */
5897c478bd9Sstevel@tonic-gate 	upimutex->upi_nextowned = t->t_upimutex;
5907c478bd9Sstevel@tonic-gate 	t->t_upimutex = upimutex;
5917c478bd9Sstevel@tonic-gate 	t->t_nupinest++;
5927c478bd9Sstevel@tonic-gate 	ASSERT(t->t_nupinest > 0);
5937c478bd9Sstevel@tonic-gate 	return (t->t_nupinest);
5947c478bd9Sstevel@tonic-gate }
5957c478bd9Sstevel@tonic-gate 
5967c478bd9Sstevel@tonic-gate /*
5977c478bd9Sstevel@tonic-gate  * Delete upimutex from list of upimutexes owned by curthread.
5987c478bd9Sstevel@tonic-gate  */
5997c478bd9Sstevel@tonic-gate static void
upi_mylist_del(struct upimutex * upimutex)6007c478bd9Sstevel@tonic-gate upi_mylist_del(struct upimutex *upimutex)
6017c478bd9Sstevel@tonic-gate {
6027c478bd9Sstevel@tonic-gate 	kthread_t *t = curthread;
6037c478bd9Sstevel@tonic-gate 	struct upimutex **prev;
6047c478bd9Sstevel@tonic-gate 
6057c478bd9Sstevel@tonic-gate 	/*
6067c478bd9Sstevel@tonic-gate 	 * Since the order in which nested locks are acquired and released,
6077c478bd9Sstevel@tonic-gate 	 * is typically LIFO, and typical nesting levels are not too deep, the
6087c478bd9Sstevel@tonic-gate 	 * following should not be expensive in the general case.
6097c478bd9Sstevel@tonic-gate 	 */
6107c478bd9Sstevel@tonic-gate 	prev = &t->t_upimutex;
6117c478bd9Sstevel@tonic-gate 	while (*prev != upimutex) {
6127c478bd9Sstevel@tonic-gate 		prev = &(*prev)->upi_nextowned;
6137c478bd9Sstevel@tonic-gate 	}
6147c478bd9Sstevel@tonic-gate 	*prev = upimutex->upi_nextowned;
6157c478bd9Sstevel@tonic-gate 	upimutex->upi_nextowned = NULL;
6167c478bd9Sstevel@tonic-gate 	ASSERT(t->t_nupinest > 0);
6177c478bd9Sstevel@tonic-gate 	t->t_nupinest--;
6187c478bd9Sstevel@tonic-gate }
6197c478bd9Sstevel@tonic-gate 
6207c478bd9Sstevel@tonic-gate /*
6217c478bd9Sstevel@tonic-gate  * Returns true if upimutex is owned. Should be called only when upim points
6227c478bd9Sstevel@tonic-gate  * to kmem which cannot disappear from underneath.
6237c478bd9Sstevel@tonic-gate  */
6247c478bd9Sstevel@tonic-gate static int
upi_owned(upimutex_t * upim)6257c478bd9Sstevel@tonic-gate upi_owned(upimutex_t *upim)
6267c478bd9Sstevel@tonic-gate {
6277c478bd9Sstevel@tonic-gate 	return (upim->upi_owner == curthread);
6287c478bd9Sstevel@tonic-gate }
6297c478bd9Sstevel@tonic-gate 
6307c478bd9Sstevel@tonic-gate /*
6317c478bd9Sstevel@tonic-gate  * Returns pointer to kernel object (upimutex_t *) if lp is owned.
6327c478bd9Sstevel@tonic-gate  */
6337c478bd9Sstevel@tonic-gate static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t * lp,uint8_t type)6347c478bd9Sstevel@tonic-gate lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
6357c478bd9Sstevel@tonic-gate {
6367c478bd9Sstevel@tonic-gate 	lwpchan_t lwpchan;
6377c478bd9Sstevel@tonic-gate 	upib_t *upibp;
6387c478bd9Sstevel@tonic-gate 	struct upimutex *upimutex;
6397c478bd9Sstevel@tonic-gate 
6407c478bd9Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
6417c478bd9Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL))
6427c478bd9Sstevel@tonic-gate 		return (NULL);
6437c478bd9Sstevel@tonic-gate 
6447c478bd9Sstevel@tonic-gate 	upibp = &UPI_CHAIN(lwpchan);
6457c478bd9Sstevel@tonic-gate 	mutex_enter(&upibp->upib_lock);
6467c478bd9Sstevel@tonic-gate 	upimutex = upi_get(upibp, &lwpchan);
6477c478bd9Sstevel@tonic-gate 	if (upimutex == NULL || upimutex->upi_owner != curthread) {
6487c478bd9Sstevel@tonic-gate 		mutex_exit(&upibp->upib_lock);
6497c478bd9Sstevel@tonic-gate 		return (NULL);
6507c478bd9Sstevel@tonic-gate 	}
6517c478bd9Sstevel@tonic-gate 	mutex_exit(&upibp->upib_lock);
6527c478bd9Sstevel@tonic-gate 	return (upimutex);
6537c478bd9Sstevel@tonic-gate }
6547c478bd9Sstevel@tonic-gate 
/*
 * Unlocks upimutex, waking up waiters if any.  The upimutex kmem is freed
 * if no lock hand-off occurs.
 *
 * The 'flag' argument is the user-level mutex_flag word: if it carries
 * LOCK_NOTRECOVERABLE, no hand-off is performed and all waiters are woken
 * so that they can observe the unrecoverable state themselves.
 *
 * NOTE(review): turnstile_lookup() appears to return with turnstile
 * chain state held; on the no-sleepers path it is released via
 * turnstile_exit() — confirm against turnstile.c.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	/* drop it from curthread's owned list before giving it away */
	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) { /* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit. Waiters might have been
			 * interrupted. No need to clear the w bit (upimutex
			 * will soon be freed). Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}
7037c478bd9Sstevel@tonic-gate 
/*
 * Acquire a priority-inheritance user-level mutex.
 *
 *   lp    - user address of the lwp_mutex_t
 *   type  - mutex type bits (e.g. USYNC_PROCESS_ROBUST is consulted for
 *           the robustness error code)
 *   try   - UPIMUTEX_TRY for a non-blocking attempt (returns EBUSY if
 *           the lock is held), otherwise block for the lock
 *   lwptp - optional timeout state; lwpt_time_error is only reported at
 *           the point we would actually sleep, per SUSv3
 *
 * Returns 0 on success, or EFAULT, EDEADLK, EBUSY, ENOMEM (nesting limit),
 * EINTR/ETIME (interrupted/timed-out sleep), or one of the robustness
 * codes EOWNERDEAD / ELOCKUNMAPPED / ENOTRECOVERABLE.  Note that for
 * EOWNERDEAD/ELOCKUNMAPPED the lock IS held on return; for the other
 * error codes it is not.
 *
 * All user-memory accesses are done under on_fault(); a fault releases
 * the lock if we hold it and returns EFAULT.
 */
static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL)  {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread; /* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		/* enforce the nesting limit unless privileged */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			/* robustness event: lock stays held on return */
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time,
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSV3 Posix spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * Unlike the protocol for other lwp timedwait operations,
		 * we must drop t_delay_lock before going to sleep in
		 * turnstile_block() for a upi mutex.
		 * See the comments below and in turnstile.c
		 */
		mutex_enter(&curthread->t_delay_lock);
		(void) lwp_timer_enqueue(lwptp);
		mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit. Wait bit is set under
	 * the upib_lock, which is not released until the turnstile lock
	 * is acquired. Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up. Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup(). All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block(). turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling turnstile_block().
	 *    The address of upib_lock is passed to turnstile_block() which
	 *    releases it after releasing all turnstile locks, and before going
	 *    to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted. The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count. So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK.  If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		/* hand-off gave us the lock; record it on our owned list */
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD or ELOCKUNMAPPED
	 *   should be returned.
	 * - if lock isn't held, check if ENOTRECOVERABLE should
	 *   be returned.
	 *
	 * Now, either lp->mutex_flag is readable or it's not. If not
	 * readable, the on_fault path will cause a return with EFAULT
	 * as it should.  If it is readable, the state of the flag
	 * encodes the robustness state of the lock:
	 *
	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD
	 * or LOCK_UNMAPPED setting will influence the return code
	 * appropriately.  If the upimutex is not locked here, this
	 * could be due to a spurious wake-up or a NOTRECOVERABLE
	 * event.  The flag's setting can be used to distinguish
	 * between these two events.
	 */
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (upilocked) {
		/*
		 * If the thread wakes up from turnstile_block with the lock
		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
		 * since it would not have been handed-off the lock.
		 * So, no need to check for this case.
		 */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOMEM;
		} else if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			if (flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
	} else {
		/*
		 * Wake-up without the upimutex held. Either this is a
		 * spurious wake-up (due to signals, forkall(), whatever), or
		 * it is a LOCK_NOTRECOVERABLE robustness event. The setting
		 * of the mutex flag can be used to distinguish between the
		 * two events.
		 */
		if (flag & LOCK_NOTRECOVERABLE) {
			error = ENOTRECOVERABLE;
		} else {
			/*
			 * Here, the flag could be set to LOCK_OWNERDEAD or
			 * not. In both cases, this is a spurious wakeup,
			 * since the upi lock is not held, but the thread
			 * has returned from turnstile_block().
			 *
			 * The user flag could be LOCK_OWNERDEAD if, at the
			 * same time as curthread having been woken up
			 * spuriously, the owner (say Tdead) has died, marked
			 * the mutex flag accordingly, and handed off the lock
			 * to some other waiter (say Tnew). curthread just
			 * happened to read the flag while Tnew has yet to deal
			 * with the owner-dead event.
			 *
			 * In this event, curthread should retry the lock.
			 * If Tnew is able to cleanup the lock, curthread
			 * will eventually get the lock with a zero error code,
			 * If Tnew is unable to cleanup, its eventual call to
			 * unlock the lock will result in the mutex flag being
			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
			 * all waiters, including curthread, which will then
			 * eventually return ENOTRECOVERABLE due to the above
			 * check.
			 *
			 * Of course, if the user-flag is not set with
			 * LOCK_OWNERDEAD, retrying is the thing to do, since
			 * this is definitely a spurious wakeup.
			 */
			goto retry;
		}
	}

out:
	no_fault();
	return (error);
}
9687c478bd9Sstevel@tonic-gate 
9697c478bd9Sstevel@tonic-gate 
/*
 * Release a priority-inheritance user-level mutex held by curthread.
 *
 * Returns 0 on success, EPERM if the lock does not exist or is not owned
 * by curthread, or EFAULT if the user memory is unreadable/unwritable.
 *
 * If the mutex was marked LOCK_OWNERDEAD or LOCK_UNMAPPED (i.e. curthread
 * inherited it from a dead owner and is now unlocking without clearing
 * that state), the mutex is transitioned to LOCK_NOTRECOVERABLE before
 * the unlock, so subsequent lockers observe the unrecoverable state.
 */
static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error. The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock); /* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	/* clear the user-visible owner/ownerpid before handing off */
	set_owner_pid(lp, 0, 0);
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}
10237c478bd9Sstevel@tonic-gate 
/*
 * Set the owner and ownerpid fields of a user-level mutex. Note, this function
 * uses the suword*_noerr routines which must be called between
 * on_fault/no_fault. However, this routine itself does not do the
 * on_fault/no_fault and it is assumed all the callers will do so instead!
 *
 *   lp    - user address of the lwp_mutex_t
 *   owner - value to store in mutex_owner (0 to clear)
 *   pid   - value to store in mutex_ownerpid (0 to clear)
 *
 * On LP64 kernels the 64-bit owner word is stored with a single
 * suword64 only when lp is suitably aligned; otherwise (unaligned
 * mutex, or a 32-bit kernel) it is stored as two 32-bit halves.
 */
static void
set_owner_pid(lwp_mutex_t *lp, uintptr_t owner, pid_t pid)
{
	union {
		uint64_t word64;
		uint32_t word32[2];
	} un;

	un.word64 = (uint64_t)owner;

	suword32_noerr(&lp->mutex_ownerpid, pid);
#if defined(_LP64)
	if (((uintptr_t)lp & (_LONG_LONG_ALIGNMENT - 1)) == 0) { /* aligned */
		suword64_noerr(&lp->mutex_owner, un.word64);
		return;
	}
#endif
	/* mutex is unaligned or we are running on a 32-bit kernel */
	suword32_noerr((uint32_t *)&lp->mutex_owner, un.word32[0]);
	suword32_noerr((uint32_t *)&lp->mutex_owner + 1, un.word32[1]);
}
1051db94676fSRoger A. Faulkner 
/*
 * Clear the contents of a user-level mutex; return the flags.
 * Used only by upi_dead() and lwp_mutex_cleanup(), below.
 *
 * The robustness flag 'lockflg' (LOCK_OWNERDEAD or LOCK_UNMAPPED) is
 * OR-ed into mutex_flag only if no robustness flag is already set, so
 * an existing OWNERDEAD/UNMAPPED/NOTRECOVERABLE state is preserved.
 * The owner, ownerpid and recursion count fields are always cleared.
 * Must be called between on_fault/no_fault by the caller.
 */
static uint16_t
lwp_clear_mutex(lwp_mutex_t *lp, uint16_t lockflg)
{
	uint16_t flag;

	fuword16_noerr(&lp->mutex_flag, &flag);
	if ((flag &
	    (LOCK_OWNERDEAD | LOCK_UNMAPPED | LOCK_NOTRECOVERABLE)) == 0) {
		flag |= lockflg;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	set_owner_pid(lp, 0, 0);
	suword8_noerr(&lp->mutex_rcount, 0);

	/* return the (possibly updated) flag word read from user memory */
	return (flag);
}
1072883492d5Sraf 
/*
 * Mark user mutex state, corresponding to kernel upimutex,
 * as LOCK_UNMAPPED or LOCK_OWNERDEAD, as appropriate
 */
static int
upi_dead(upimutex_t *upip, uint16_t lockflg)
{
	label_t ljb;
	int error = 0;
	lwp_mutex_t *lp;

	/*
	 * Guard the user-memory accesses below: a fault while touching
	 * the (possibly unmapped) user-level mutex returns EFAULT to
	 * the caller instead of taking the kernel down.
	 */
	if (on_fault(&ljb)) {
		error = EFAULT;
		goto out;
	}

	lp = upip->upi_vaddr;
	/* Flag the user object dead/unmapped and clear its owner state. */
	(void) lwp_clear_mutex(lp, lockflg);
	/* Also clear the user-visible lock byte. */
	suword8_noerr(&lp->mutex_lockw, 0);
out:
	no_fault();
	return (error);
}
10967c478bd9Sstevel@tonic-gate 
10977c478bd9Sstevel@tonic-gate /*
10987c478bd9Sstevel@tonic-gate  * Unlock all upimutexes held by curthread, since curthread is dying.
10997c478bd9Sstevel@tonic-gate  * For each upimutex, attempt to mark its corresponding user mutex object as
11007c478bd9Sstevel@tonic-gate  * dead.
11017c478bd9Sstevel@tonic-gate  */
11027c478bd9Sstevel@tonic-gate void
upimutex_cleanup()11037c478bd9Sstevel@tonic-gate upimutex_cleanup()
11047c478bd9Sstevel@tonic-gate {
11057c478bd9Sstevel@tonic-gate 	kthread_t *t = curthread;
1106883492d5Sraf 	uint16_t lockflg = (ttoproc(t)->p_proc_flag & P_PR_EXEC)?
1107883492d5Sraf 	    LOCK_UNMAPPED : LOCK_OWNERDEAD;
11087c478bd9Sstevel@tonic-gate 	struct upimutex *upip;
11097c478bd9Sstevel@tonic-gate 
11107c478bd9Sstevel@tonic-gate 	while ((upip = t->t_upimutex) != NULL) {
1111883492d5Sraf 		if (upi_dead(upip, lockflg) != 0) {
11127c478bd9Sstevel@tonic-gate 			/*
11137c478bd9Sstevel@tonic-gate 			 * If the user object associated with this upimutex is
11147c478bd9Sstevel@tonic-gate 			 * unmapped, unlock upimutex with the
11157c478bd9Sstevel@tonic-gate 			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
11167c478bd9Sstevel@tonic-gate 			 * woken up. Since user object is unmapped, it could
11177c478bd9Sstevel@tonic-gate 			 * not be marked as dead or notrecoverable.
11187c478bd9Sstevel@tonic-gate 			 * The waiters will now all wake up and return
11197c478bd9Sstevel@tonic-gate 			 * ENOTRECOVERABLE, since they would find that the lock
11207c478bd9Sstevel@tonic-gate 			 * has not been handed-off to them.
11217c478bd9Sstevel@tonic-gate 			 * See lwp_upimutex_lock().
11227c478bd9Sstevel@tonic-gate 			 */
11237c478bd9Sstevel@tonic-gate 			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
11247c478bd9Sstevel@tonic-gate 		} else {
11257c478bd9Sstevel@tonic-gate 			/*
11267c478bd9Sstevel@tonic-gate 			 * The user object has been updated as dead.
11277c478bd9Sstevel@tonic-gate 			 * Unlock the upimutex: if no waiters, upip kmem will
11287c478bd9Sstevel@tonic-gate 			 * be freed. If there is a waiter, the lock will be
11297c478bd9Sstevel@tonic-gate 			 * handed off. If exit() is in progress, each existing
11307c478bd9Sstevel@tonic-gate 			 * waiter will successively get the lock, as owners
11317c478bd9Sstevel@tonic-gate 			 * die, and each new owner will call this routine as
11327c478bd9Sstevel@tonic-gate 			 * it dies. The last owner will free kmem, since
11337c478bd9Sstevel@tonic-gate 			 * it will find the upimutex has no waiters. So,
11347c478bd9Sstevel@tonic-gate 			 * eventually, the kmem is guaranteed to be freed.
11357c478bd9Sstevel@tonic-gate 			 */
11367c478bd9Sstevel@tonic-gate 			upimutex_unlock(upip, 0);
11377c478bd9Sstevel@tonic-gate 		}
11387c478bd9Sstevel@tonic-gate 		/*
11397c478bd9Sstevel@tonic-gate 		 * Note that the call to upimutex_unlock() above will delete
11407c478bd9Sstevel@tonic-gate 		 * upimutex from the t_upimutexes chain. And so the
11417c478bd9Sstevel@tonic-gate 		 * while loop will eventually terminate.
11427c478bd9Sstevel@tonic-gate 		 */
11437c478bd9Sstevel@tonic-gate 	}
11447c478bd9Sstevel@tonic-gate }
11457c478bd9Sstevel@tonic-gate 
/*
 * Acquire a user-level mutex on behalf of the calling lwp, optionally
 * with a timeout.  'owner' is the caller-supplied value stored into the
 * mutex's owner word on successful acquisition.  If 'tsp' is non-NULL it
 * holds the relative timeout on entry and receives the residual time on
 * return.  Returns 0 on success or sets errno: EFAULT, EINTR, ETIME,
 * ENOTRECOVERABLE, EOWNERDEAD, ELOCKUNMAPPED, or a timer validation error.
 */
int
lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp, uintptr_t owner)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	lwp_timer_t lwpt;
	caddr_t timedwait;
	int error = 0;
	int time_error;
	clock_t tim = -1;
	uchar_t waiters;
	volatile int locked = 0;	/* lwpchan entry lock is held */
	volatile int watched = 0;	/* watchpoints disabled around *lp */
	label_t ljb;
	volatile uint8_t type = 0;
	lwpchan_t lwpchan;
	sleepq_head_t *sqh;
	uint16_t flag;
	int imm_timeout = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/*
	 * Put the lwp in an orderly state for debugging,
	 * in case we are stopped while sleeping, below.
	 */
	prstop(PR_REQUESTED, 0);

	timedwait = (caddr_t)tsp;
	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
	    lwpt.lwpt_imm_timeout) {
		imm_timeout = 1;
		timedwait = NULL;
	}

	/*
	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
	 * this micro state is really a run state. If the thread indeed blocks,
	 * this state becomes valid. If not, the state is converted back to
	 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just
	 * when blocking.
	 */
	(void) new_mstate(t, LMS_USER_LOCK);
	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		/* Priority-inheritance mutexes take a separate path. */
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
		if (error == 0 || error == EOWNERDEAD ||
		    error == ELOCKUNMAPPED) {
			/*
			 * NOTE(review): this declaration shadows the
			 * function-level 'locked'; here it records whether
			 * lwp_upimutex_lock() returned a non-zero (owner-dead
			 * or lock-unmapped) status.  If set_owner_pid() below
			 * faults when error was 0, the upimutex is not
			 * unlocked and EFAULT is returned -- verify this is
			 * the intended recovery.
			 */
			volatile int locked = error != 0;
			if (on_fault(&ljb)) {
				if (locked != 0)
					error = lwp_upimutex_unlock(lp, type);
				else
					error = EFAULT;
				goto upierr;
			}
			set_owner_pid(lp, owner,
			    (type & USYNC_PROCESS)? p->p_pid : 0);
			no_fault();
		}
upierr:
		if (tsp && !time_error)	/* copyout the residual time left */
			error = lwp_timer_copyout(&lwpt, error);
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error = ENOTRECOVERABLE;
			goto out;
		}
	}
	/* Advertise ourselves as a waiter before trying the lock. */
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	suword8_noerr(&lp->mutex_waiters, 1);

	/*
	 * If watchpoints are set, they need to be restored, since
	 * atomic accesses of memory such as the call to ulock_try()
	 * below cannot be watched.
	 */

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	while (!ulock_try(&lp->mutex_lockw)) {
		if (time_error) {
			/*
			 * The SUSV3 Posix spec is very clear that we
			 * should get no error from validating the
			 * timer until we would actually sleep.
			 */
			error = time_error;
			break;
		}

		if (watched) {
			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
			watched = 0;
		}

		if (timedwait) {
			/*
			 * If we successfully queue the timeout,
			 * then don't drop t_delay_lock until
			 * we are on the sleep queue (below).
			 */
			mutex_enter(&t->t_delay_lock);
			if (lwp_timer_enqueue(&lwpt) != 0) {
				mutex_exit(&t->t_delay_lock);
				imm_timeout = 1;
				timedwait = NULL;
			}
		}
		lwp_block(&lwpchan);
		/*
		 * Nothing should happen to cause the lwp to go to
		 * sleep again until after it returns from swtch().
		 */
		if (timedwait)
			mutex_exit(&t->t_delay_lock);
		locked = 0;
		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
			setrun(t);
		swtch();
		t->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(&lwpt);
		setallwatch();
		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		if (error) {
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
			    S_WRITE);

			/*
			 * Need to re-compute waiters bit. The waiters field in
			 * the lock is not reliable. Either of two things could
			 * have occurred: no lwp may have called lwp_release()
			 * for me but I have woken up due to a signal or
			 * timeout.  In this case, the waiter bit is incorrect
			 * since it is still set to 1, set above.
			 * OR an lwp_release() did occur for some other lwp on
			 * the same lwpchan. In this case, the waiter bit is
			 * correct.  But which event occurred, one can't tell.
			 * So, recompute.
			 */
			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
			locked = 1;
			sqh = lwpsqhash(&lwpchan);
			disp_lock_enter(&sqh->sq_lock);
			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
			disp_lock_exit(&sqh->sq_lock);
			break;
		}
		lwp->lwp_asleep = 0;
		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
		    S_WRITE);
		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		fuword8_noerr(&lp->mutex_waiters, &waiters);
		suword8_noerr(&lp->mutex_waiters, 1);
		if (type & LOCK_ROBUST) {
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & LOCK_NOTRECOVERABLE) {
				error = ENOTRECOVERABLE;
				break;
			}
		}
	}

	/* Convert the micro state back if we never actually blocked. */
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	if (error == 0) {
		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
		if (type & LOCK_ROBUST) {
			/*
			 * We got a robust lock whose previous owner died or
			 * whose mapping went away: report that to the caller.
			 */
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	suword8_noerr(&lp->mutex_waiters, waiters);
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (tsp && !time_error)		/* copyout the residual time left */
		error = lwp_timer_copyout(&lwpt, error);
	if (error)
		return (set_errno(error));
	return (0);
}
13757c478bd9Sstevel@tonic-gate 
13767c478bd9Sstevel@tonic-gate static int
iswanted(kthread_t * t,lwpchan_t * lwpchan)13777c478bd9Sstevel@tonic-gate iswanted(kthread_t *t, lwpchan_t *lwpchan)
13787c478bd9Sstevel@tonic-gate {
13797c478bd9Sstevel@tonic-gate 	/*
13807c478bd9Sstevel@tonic-gate 	 * The caller holds the dispatcher lock on the sleep queue.
13817c478bd9Sstevel@tonic-gate 	 */
13827c478bd9Sstevel@tonic-gate 	while (t != NULL) {
13837c478bd9Sstevel@tonic-gate 		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
13847c478bd9Sstevel@tonic-gate 		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
13857c478bd9Sstevel@tonic-gate 			return (1);
13867c478bd9Sstevel@tonic-gate 		t = t->t_link;
13877c478bd9Sstevel@tonic-gate 	}
13887c478bd9Sstevel@tonic-gate 	return (0);
13897c478bd9Sstevel@tonic-gate }
13907c478bd9Sstevel@tonic-gate 
13917c478bd9Sstevel@tonic-gate /*
13927c478bd9Sstevel@tonic-gate  * Return the highest priority thread sleeping on this lwpchan.
13937c478bd9Sstevel@tonic-gate  */
13947c478bd9Sstevel@tonic-gate static kthread_t *
lwp_queue_waiter(lwpchan_t * lwpchan)13957c478bd9Sstevel@tonic-gate lwp_queue_waiter(lwpchan_t *lwpchan)
13967c478bd9Sstevel@tonic-gate {
13977c478bd9Sstevel@tonic-gate 	sleepq_head_t *sqh;
13987c478bd9Sstevel@tonic-gate 	kthread_t *tp;
13997c478bd9Sstevel@tonic-gate 
14007c478bd9Sstevel@tonic-gate 	sqh = lwpsqhash(lwpchan);
14017c478bd9Sstevel@tonic-gate 	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
14027c478bd9Sstevel@tonic-gate 	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
14037c478bd9Sstevel@tonic-gate 		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
14047c478bd9Sstevel@tonic-gate 		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
14057c478bd9Sstevel@tonic-gate 			break;
14067c478bd9Sstevel@tonic-gate 	}
14077c478bd9Sstevel@tonic-gate 	disp_lock_exit(&sqh->sq_lock);
14087c478bd9Sstevel@tonic-gate 	return (tp);
14097c478bd9Sstevel@tonic-gate }
14107c478bd9Sstevel@tonic-gate 
/*
 * Wake exactly one thread sleeping on the given lwpchan, provided its
 * wait kind matches 'sync_type' (T_WAITCVSEM for cv/semaphore waiters,
 * 0 for mutex waiters).  On success, set *waiters to whether any other
 * thread still waits on the lwpchan and return 1; otherwise return 0.
 */
static int
lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
{
	sleepq_head_t *sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * The following is typically false. It could be true
			 * only if lwp_release() is called from
			 * lwp_mutex_wakeup() after reading the waiters field
			 * from memory in which the lwp lock used to be, but has
			 * since been re-used to hold a lwp cv or lwp semaphore.
			 * The thread "tp" found to match the lwp lock's wchan
			 * is actually sleeping for the cv or semaphore which
			 * now has the same wchan. In this case, lwp_release()
			 * should return failure.
			 */
			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
				ASSERT(sync_type == 0);
				/*
				 * assert that this can happen only for mutexes
				 * i.e. sync_type == 0, for correctly written
				 * user programs.
				 */
				disp_lock_exit(&sqh->sq_lock);
				return (0);
			}
			/* Any remaining waiter for the same lwpchan? */
			*waiters = iswanted(tp->t_link, lwpchan);
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			tp->t_release = 1;
			THREAD_TRANSITION(tp);	/* drops sleepq lock */
			CL_WAKEUP(tp);
			thread_unlock(tp);	/* drop run queue lock */
			return (1);
		}
		tpp = &tp->t_link;
	}
	*waiters = 0;
	disp_lock_exit(&sqh->sq_lock);
	return (0);
}
14637c478bd9Sstevel@tonic-gate 
/*
 * Wake every thread sleeping on the given lwpchan, regardless of the
 * kind of synchronization object it is waiting for.
 */
static void
lwp_release_all(lwpchan_t *lwpchan)
{
	sleepq_head_t	*sqh;
	kthread_t *tp;
	kthread_t **tpp;

	sqh = lwpsqhash(lwpchan);
	disp_lock_enter(&sqh->sq_lock);		/* lock sleep q queue */
	tpp = &sqh->sq_queue.sq_first;
	while ((tp = *tpp) != NULL) {
		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
			/*
			 * Matching waiter: unlink it in place (tpp is not
			 * advanced, so the next candidate slides into *tpp)
			 * and make it runnable.
			 */
			sleepq_unlink(tpp, tp);
			DTRACE_SCHED1(wakeup, kthread_t *, tp);
			tp->t_wchan0 = NULL;
			tp->t_wchan = NULL;
			tp->t_sobj_ops = NULL;
			CL_WAKEUP(tp);
			thread_unlock_high(tp);	/* release run queue lock */
		} else {
			tpp = &tp->t_link;
		}
	}
	disp_lock_exit(&sqh->sq_lock);		/* drop sleep q lock */
}
14907c478bd9Sstevel@tonic-gate 
/*
 * unblock a lwp that is trying to acquire this mutex. the blocked
 * lwp resumes and retries to acquire the lock.
 * If release_all is set, every waiter on the mutex's lwpchan is woken
 * instead of just one.  Returns 0 or sets errno (EFAULT).
 */
int
lwp_mutex_wakeup(lwp_mutex_t *lp, int release_all)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	volatile int locked = 0;	/* lwpchan entry lock is held */
	volatile int watched = 0;	/* watchpoints disabled around *lp */
	volatile uint8_t type = 0;
	label_t ljb;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
	 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
	 * may fail.  If it fails, do not write into the waiter bit.
	 * The call to lwp_release() might fail due to one of three reasons:
	 *
	 * 	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try. The waiter
	 *	   bit will then be correctly updated by that thread. This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal. This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	if (release_all)
		lwp_release_all(&lwpchan);
	else if (lwp_release(&lwpchan, &waiters, 0))
		suword8_noerr(&lp->mutex_waiters, waiters);
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
15647c478bd9Sstevel@tonic-gate 
15657c478bd9Sstevel@tonic-gate /*
15667c478bd9Sstevel@tonic-gate  * lwp_cond_wait() has four arguments, a pointer to a condition variable,
15677c478bd9Sstevel@tonic-gate  * a pointer to a mutex, a pointer to a timespec for a timed wait and
15687c478bd9Sstevel@tonic-gate  * a flag telling the kernel whether or not to honor the kernel/user
15697c478bd9Sstevel@tonic-gate  * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
15707c478bd9Sstevel@tonic-gate  * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
15717c478bd9Sstevel@tonic-gate  * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
15727c478bd9Sstevel@tonic-gate  * it is used an an in/out parameter.  On entry, it contains the relative
15737c478bd9Sstevel@tonic-gate  * time until timeout.  On exit, we copyout the residual time left to it.
15747c478bd9Sstevel@tonic-gate  */
15757c478bd9Sstevel@tonic-gate int
lwp_cond_wait(lwp_cond_t * cv,lwp_mutex_t * mp,timespec_t * tsp,int check_park)15767c478bd9Sstevel@tonic-gate lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
15777c478bd9Sstevel@tonic-gate {
15787c478bd9Sstevel@tonic-gate 	kthread_t *t = curthread;
15797c478bd9Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
15807c478bd9Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
15817c478bd9Sstevel@tonic-gate 	lwp_timer_t lwpt;
15827c478bd9Sstevel@tonic-gate 	lwpchan_t cv_lwpchan;
15837c478bd9Sstevel@tonic-gate 	lwpchan_t m_lwpchan;
15847c478bd9Sstevel@tonic-gate 	caddr_t timedwait;
15857c478bd9Sstevel@tonic-gate 	volatile uint16_t type = 0;
15867c478bd9Sstevel@tonic-gate 	volatile uint8_t mtype = 0;
15877c478bd9Sstevel@tonic-gate 	uchar_t waiters;
15887c478bd9Sstevel@tonic-gate 	volatile int error;
15897c478bd9Sstevel@tonic-gate 	clock_t tim = -1;
15907c478bd9Sstevel@tonic-gate 	volatile int locked = 0;
15917c478bd9Sstevel@tonic-gate 	volatile int m_locked = 0;
15927c478bd9Sstevel@tonic-gate 	volatile int cvwatched = 0;
15937c478bd9Sstevel@tonic-gate 	volatile int mpwatched = 0;
15947c478bd9Sstevel@tonic-gate 	label_t ljb;
15957c478bd9Sstevel@tonic-gate 	volatile int no_lwpchan = 1;
15967c478bd9Sstevel@tonic-gate 	int imm_timeout = 0;
15977c478bd9Sstevel@tonic-gate 	int imm_unpark = 0;
15987c478bd9Sstevel@tonic-gate 
15997c478bd9Sstevel@tonic-gate 	if ((caddr_t)cv >= p->p_as->a_userlimit ||
16007c478bd9Sstevel@tonic-gate 	    (caddr_t)mp >= p->p_as->a_userlimit)
16017c478bd9Sstevel@tonic-gate 		return (set_errno(EFAULT));
16027c478bd9Sstevel@tonic-gate 
160307a48826SRoger A. Faulkner 	/*
160407a48826SRoger A. Faulkner 	 * Put the lwp in an orderly state for debugging,
160507a48826SRoger A. Faulkner 	 * in case we are stopped while sleeping, below.
160607a48826SRoger A. Faulkner 	 */
160707a48826SRoger A. Faulkner 	prstop(PR_REQUESTED, 0);
160807a48826SRoger A. Faulkner 
16097c478bd9Sstevel@tonic-gate 	timedwait = (caddr_t)tsp;
16107c478bd9Sstevel@tonic-gate 	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
16117c478bd9Sstevel@tonic-gate 		return (set_errno(error));
16127c478bd9Sstevel@tonic-gate 	if (lwpt.lwpt_imm_timeout) {
16137c478bd9Sstevel@tonic-gate 		imm_timeout = 1;
16147c478bd9Sstevel@tonic-gate 		timedwait = NULL;
16157c478bd9Sstevel@tonic-gate 	}
16167c478bd9Sstevel@tonic-gate 
16177c478bd9Sstevel@tonic-gate 	(void) new_mstate(t, LMS_USER_LOCK);
16187c478bd9Sstevel@tonic-gate 
16197c478bd9Sstevel@tonic-gate 	if (on_fault(&ljb)) {
16207c478bd9Sstevel@tonic-gate 		if (no_lwpchan) {
16217c478bd9Sstevel@tonic-gate 			error = EFAULT;
16227c478bd9Sstevel@tonic-gate 			goto out;
16237c478bd9Sstevel@tonic-gate 		}
16247c478bd9Sstevel@tonic-gate 		if (m_locked) {
16257c478bd9Sstevel@tonic-gate 			m_locked = 0;
16267c478bd9Sstevel@tonic-gate 			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
16277c478bd9Sstevel@tonic-gate 		}
16287c478bd9Sstevel@tonic-gate 		if (locked) {
16297c478bd9Sstevel@tonic-gate 			locked = 0;
16307c478bd9Sstevel@tonic-gate 			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
16317c478bd9Sstevel@tonic-gate 		}
16327c478bd9Sstevel@tonic-gate 		/*
16337c478bd9Sstevel@tonic-gate 		 * set up another on_fault() for a possible fault
16347c478bd9Sstevel@tonic-gate 		 * on the user lock accessed at "efault"
16357c478bd9Sstevel@tonic-gate 		 */
16367c478bd9Sstevel@tonic-gate 		if (on_fault(&ljb)) {
16377c478bd9Sstevel@tonic-gate 			if (m_locked) {
16387c478bd9Sstevel@tonic-gate 				m_locked = 0;
16397c478bd9Sstevel@tonic-gate 				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
16407c478bd9Sstevel@tonic-gate 			}
16417c478bd9Sstevel@tonic-gate 			goto out;
16427c478bd9Sstevel@tonic-gate 		}
16437c478bd9Sstevel@tonic-gate 		error = EFAULT;
16447c478bd9Sstevel@tonic-gate 		goto efault;
16457c478bd9Sstevel@tonic-gate 	}
16467c478bd9Sstevel@tonic-gate 
16477c478bd9Sstevel@tonic-gate 	/*
16488118ecd5Sraf 	 * Force Copy-on-write if necessary and ensure that the
16498118ecd5Sraf 	 * synchronization object resides in read/write memory.
16508118ecd5Sraf 	 * Cause an EFAULT return now if this is not so.
16517c478bd9Sstevel@tonic-gate 	 */
16527c478bd9Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
16538118ecd5Sraf 	suword8_noerr(&mp->mutex_type, mtype);
16547c478bd9Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0) {
16557c478bd9Sstevel@tonic-gate 		/* convert user level mutex, "mp", to a unique lwpchan */
16567c478bd9Sstevel@tonic-gate 		/* check if mtype is ok to use below, instead of type from cv */
16577c478bd9Sstevel@tonic-gate 		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
16587c478bd9Sstevel@tonic-gate 		    &m_lwpchan, LWPCHAN_MPPOOL)) {
16597c478bd9Sstevel@tonic-gate 			error = EFAULT;
16607c478bd9Sstevel@tonic-gate 			goto out;
16617c478bd9Sstevel@tonic-gate 		}
16627c478bd9Sstevel@tonic-gate 	}
16637c478bd9Sstevel@tonic-gate 	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
16647c478bd9Sstevel@tonic-gate 	suword16_noerr(&cv->cond_type, type);
16657c478bd9Sstevel@tonic-gate 	/* convert user level condition variable, "cv", to a unique lwpchan */
16667c478bd9Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
16677c478bd9Sstevel@tonic-gate 	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
16687c478bd9Sstevel@tonic-gate 		error = EFAULT;
16697c478bd9Sstevel@tonic-gate 		goto out;
16707c478bd9Sstevel@tonic-gate 	}
16717c478bd9Sstevel@tonic-gate 	no_lwpchan = 0;
16727c478bd9Sstevel@tonic-gate 	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
16737c478bd9Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0)
16747c478bd9Sstevel@tonic-gate 		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
16757c478bd9Sstevel@tonic-gate 		    S_WRITE);
16767c478bd9Sstevel@tonic-gate 
16777c478bd9Sstevel@tonic-gate 	/*
16787c478bd9Sstevel@tonic-gate 	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
16797c478bd9Sstevel@tonic-gate 	 * with respect to a possible wakeup which is a result of either
16807c478bd9Sstevel@tonic-gate 	 * an lwp_cond_signal() or an lwp_cond_broadcast().
16817c478bd9Sstevel@tonic-gate 	 *
16827c478bd9Sstevel@tonic-gate 	 * What's misleading, is that the lwp is put to sleep after the
16837c478bd9Sstevel@tonic-gate 	 * condition variable's mutex is released.  This is OK as long as
16847c478bd9Sstevel@tonic-gate 	 * the release operation is also done while holding lwpchan_lock.
16857c478bd9Sstevel@tonic-gate 	 * The lwp is then put to sleep when the possibility of pagefaulting
16867c478bd9Sstevel@tonic-gate 	 * or sleeping is completely eliminated.
16877c478bd9Sstevel@tonic-gate 	 */
16887c478bd9Sstevel@tonic-gate 	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
16897c478bd9Sstevel@tonic-gate 	locked = 1;
16907c478bd9Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0) {
16917c478bd9Sstevel@tonic-gate 		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
16927c478bd9Sstevel@tonic-gate 		m_locked = 1;
16937c478bd9Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, 1);
16947c478bd9Sstevel@tonic-gate 		/*
16957c478bd9Sstevel@tonic-gate 		 * unlock the condition variable's mutex. (pagefaults are
16967c478bd9Sstevel@tonic-gate 		 * possible here.)
16977c478bd9Sstevel@tonic-gate 		 */
1698db94676fSRoger A. Faulkner 		set_owner_pid(mp, 0, 0);
16997c478bd9Sstevel@tonic-gate 		ulock_clear(&mp->mutex_lockw);
17007c478bd9Sstevel@tonic-gate 		fuword8_noerr(&mp->mutex_waiters, &waiters);
17017c478bd9Sstevel@tonic-gate 		if (waiters != 0) {
17027c478bd9Sstevel@tonic-gate 			/*
17037c478bd9Sstevel@tonic-gate 			 * Given the locking of lwpchan_lock around the release
17047c478bd9Sstevel@tonic-gate 			 * of the mutex and checking for waiters, the following
17057c478bd9Sstevel@tonic-gate 			 * call to lwp_release() can fail ONLY if the lock
17067c478bd9Sstevel@tonic-gate 			 * acquirer is interrupted after setting the waiter bit,
17077c478bd9Sstevel@tonic-gate 			 * calling lwp_block() and releasing lwpchan_lock.
17087c478bd9Sstevel@tonic-gate 			 * In this case, it could get pulled off the lwp sleep
17097c478bd9Sstevel@tonic-gate 			 * q (via setrun()) before the following call to
17107c478bd9Sstevel@tonic-gate 			 * lwp_release() occurs. In this case, the lock
17117c478bd9Sstevel@tonic-gate 			 * requestor will update the waiter bit correctly by
17127c478bd9Sstevel@tonic-gate 			 * re-evaluating it.
17137c478bd9Sstevel@tonic-gate 			 */
171431db3c26Sraf 			if (lwp_release(&m_lwpchan, &waiters, 0))
17157c478bd9Sstevel@tonic-gate 				suword8_noerr(&mp->mutex_waiters, waiters);
17167c478bd9Sstevel@tonic-gate 		}
17177c478bd9Sstevel@tonic-gate 		m_locked = 0;
17187c478bd9Sstevel@tonic-gate 		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
17197c478bd9Sstevel@tonic-gate 	} else {
17207c478bd9Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, 1);
17217c478bd9Sstevel@tonic-gate 		error = lwp_upimutex_unlock(mp, mtype);
17227c478bd9Sstevel@tonic-gate 		if (error) {	/* if the upimutex unlock failed */
17237c478bd9Sstevel@tonic-gate 			locked = 0;
17247c478bd9Sstevel@tonic-gate 			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
17257c478bd9Sstevel@tonic-gate 			goto out;
17267c478bd9Sstevel@tonic-gate 		}
17277c478bd9Sstevel@tonic-gate 	}
17287c478bd9Sstevel@tonic-gate 	no_fault();
17297c478bd9Sstevel@tonic-gate 
17307c478bd9Sstevel@tonic-gate 	if (mpwatched) {
17317c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
17327c478bd9Sstevel@tonic-gate 		mpwatched = 0;
17337c478bd9Sstevel@tonic-gate 	}
17347c478bd9Sstevel@tonic-gate 	if (cvwatched) {
17357c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
17367c478bd9Sstevel@tonic-gate 		cvwatched = 0;
17377c478bd9Sstevel@tonic-gate 	}
17387c478bd9Sstevel@tonic-gate 
17397c478bd9Sstevel@tonic-gate 	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
17407c478bd9Sstevel@tonic-gate 		/*
17417c478bd9Sstevel@tonic-gate 		 * We received a signal at user-level before calling here
17427c478bd9Sstevel@tonic-gate 		 * or another thread wants us to return immediately
17437c478bd9Sstevel@tonic-gate 		 * with EINTR.  See lwp_unpark().
17447c478bd9Sstevel@tonic-gate 		 */
17457c478bd9Sstevel@tonic-gate 		imm_unpark = 1;
17467c478bd9Sstevel@tonic-gate 		t->t_unpark = 0;
17477c478bd9Sstevel@tonic-gate 		timedwait = NULL;
17487c478bd9Sstevel@tonic-gate 	} else if (timedwait) {
17497c478bd9Sstevel@tonic-gate 		/*
17507c478bd9Sstevel@tonic-gate 		 * If we successfully queue the timeout,
17517c478bd9Sstevel@tonic-gate 		 * then don't drop t_delay_lock until
17527c478bd9Sstevel@tonic-gate 		 * we are on the sleep queue (below).
17537c478bd9Sstevel@tonic-gate 		 */
17547c478bd9Sstevel@tonic-gate 		mutex_enter(&t->t_delay_lock);
17557c478bd9Sstevel@tonic-gate 		if (lwp_timer_enqueue(&lwpt) != 0) {
17567c478bd9Sstevel@tonic-gate 			mutex_exit(&t->t_delay_lock);
17577c478bd9Sstevel@tonic-gate 			imm_timeout = 1;
17587c478bd9Sstevel@tonic-gate 			timedwait = NULL;
17597c478bd9Sstevel@tonic-gate 		}
17607c478bd9Sstevel@tonic-gate 	}
17617c478bd9Sstevel@tonic-gate 	t->t_flag |= T_WAITCVSEM;
17627c478bd9Sstevel@tonic-gate 	lwp_block(&cv_lwpchan);
17637c478bd9Sstevel@tonic-gate 	/*
17647c478bd9Sstevel@tonic-gate 	 * Nothing should happen to cause the lwp to go to sleep
17657c478bd9Sstevel@tonic-gate 	 * until after it returns from swtch().
17667c478bd9Sstevel@tonic-gate 	 */
17677c478bd9Sstevel@tonic-gate 	if (timedwait)
17687c478bd9Sstevel@tonic-gate 		mutex_exit(&t->t_delay_lock);
17697c478bd9Sstevel@tonic-gate 	locked = 0;
17707c478bd9Sstevel@tonic-gate 	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
17717c478bd9Sstevel@tonic-gate 	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
17727c478bd9Sstevel@tonic-gate 	    (imm_timeout | imm_unpark))
17737c478bd9Sstevel@tonic-gate 		setrun(t);
17747c478bd9Sstevel@tonic-gate 	swtch();
17757c478bd9Sstevel@tonic-gate 	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
17767c478bd9Sstevel@tonic-gate 	if (timedwait)
17777c478bd9Sstevel@tonic-gate 		tim = lwp_timer_dequeue(&lwpt);
17787c478bd9Sstevel@tonic-gate 	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
17797c478bd9Sstevel@tonic-gate 	    MUSTRETURN(p, t) || imm_unpark)
17807c478bd9Sstevel@tonic-gate 		error = EINTR;
17817c478bd9Sstevel@tonic-gate 	else if (imm_timeout || (timedwait && tim == -1))
17827c478bd9Sstevel@tonic-gate 		error = ETIME;
17837c478bd9Sstevel@tonic-gate 	lwp->lwp_asleep = 0;
17847c478bd9Sstevel@tonic-gate 	lwp->lwp_sysabort = 0;
17857c478bd9Sstevel@tonic-gate 	setallwatch();
17867c478bd9Sstevel@tonic-gate 
17877c478bd9Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
17887c478bd9Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
17897c478bd9Sstevel@tonic-gate 
17907c478bd9Sstevel@tonic-gate 	if (tsp && check_park)		/* copyout the residual time left */
17917c478bd9Sstevel@tonic-gate 		error = lwp_timer_copyout(&lwpt, error);
17927c478bd9Sstevel@tonic-gate 
17937c478bd9Sstevel@tonic-gate 	/* the mutex is reacquired by the caller on return to user level */
17947c478bd9Sstevel@tonic-gate 	if (error) {
17957c478bd9Sstevel@tonic-gate 		/*
17967c478bd9Sstevel@tonic-gate 		 * If we were concurrently lwp_cond_signal()d and we
17977c478bd9Sstevel@tonic-gate 		 * received a UNIX signal or got a timeout, then perform
17987c478bd9Sstevel@tonic-gate 		 * another lwp_cond_signal() to avoid consuming the wakeup.
17997c478bd9Sstevel@tonic-gate 		 */
18007c478bd9Sstevel@tonic-gate 		if (t->t_release)
18017c478bd9Sstevel@tonic-gate 			(void) lwp_cond_signal(cv);
18027c478bd9Sstevel@tonic-gate 		return (set_errno(error));
18037c478bd9Sstevel@tonic-gate 	}
18047c478bd9Sstevel@tonic-gate 	return (0);
18057c478bd9Sstevel@tonic-gate 
18067c478bd9Sstevel@tonic-gate efault:
18077c478bd9Sstevel@tonic-gate 	/*
18087c478bd9Sstevel@tonic-gate 	 * make sure that the user level lock is dropped before
18097c478bd9Sstevel@tonic-gate 	 * returning to caller, since the caller always re-acquires it.
18107c478bd9Sstevel@tonic-gate 	 */
18117c478bd9Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0) {
18127c478bd9Sstevel@tonic-gate 		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
18137c478bd9Sstevel@tonic-gate 		m_locked = 1;
1814db94676fSRoger A. Faulkner 		set_owner_pid(mp, 0, 0);
18157c478bd9Sstevel@tonic-gate 		ulock_clear(&mp->mutex_lockw);
18167c478bd9Sstevel@tonic-gate 		fuword8_noerr(&mp->mutex_waiters, &waiters);
18177c478bd9Sstevel@tonic-gate 		if (waiters != 0) {
18187c478bd9Sstevel@tonic-gate 			/*
18197c478bd9Sstevel@tonic-gate 			 * See comment above on lock clearing and lwp_release()
18207c478bd9Sstevel@tonic-gate 			 * success/failure.
18217c478bd9Sstevel@tonic-gate 			 */
182231db3c26Sraf 			if (lwp_release(&m_lwpchan, &waiters, 0))
18237c478bd9Sstevel@tonic-gate 				suword8_noerr(&mp->mutex_waiters, waiters);
18247c478bd9Sstevel@tonic-gate 		}
18257c478bd9Sstevel@tonic-gate 		m_locked = 0;
18267c478bd9Sstevel@tonic-gate 		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
18277c478bd9Sstevel@tonic-gate 	} else {
18287c478bd9Sstevel@tonic-gate 		(void) lwp_upimutex_unlock(mp, mtype);
18297c478bd9Sstevel@tonic-gate 	}
18307c478bd9Sstevel@tonic-gate out:
18317c478bd9Sstevel@tonic-gate 	no_fault();
18327c478bd9Sstevel@tonic-gate 	if (mpwatched)
18337c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
18347c478bd9Sstevel@tonic-gate 	if (cvwatched)
18357c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
18367c478bd9Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
18377c478bd9Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
18387c478bd9Sstevel@tonic-gate 	return (set_errno(error));
18397c478bd9Sstevel@tonic-gate }
18407c478bd9Sstevel@tonic-gate 
18417c478bd9Sstevel@tonic-gate /*
18427c478bd9Sstevel@tonic-gate  * wakeup one lwp that's blocked on this condition variable.
18437c478bd9Sstevel@tonic-gate  */
18447c478bd9Sstevel@tonic-gate int
lwp_cond_signal(lwp_cond_t * cv)18457c478bd9Sstevel@tonic-gate lwp_cond_signal(lwp_cond_t *cv)
18467c478bd9Sstevel@tonic-gate {
18477c478bd9Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
18487c478bd9Sstevel@tonic-gate 	lwpchan_t lwpchan;
18497c478bd9Sstevel@tonic-gate 	uchar_t waiters;
	/*
	 * type/locked/watched are declared volatile because they are
	 * modified between on_fault() and the fault, and are examined
	 * in the fault-recovery path below; they must not be cached in
	 * registers across the setjmp/longjmp that on_fault() implies.
	 */
18507c478bd9Sstevel@tonic-gate 	volatile uint16_t type = 0;
18517c478bd9Sstevel@tonic-gate 	volatile int locked = 0;
18527c478bd9Sstevel@tonic-gate 	volatile int watched = 0;
18537c478bd9Sstevel@tonic-gate 	label_t ljb;
18547c478bd9Sstevel@tonic-gate 	int error = 0;
18557c478bd9Sstevel@tonic-gate 
	/* Reject a condvar address that lies outside the user address space. */
18567c478bd9Sstevel@tonic-gate 	if ((caddr_t)cv >= p->p_as->a_userlimit)
18577c478bd9Sstevel@tonic-gate 		return (set_errno(EFAULT));
18587c478bd9Sstevel@tonic-gate 
	/* Suspend watchpoints covering *cv while we access it directly. */
18597c478bd9Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
18607c478bd9Sstevel@tonic-gate 
	/*
	 * Fault recovery: if any user-memory access below faults, control
	 * resumes here with a nonzero return.  Drop the lwpchan lock if we
	 * hold it and fail the call with EFAULT.
	 */
18617c478bd9Sstevel@tonic-gate 	if (on_fault(&ljb)) {
18627c478bd9Sstevel@tonic-gate 		if (locked)
18637c478bd9Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
18647c478bd9Sstevel@tonic-gate 		error = EFAULT;
18657c478bd9Sstevel@tonic-gate 		goto out;
18667c478bd9Sstevel@tonic-gate 	}
18677c478bd9Sstevel@tonic-gate 	/*
18688118ecd5Sraf 	 * Force Copy-on-write if necessary and ensure that the
18698118ecd5Sraf 	 * synchronization object resides in read/write memory.
18708118ecd5Sraf 	 * Cause an EFAULT return now if this is not so.
18717c478bd9Sstevel@tonic-gate 	 */
18727c478bd9Sstevel@tonic-gate 	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
18737c478bd9Sstevel@tonic-gate 	suword16_noerr(&cv->cond_type, type);
	/* Convert the user-level condvar address to its unique lwpchan. */
18747c478bd9Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
18757c478bd9Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
18767c478bd9Sstevel@tonic-gate 		error = EFAULT;
18777c478bd9Sstevel@tonic-gate 		goto out;
18787c478bd9Sstevel@tonic-gate 	}
18797c478bd9Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
18807c478bd9Sstevel@tonic-gate 	locked = 1;
18817c478bd9Sstevel@tonic-gate 	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
18827c478bd9Sstevel@tonic-gate 	if (waiters != 0) {
18837c478bd9Sstevel@tonic-gate 		/*
18847c478bd9Sstevel@tonic-gate 		 * The following call to lwp_release() might fail but it is
18857c478bd9Sstevel@tonic-gate 		 * OK to write into the waiters bit below, since the memory
18867c478bd9Sstevel@tonic-gate 		 * could not have been re-used or unmapped (for correctly
18877c478bd9Sstevel@tonic-gate 		 * written user programs) as in the case of lwp_mutex_wakeup().
18887c478bd9Sstevel@tonic-gate 		 * For an incorrect program, we should not care about data
18897c478bd9Sstevel@tonic-gate 		 * corruption since this is just one instance of other places
18907c478bd9Sstevel@tonic-gate 		 * where corruption can occur for such a program. Of course
18917c478bd9Sstevel@tonic-gate 		 * if the memory is unmapped, normal fault recovery occurs.
18927c478bd9Sstevel@tonic-gate 		 */
18937c478bd9Sstevel@tonic-gate 		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
18947c478bd9Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, waiters);
18957c478bd9Sstevel@tonic-gate 	}
18967c478bd9Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
18977c478bd9Sstevel@tonic-gate out:
18987c478bd9Sstevel@tonic-gate 	no_fault();
	/* Re-arm any watchpoints we disabled above. */
18997c478bd9Sstevel@tonic-gate 	if (watched)
19007c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
19017c478bd9Sstevel@tonic-gate 	if (error)
19027c478bd9Sstevel@tonic-gate 		return (set_errno(error));
19037c478bd9Sstevel@tonic-gate 	return (0);
19047c478bd9Sstevel@tonic-gate }
19057c478bd9Sstevel@tonic-gate 
19067c478bd9Sstevel@tonic-gate /*
19077c478bd9Sstevel@tonic-gate  * wakeup every lwp that's blocked on this condition variable.
19087c478bd9Sstevel@tonic-gate  */
19097c478bd9Sstevel@tonic-gate int
lwp_cond_broadcast(lwp_cond_t * cv)19107c478bd9Sstevel@tonic-gate lwp_cond_broadcast(lwp_cond_t *cv)
19117c478bd9Sstevel@tonic-gate {
19127c478bd9Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
19137c478bd9Sstevel@tonic-gate 	lwpchan_t lwpchan;
	/*
	 * type/locked/watched are volatile: they are modified between
	 * on_fault() and a possible fault, and are read in the recovery
	 * path, so they must survive the implied setjmp/longjmp.
	 */
19147c478bd9Sstevel@tonic-gate 	volatile uint16_t type = 0;
19157c478bd9Sstevel@tonic-gate 	volatile int locked = 0;
19167c478bd9Sstevel@tonic-gate 	volatile int watched = 0;
19177c478bd9Sstevel@tonic-gate 	label_t ljb;
19187c478bd9Sstevel@tonic-gate 	uchar_t waiters;
19197c478bd9Sstevel@tonic-gate 	int error = 0;
19207c478bd9Sstevel@tonic-gate 
	/* Reject a condvar address that lies outside the user address space. */
19217c478bd9Sstevel@tonic-gate 	if ((caddr_t)cv >= p->p_as->a_userlimit)
19227c478bd9Sstevel@tonic-gate 		return (set_errno(EFAULT));
19237c478bd9Sstevel@tonic-gate 
	/* Suspend watchpoints covering *cv while we access it directly. */
19247c478bd9Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
19257c478bd9Sstevel@tonic-gate 
	/*
	 * Fault recovery for the user-memory accesses below: release the
	 * lwpchan lock if held and return EFAULT.
	 */
19267c478bd9Sstevel@tonic-gate 	if (on_fault(&ljb)) {
19277c478bd9Sstevel@tonic-gate 		if (locked)
19287c478bd9Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
19297c478bd9Sstevel@tonic-gate 		error = EFAULT;
19307c478bd9Sstevel@tonic-gate 		goto out;
19317c478bd9Sstevel@tonic-gate 	}
19327c478bd9Sstevel@tonic-gate 	/*
19338118ecd5Sraf 	 * Force Copy-on-write if necessary and ensure that the
19348118ecd5Sraf 	 * synchronization object resides in read/write memory.
19358118ecd5Sraf 	 * Cause an EFAULT return now if this is not so.
19367c478bd9Sstevel@tonic-gate 	 */
19377c478bd9Sstevel@tonic-gate 	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
19387c478bd9Sstevel@tonic-gate 	suword16_noerr(&cv->cond_type, type);
	/* Convert the user-level condvar address to its unique lwpchan. */
19397c478bd9Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
19407c478bd9Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
19417c478bd9Sstevel@tonic-gate 		error = EFAULT;
19427c478bd9Sstevel@tonic-gate 		goto out;
19437c478bd9Sstevel@tonic-gate 	}
19447c478bd9Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
19457c478bd9Sstevel@tonic-gate 	locked = 1;
19467c478bd9Sstevel@tonic-gate 	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
19477c478bd9Sstevel@tonic-gate 	if (waiters != 0) {
		/*
		 * Release every lwp sleeping on this channel and clear the
		 * user-visible waiters indication.
		 */
19487c478bd9Sstevel@tonic-gate 		lwp_release_all(&lwpchan);
19497c478bd9Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, 0);
19507c478bd9Sstevel@tonic-gate 	}
19517c478bd9Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
19527c478bd9Sstevel@tonic-gate out:
19537c478bd9Sstevel@tonic-gate 	no_fault();
	/* Re-arm any watchpoints we disabled above. */
19547c478bd9Sstevel@tonic-gate 	if (watched)
19557c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
19567c478bd9Sstevel@tonic-gate 	if (error)
19577c478bd9Sstevel@tonic-gate 		return (set_errno(error));
19587c478bd9Sstevel@tonic-gate 	return (0);
19597c478bd9Sstevel@tonic-gate }
19607c478bd9Sstevel@tonic-gate 
/*
 * Non-blocking semaphore wait: decrement sp->sema_count if it is
 * nonzero, otherwise fail with EBUSY.  Never sleeps.
 */
19617c478bd9Sstevel@tonic-gate int
lwp_sema_trywait(lwp_sema_t * sp)19627c478bd9Sstevel@tonic-gate lwp_sema_trywait(lwp_sema_t *sp)
19637c478bd9Sstevel@tonic-gate {
19647c478bd9Sstevel@tonic-gate 	kthread_t *t = curthread;
19657c478bd9Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
19667c478bd9Sstevel@tonic-gate 	label_t ljb;
	/*
	 * locked/watched/type are volatile: they are set between
	 * on_fault() and a possible fault, and read in the recovery path.
	 */
19677c478bd9Sstevel@tonic-gate 	volatile int locked = 0;
19687c478bd9Sstevel@tonic-gate 	volatile int watched = 0;
19697c478bd9Sstevel@tonic-gate 	volatile uint16_t type = 0;
19707c478bd9Sstevel@tonic-gate 	int count;
19717c478bd9Sstevel@tonic-gate 	lwpchan_t lwpchan;
19727c478bd9Sstevel@tonic-gate 	uchar_t waiters;
19737c478bd9Sstevel@tonic-gate 	int error = 0;
19747c478bd9Sstevel@tonic-gate 
	/* Reject a semaphore address outside the user address space. */
19757c478bd9Sstevel@tonic-gate 	if ((caddr_t)sp >= p->p_as->a_userlimit)
19767c478bd9Sstevel@tonic-gate 		return (set_errno(EFAULT));
19777c478bd9Sstevel@tonic-gate 
	/* Suspend watchpoints covering *sp while we access it directly. */
19787c478bd9Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
19797c478bd9Sstevel@tonic-gate 
	/*
	 * Fault recovery for the user-memory accesses below: release the
	 * lwpchan lock if held and return EFAULT.
	 */
19807c478bd9Sstevel@tonic-gate 	if (on_fault(&ljb)) {
19817c478bd9Sstevel@tonic-gate 		if (locked)
19827c478bd9Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
19837c478bd9Sstevel@tonic-gate 		error = EFAULT;
19847c478bd9Sstevel@tonic-gate 		goto out;
19857c478bd9Sstevel@tonic-gate 	}
19867c478bd9Sstevel@tonic-gate 	/*
19878118ecd5Sraf 	 * Force Copy-on-write if necessary and ensure that the
19888118ecd5Sraf 	 * synchronization object resides in read/write memory.
19898118ecd5Sraf 	 * Cause an EFAULT return now if this is not so.
19907c478bd9Sstevel@tonic-gate 	 */
19917c478bd9Sstevel@tonic-gate 	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
19927c478bd9Sstevel@tonic-gate 	suword16_noerr((void *)&sp->sema_type, type);
	/* Convert the user-level semaphore address to its unique lwpchan. */
19937c478bd9Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
19947c478bd9Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
19957c478bd9Sstevel@tonic-gate 		error = EFAULT;
19967c478bd9Sstevel@tonic-gate 		goto out;
19977c478bd9Sstevel@tonic-gate 	}
19987c478bd9Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
19997c478bd9Sstevel@tonic-gate 	locked = 1;
20007c478bd9Sstevel@tonic-gate 	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
20017c478bd9Sstevel@tonic-gate 	if (count == 0)
20027c478bd9Sstevel@tonic-gate 		error = EBUSY;
20037c478bd9Sstevel@tonic-gate 	else
20047c478bd9Sstevel@tonic-gate 		suword32_noerr((void *)&sp->sema_count, --count);
20057c478bd9Sstevel@tonic-gate 	if (count != 0) {
		/*
		 * Resources remain after our decrement; if any lwps are
		 * waiting, wake one (lwp_release() updates the waiters
		 * value that we write back to user memory).
		 */
20067c478bd9Sstevel@tonic-gate 		fuword8_noerr(&sp->sema_waiters, &waiters);
20077c478bd9Sstevel@tonic-gate 		if (waiters != 0) {
20087c478bd9Sstevel@tonic-gate 			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
20097c478bd9Sstevel@tonic-gate 			suword8_noerr(&sp->sema_waiters, waiters);
20107c478bd9Sstevel@tonic-gate 		}
20117c478bd9Sstevel@tonic-gate 	}
20127c478bd9Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
20137c478bd9Sstevel@tonic-gate out:
20147c478bd9Sstevel@tonic-gate 	no_fault();
	/* Re-arm any watchpoints we disabled above. */
20157c478bd9Sstevel@tonic-gate 	if (watched)
20167c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
20177c478bd9Sstevel@tonic-gate 	if (error)
20187c478bd9Sstevel@tonic-gate 		return (set_errno(error));
20197c478bd9Sstevel@tonic-gate 	return (0);
20207c478bd9Sstevel@tonic-gate }
20217c478bd9Sstevel@tonic-gate 
20227c478bd9Sstevel@tonic-gate /*
20237c478bd9Sstevel@tonic-gate  * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument.
20247c478bd9Sstevel@tonic-gate  */
/*
 * Semaphore wait with optional timeout: sleep until sp->sema_count can
 * be decremented, the (optional, via tsp) timeout expires (ETIME), or
 * the wait is interrupted by a signal or an unpark request (EINTR).
 */
20257c478bd9Sstevel@tonic-gate int
lwp_sema_timedwait(lwp_sema_t * sp,timespec_t * tsp,int check_park)20267c478bd9Sstevel@tonic-gate lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park)
20277c478bd9Sstevel@tonic-gate {
20287c478bd9Sstevel@tonic-gate 	kthread_t *t = curthread;
20297c478bd9Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
20307c478bd9Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
20317c478bd9Sstevel@tonic-gate 	lwp_timer_t lwpt;
20327c478bd9Sstevel@tonic-gate 	caddr_t timedwait;
20337c478bd9Sstevel@tonic-gate 	clock_t tim = -1;
20347c478bd9Sstevel@tonic-gate 	label_t ljb;
	/*
	 * locked/watched/type are volatile: they are modified between
	 * on_fault() and a possible fault, and read in the recovery path.
	 */
20357c478bd9Sstevel@tonic-gate 	volatile int locked = 0;
20367c478bd9Sstevel@tonic-gate 	volatile int watched = 0;
20377c478bd9Sstevel@tonic-gate 	volatile uint16_t type = 0;
20387c478bd9Sstevel@tonic-gate 	int count;
20397c478bd9Sstevel@tonic-gate 	lwpchan_t lwpchan;
20407c478bd9Sstevel@tonic-gate 	uchar_t waiters;
20417c478bd9Sstevel@tonic-gate 	int error = 0;
20427c478bd9Sstevel@tonic-gate 	int time_error;
20437c478bd9Sstevel@tonic-gate 	int imm_timeout = 0;
20447c478bd9Sstevel@tonic-gate 	int imm_unpark = 0;
20457c478bd9Sstevel@tonic-gate 
	/* Reject a semaphore address outside the user address space. */
20467c478bd9Sstevel@tonic-gate 	if ((caddr_t)sp >= p->p_as->a_userlimit)
20477c478bd9Sstevel@tonic-gate 		return (set_errno(EFAULT));
20487c478bd9Sstevel@tonic-gate 
204907a48826SRoger A. Faulkner 	/*
205007a48826SRoger A. Faulkner 	 * Put the lwp in an orderly state for debugging,
205107a48826SRoger A. Faulkner 	 * in case we are stopped while sleeping, below.
205207a48826SRoger A. Faulkner 	 */
205307a48826SRoger A. Faulkner 	prstop(PR_REQUESTED, 0);
205407a48826SRoger A. Faulkner 
	/*
	 * Copy in the timeout; an already-expired timeout degenerates to
	 * an immediate ETIME (imm_timeout) with no timer queued.  A copyin
	 * error is deferred in time_error until we would actually sleep.
	 */
20557c478bd9Sstevel@tonic-gate 	timedwait = (caddr_t)tsp;
20567c478bd9Sstevel@tonic-gate 	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
20577c478bd9Sstevel@tonic-gate 	    lwpt.lwpt_imm_timeout) {
20587c478bd9Sstevel@tonic-gate 		imm_timeout = 1;
20597c478bd9Sstevel@tonic-gate 		timedwait = NULL;
20607c478bd9Sstevel@tonic-gate 	}
20617c478bd9Sstevel@tonic-gate 
	/* Suspend watchpoints covering *sp while we access it directly. */
20627c478bd9Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
20637c478bd9Sstevel@tonic-gate 
	/*
	 * Fault recovery for the user-memory accesses below: release the
	 * lwpchan lock if held and return EFAULT.
	 */
20647c478bd9Sstevel@tonic-gate 	if (on_fault(&ljb)) {
20657c478bd9Sstevel@tonic-gate 		if (locked)
20667c478bd9Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
20677c478bd9Sstevel@tonic-gate 		error = EFAULT;
20687c478bd9Sstevel@tonic-gate 		goto out;
20697c478bd9Sstevel@tonic-gate 	}
20707c478bd9Sstevel@tonic-gate 	/*
20718118ecd5Sraf 	 * Force Copy-on-write if necessary and ensure that the
20728118ecd5Sraf 	 * synchronization object resides in read/write memory.
20738118ecd5Sraf 	 * Cause an EFAULT return now if this is not so.
20747c478bd9Sstevel@tonic-gate 	 */
20757c478bd9Sstevel@tonic-gate 	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
20767c478bd9Sstevel@tonic-gate 	suword16_noerr((void *)&sp->sema_type, type);
	/* Convert the user-level semaphore address to its unique lwpchan. */
20777c478bd9Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
20787c478bd9Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
20797c478bd9Sstevel@tonic-gate 		error = EFAULT;
20807c478bd9Sstevel@tonic-gate 		goto out;
20817c478bd9Sstevel@tonic-gate 	}
20827c478bd9Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
20837c478bd9Sstevel@tonic-gate 	locked = 1;
20847c478bd9Sstevel@tonic-gate 	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
	/*
	 * Sleep, re-checking the count after each wakeup, until either a
	 * resource is available (count != 0) or an error (EINTR/ETIME/
	 * deferred timer error) ends the wait.  The lwpchan lock is held
	 * at the top of each iteration and re-acquired after swtch().
	 */
20857c478bd9Sstevel@tonic-gate 	while (error == 0 && count == 0) {
20867c478bd9Sstevel@tonic-gate 		if (time_error) {
20877c478bd9Sstevel@tonic-gate 			/*
20887c478bd9Sstevel@tonic-gate 			 * The SUSV3 Posix spec is very clear that we
20897c478bd9Sstevel@tonic-gate 			 * should get no error from validating the
20907c478bd9Sstevel@tonic-gate 			 * timer until we would actually sleep.
20917c478bd9Sstevel@tonic-gate 			 */
20927c478bd9Sstevel@tonic-gate 			error = time_error;
20937c478bd9Sstevel@tonic-gate 			break;
20947c478bd9Sstevel@tonic-gate 		}
20957c478bd9Sstevel@tonic-gate 		suword8_noerr(&sp->sema_waiters, 1);
20967c478bd9Sstevel@tonic-gate 		if (watched)
20977c478bd9Sstevel@tonic-gate 			watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
20987c478bd9Sstevel@tonic-gate 		if (check_park && (!schedctl_is_park() || t->t_unpark)) {
20997c478bd9Sstevel@tonic-gate 			/*
21007c478bd9Sstevel@tonic-gate 			 * We received a signal at user-level before calling
21017c478bd9Sstevel@tonic-gate 			 * here or another thread wants us to return
21027c478bd9Sstevel@tonic-gate 			 * immediately with EINTR.  See lwp_unpark().
21037c478bd9Sstevel@tonic-gate 			 */
21047c478bd9Sstevel@tonic-gate 			imm_unpark = 1;
21057c478bd9Sstevel@tonic-gate 			t->t_unpark = 0;
21067c478bd9Sstevel@tonic-gate 			timedwait = NULL;
21077c478bd9Sstevel@tonic-gate 		} else if (timedwait) {
21087c478bd9Sstevel@tonic-gate 			/*
21097c478bd9Sstevel@tonic-gate 			 * If we successfully queue the timeout,
21107c478bd9Sstevel@tonic-gate 			 * then don't drop t_delay_lock until
21117c478bd9Sstevel@tonic-gate 			 * we are on the sleep queue (below).
21127c478bd9Sstevel@tonic-gate 			 */
21137c478bd9Sstevel@tonic-gate 			mutex_enter(&t->t_delay_lock);
21147c478bd9Sstevel@tonic-gate 			if (lwp_timer_enqueue(&lwpt) != 0) {
21157c478bd9Sstevel@tonic-gate 				mutex_exit(&t->t_delay_lock);
21167c478bd9Sstevel@tonic-gate 				imm_timeout = 1;
21177c478bd9Sstevel@tonic-gate 				timedwait = NULL;
21187c478bd9Sstevel@tonic-gate 			}
21197c478bd9Sstevel@tonic-gate 		}
21207c478bd9Sstevel@tonic-gate 		t->t_flag |= T_WAITCVSEM;
21217c478bd9Sstevel@tonic-gate 		lwp_block(&lwpchan);
21227c478bd9Sstevel@tonic-gate 		/*
21237c478bd9Sstevel@tonic-gate 		 * Nothing should happen to cause the lwp to sleep
21247c478bd9Sstevel@tonic-gate 		 * again until after it returns from swtch().
21257c478bd9Sstevel@tonic-gate 		 */
21267c478bd9Sstevel@tonic-gate 		if (timedwait)
21277c478bd9Sstevel@tonic-gate 			mutex_exit(&t->t_delay_lock);
21287c478bd9Sstevel@tonic-gate 		locked = 0;
21297c478bd9Sstevel@tonic-gate 		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		/*
		 * If a wakeup condition already exists (pending signal,
		 * forced return, immediate timeout/unpark), make the lwp
		 * runnable again before switching so it does not sleep.
		 */
21307c478bd9Sstevel@tonic-gate 		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
21317c478bd9Sstevel@tonic-gate 		    (imm_timeout | imm_unpark))
21327c478bd9Sstevel@tonic-gate 			setrun(t);
21337c478bd9Sstevel@tonic-gate 		swtch();
21347c478bd9Sstevel@tonic-gate 		t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
		/* tim == -1 after dequeue means the timer already fired. */
21357c478bd9Sstevel@tonic-gate 		if (timedwait)
21367c478bd9Sstevel@tonic-gate 			tim = lwp_timer_dequeue(&lwpt);
21377c478bd9Sstevel@tonic-gate 		setallwatch();
21387c478bd9Sstevel@tonic-gate 		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
21397c478bd9Sstevel@tonic-gate 		    MUSTRETURN(p, t) || imm_unpark)
21407c478bd9Sstevel@tonic-gate 			error = EINTR;
21417c478bd9Sstevel@tonic-gate 		else if (imm_timeout || (timedwait && tim == -1))
21427c478bd9Sstevel@tonic-gate 			error = ETIME;
21437c478bd9Sstevel@tonic-gate 		lwp->lwp_asleep = 0;
21447c478bd9Sstevel@tonic-gate 		lwp->lwp_sysabort = 0;
21457c478bd9Sstevel@tonic-gate 		watched = watch_disable_addr((caddr_t)sp,
21467c478bd9Sstevel@tonic-gate 		    sizeof (*sp), S_WRITE);
21477c478bd9Sstevel@tonic-gate 		lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
21487c478bd9Sstevel@tonic-gate 		locked = 1;
21497c478bd9Sstevel@tonic-gate 		fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
21507c478bd9Sstevel@tonic-gate 	}
	/* On success, consume one resource while still holding the lock. */
21517c478bd9Sstevel@tonic-gate 	if (error == 0)
21527c478bd9Sstevel@tonic-gate 		suword32_noerr((void *)&sp->sema_count, --count);
	/*
	 * Resources remain; propagate the wakeup to one waiter
	 * (lwp_release() sets the waiters value we write back).
	 */
21537c478bd9Sstevel@tonic-gate 	if (count != 0) {
21547c478bd9Sstevel@tonic-gate 		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
21557c478bd9Sstevel@tonic-gate 		suword8_noerr(&sp->sema_waiters, waiters);
21567c478bd9Sstevel@tonic-gate 	}
21577c478bd9Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
21587c478bd9Sstevel@tonic-gate out:
21597c478bd9Sstevel@tonic-gate 	no_fault();
	/* Re-arm any watchpoints we disabled above. */
21607c478bd9Sstevel@tonic-gate 	if (watched)
21617c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
	/* Copy out residual time, but not if the timer itself was bad. */
21627c478bd9Sstevel@tonic-gate 	if (tsp && check_park && !time_error)
21637c478bd9Sstevel@tonic-gate 		error = lwp_timer_copyout(&lwpt, error);
21647c478bd9Sstevel@tonic-gate 	if (error)
21657c478bd9Sstevel@tonic-gate 		return (set_errno(error));
21667c478bd9Sstevel@tonic-gate 	return (0);
21677c478bd9Sstevel@tonic-gate }
21687c478bd9Sstevel@tonic-gate 
/*
 * System call to post (increment) a user-level semaphore.
 * Increments sp->sema_count, failing with EOVERFLOW at _SEM_VALUE_MAX,
 * and on a 0 -> 1 transition wakes one waiter blocked on the
 * semaphore's lwpchan.  All userland accesses are made under
 * on_fault() protection so a bad address yields EFAULT rather than
 * a kernel fault.  Returns 0 on success or sets errno.
 */
21697c478bd9Sstevel@tonic-gate int
lwp_sema_post(lwp_sema_t * sp)21707c478bd9Sstevel@tonic-gate lwp_sema_post(lwp_sema_t *sp)
21717c478bd9Sstevel@tonic-gate {
21727c478bd9Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
21737c478bd9Sstevel@tonic-gate 	label_t ljb;
21747c478bd9Sstevel@tonic-gate 	volatile int locked = 0;
21757c478bd9Sstevel@tonic-gate 	volatile int watched = 0;
21767c478bd9Sstevel@tonic-gate 	volatile uint16_t type = 0;
21777c478bd9Sstevel@tonic-gate 	int count;
21787c478bd9Sstevel@tonic-gate 	lwpchan_t lwpchan;
21797c478bd9Sstevel@tonic-gate 	uchar_t waiters;
21807c478bd9Sstevel@tonic-gate 	int error = 0;
21817c478bd9Sstevel@tonic-gate 
	/* Reject objects that do not lie in the user address space. */
21827c478bd9Sstevel@tonic-gate 	if ((caddr_t)sp >= p->p_as->a_userlimit)
21837c478bd9Sstevel@tonic-gate 		return (set_errno(EFAULT));
21847c478bd9Sstevel@tonic-gate 
	/* Suspend watchpoints on the object while we write it. */
21857c478bd9Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
21867c478bd9Sstevel@tonic-gate 
	/* Any pagefault in the _noerr accesses below lands here. */
21877c478bd9Sstevel@tonic-gate 	if (on_fault(&ljb)) {
21887c478bd9Sstevel@tonic-gate 		if (locked)
21897c478bd9Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
21907c478bd9Sstevel@tonic-gate 		error = EFAULT;
21917c478bd9Sstevel@tonic-gate 		goto out;
21927c478bd9Sstevel@tonic-gate 	}
21937c478bd9Sstevel@tonic-gate 	/*
21948118ecd5Sraf 	 * Force Copy-on-write if necessary and ensure that the
21958118ecd5Sraf 	 * synchronization object resides in read/write memory.
21968118ecd5Sraf 	 * Cause an EFAULT return now if this is not so.
21977c478bd9Sstevel@tonic-gate 	 */
21987c478bd9Sstevel@tonic-gate 	fuword16_noerr(&sp->sema_type, (uint16_t *)&type);
21997c478bd9Sstevel@tonic-gate 	suword16_noerr(&sp->sema_type, type);
22007c478bd9Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type,
22017c478bd9Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
22027c478bd9Sstevel@tonic-gate 		error = EFAULT;
22037c478bd9Sstevel@tonic-gate 		goto out;
22047c478bd9Sstevel@tonic-gate 	}
	/* Serialize against waiters going to sleep on this lwpchan. */
22057c478bd9Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
22067c478bd9Sstevel@tonic-gate 	locked = 1;
22077c478bd9Sstevel@tonic-gate 	fuword32_noerr(&sp->sema_count, (uint32_t *)&count);
22087c478bd9Sstevel@tonic-gate 	if (count == _SEM_VALUE_MAX)
22097c478bd9Sstevel@tonic-gate 		error = EOVERFLOW;
22107c478bd9Sstevel@tonic-gate 	else
22117c478bd9Sstevel@tonic-gate 		suword32_noerr(&sp->sema_count, ++count);
	/*
	 * A 0 -> 1 transition means there may be waiters: release one
	 * and rewrite the user-visible waiters flag to reflect whether
	 * any remain queued.
	 */
22127c478bd9Sstevel@tonic-gate 	if (count == 1) {
22137c478bd9Sstevel@tonic-gate 		fuword8_noerr(&sp->sema_waiters, &waiters);
22147c478bd9Sstevel@tonic-gate 		if (waiters) {
22157c478bd9Sstevel@tonic-gate 			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
22167c478bd9Sstevel@tonic-gate 			suword8_noerr(&sp->sema_waiters, waiters);
22177c478bd9Sstevel@tonic-gate 		}
22187c478bd9Sstevel@tonic-gate 	}
22197c478bd9Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
22207c478bd9Sstevel@tonic-gate out:
22217c478bd9Sstevel@tonic-gate 	no_fault();
22227c478bd9Sstevel@tonic-gate 	if (watched)
22237c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
22247c478bd9Sstevel@tonic-gate 	if (error)
22257c478bd9Sstevel@tonic-gate 		return (set_errno(error));
22267c478bd9Sstevel@tonic-gate 	return (0);
22277c478bd9Sstevel@tonic-gate }
22287c478bd9Sstevel@tonic-gate 
/*
 * Flags kept in t_writer while an LWP waits on a user-level rwlock:
 * TRW_WANT_WRITE marks the waiter as wanting the write lock;
 * TRW_LOCK_GRANTED is set by lwp_rwlock_release() on a successful
 * direct hand-off of the lock to the waiter.
 */
22297c478bd9Sstevel@tonic-gate #define	TRW_WANT_WRITE		0x1
22307c478bd9Sstevel@tonic-gate #define	TRW_LOCK_GRANTED	0x2
22317c478bd9Sstevel@tonic-gate 
/*
 * Lock-request encodings for lwp_rwlock_lock(): the low bit selects
 * read vs. write, and TRY_FLAG requests non-blocking (trylock)
 * behavior, returning EBUSY instead of sleeping.
 */
22327c478bd9Sstevel@tonic-gate #define	READ_LOCK		0
22337c478bd9Sstevel@tonic-gate #define	WRITE_LOCK		1
22347c478bd9Sstevel@tonic-gate #define	TRY_FLAG		0x10
22357c478bd9Sstevel@tonic-gate #define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
22367c478bd9Sstevel@tonic-gate #define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)
22377c478bd9Sstevel@tonic-gate 
22387c478bd9Sstevel@tonic-gate /*
22397c478bd9Sstevel@tonic-gate  * Release one writer or one or more readers. Compute the rwstate word to
22407c478bd9Sstevel@tonic-gate  * reflect the new state of the queue. For a safe hand-off we copy the new
22417c478bd9Sstevel@tonic-gate  * rwstate value back to userland before we wake any of the new lock holders.
22427c478bd9Sstevel@tonic-gate  *
22437c478bd9Sstevel@tonic-gate  * Note that sleepq_insert() implements a prioritized FIFO (with writers
22447c478bd9Sstevel@tonic-gate  * being given precedence over readers of the same priority).
22457c478bd9Sstevel@tonic-gate  *
22467c478bd9Sstevel@tonic-gate  * If the first thread is a reader we scan the queue releasing all readers
22477c478bd9Sstevel@tonic-gate  * until we hit a writer or the end of the queue. If the first thread is a
224841efec22Sraf  * writer we still need to check for another writer.
22497c478bd9Sstevel@tonic-gate  */
22507c478bd9Sstevel@tonic-gate void
lwp_rwlock_release(lwpchan_t * lwpchan,lwp_rwlock_t * rw)22517c478bd9Sstevel@tonic-gate lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw)
22527c478bd9Sstevel@tonic-gate {
22537c478bd9Sstevel@tonic-gate 	sleepq_head_t *sqh;
22547c478bd9Sstevel@tonic-gate 	kthread_t *tp;
22557c478bd9Sstevel@tonic-gate 	kthread_t **tpp;
22567c478bd9Sstevel@tonic-gate 	kthread_t *tpnext;
	/* Singly-linked (via t_link) list of threads to be woken. */
22577c478bd9Sstevel@tonic-gate 	kthread_t *wakelist = NULL;
22587c478bd9Sstevel@tonic-gate 	uint32_t rwstate = 0;
22597c478bd9Sstevel@tonic-gate 	int wcount = 0;
22607c478bd9Sstevel@tonic-gate 	int rcount = 0;
22617c478bd9Sstevel@tonic-gate 
	/*
	 * All queue surgery and wakeups below are done while holding
	 * the hashed sleep queue's dispatcher lock.
	 */
22627c478bd9Sstevel@tonic-gate 	sqh = lwpsqhash(lwpchan);
22637c478bd9Sstevel@tonic-gate 	disp_lock_enter(&sqh->sq_lock);
22647c478bd9Sstevel@tonic-gate 	tpp = &sqh->sq_queue.sq_first;
	/* Scan the queue for threads blocked on this particular lwpchan. */
22657c478bd9Sstevel@tonic-gate 	while ((tp = *tpp) != NULL) {
22667c478bd9Sstevel@tonic-gate 		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
22677c478bd9Sstevel@tonic-gate 		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
22687c478bd9Sstevel@tonic-gate 			if (tp->t_writer & TRW_WANT_WRITE) {
22697c478bd9Sstevel@tonic-gate 				if ((wcount++ == 0) && (rcount == 0)) {
22707c478bd9Sstevel@tonic-gate 					rwstate |= URW_WRITE_LOCKED;
22717c478bd9Sstevel@tonic-gate 
22727c478bd9Sstevel@tonic-gate 					/* Just one writer to wake. */
22737c478bd9Sstevel@tonic-gate 					sleepq_unlink(tpp, tp);
22747c478bd9Sstevel@tonic-gate 					wakelist = tp;
22757c478bd9Sstevel@tonic-gate 
22767c478bd9Sstevel@tonic-gate 					/* tpp already set for next thread. */
22777c478bd9Sstevel@tonic-gate 					continue;
22787c478bd9Sstevel@tonic-gate 				} else {
227941efec22Sraf 					rwstate |= URW_HAS_WAITERS;
22807c478bd9Sstevel@tonic-gate 					/* We need look no further. */
22817c478bd9Sstevel@tonic-gate 					break;
22827c478bd9Sstevel@tonic-gate 				}
22837c478bd9Sstevel@tonic-gate 			} else {
22847c478bd9Sstevel@tonic-gate 				rcount++;
22857c478bd9Sstevel@tonic-gate 				if (wcount == 0) {
22867c478bd9Sstevel@tonic-gate 					rwstate++;
22877c478bd9Sstevel@tonic-gate 
22887c478bd9Sstevel@tonic-gate 					/* Add reader to wake list. */
22897c478bd9Sstevel@tonic-gate 					sleepq_unlink(tpp, tp);
22907c478bd9Sstevel@tonic-gate 					tp->t_link = wakelist;
22917c478bd9Sstevel@tonic-gate 					wakelist = tp;
22927c478bd9Sstevel@tonic-gate 
22937c478bd9Sstevel@tonic-gate 					/* tpp already set for next thread. */
22947c478bd9Sstevel@tonic-gate 					continue;
229541efec22Sraf 				} else {
22967c478bd9Sstevel@tonic-gate 					rwstate |= URW_HAS_WAITERS;
229741efec22Sraf 					/* We need look no further. */
229841efec22Sraf 					break;
229941efec22Sraf 				}
23007c478bd9Sstevel@tonic-gate 			}
23017c478bd9Sstevel@tonic-gate 		}
23027c478bd9Sstevel@tonic-gate 		tpp = &tp->t_link;
23037c478bd9Sstevel@tonic-gate 	}
23047c478bd9Sstevel@tonic-gate 
	/*
	 * Copy the new rwstate back to userland BEFORE waking anyone,
	 * so the new holder(s) observe a consistent lock word (the safe
	 * hand-off described in the block comment above).
	 */
23057c478bd9Sstevel@tonic-gate 	/* Copy the new rwstate back to userland. */
23067c478bd9Sstevel@tonic-gate 	suword32_noerr(&rw->rwlock_readers, rwstate);
23077c478bd9Sstevel@tonic-gate 
23087c478bd9Sstevel@tonic-gate 	/* Wake the new lock holder(s) up. */
23097c478bd9Sstevel@tonic-gate 	tp = wakelist;
23107c478bd9Sstevel@tonic-gate 	while (tp != NULL) {
23117c478bd9Sstevel@tonic-gate 		DTRACE_SCHED1(wakeup, kthread_t *, tp);
23127c478bd9Sstevel@tonic-gate 		tp->t_wchan0 = NULL;
23137c478bd9Sstevel@tonic-gate 		tp->t_wchan = NULL;
23147c478bd9Sstevel@tonic-gate 		tp->t_sobj_ops = NULL;
		/* Tell the waiter it owns the lock when it resumes. */
23157c478bd9Sstevel@tonic-gate 		tp->t_writer |= TRW_LOCK_GRANTED;
23167c478bd9Sstevel@tonic-gate 		tpnext = tp->t_link;
23177c478bd9Sstevel@tonic-gate 		tp->t_link = NULL;
23187c478bd9Sstevel@tonic-gate 		CL_WAKEUP(tp);
23197c478bd9Sstevel@tonic-gate 		thread_unlock_high(tp);
23207c478bd9Sstevel@tonic-gate 		tp = tpnext;
23217c478bd9Sstevel@tonic-gate 	}
23227c478bd9Sstevel@tonic-gate 
23237c478bd9Sstevel@tonic-gate 	disp_lock_exit(&sqh->sq_lock);
23247c478bd9Sstevel@tonic-gate }
23257c478bd9Sstevel@tonic-gate 
23267c478bd9Sstevel@tonic-gate /*
 * Common code for the user-level rwlock lock/trylock operations:
 * rd_wr is READ_LOCK or WRITE_LOCK, optionally or'd with TRY_FLAG
 * (trylock: return EBUSY rather than sleeping), and tsp is an
 * optional timeout.  Returns 0 on success or sets errno to one of
 * EFAULT, EINVAL, EBUSY, EINTR, ETIME or EAGAIN.
 *
23277c478bd9Sstevel@tonic-gate  * We enter here holding the user-level mutex, which we must release before
23287c478bd9Sstevel@tonic-gate  * returning or blocking. Based on lwp_cond_wait().
23297c478bd9Sstevel@tonic-gate  */
23307c478bd9Sstevel@tonic-gate static int
lwp_rwlock_lock(lwp_rwlock_t * rw,timespec_t * tsp,int rd_wr)23317c478bd9Sstevel@tonic-gate lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr)
23327c478bd9Sstevel@tonic-gate {
23337c478bd9Sstevel@tonic-gate 	lwp_mutex_t *mp = NULL;
23347c478bd9Sstevel@tonic-gate 	kthread_t *t = curthread;
23357c478bd9Sstevel@tonic-gate 	kthread_t *tp;
23367c478bd9Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
23377c478bd9Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
23387c478bd9Sstevel@tonic-gate 	lwp_timer_t lwpt;
23397c478bd9Sstevel@tonic-gate 	lwpchan_t lwpchan;
23407c478bd9Sstevel@tonic-gate 	lwpchan_t mlwpchan;
23417c478bd9Sstevel@tonic-gate 	caddr_t timedwait;
23427c478bd9Sstevel@tonic-gate 	volatile uint16_t type = 0;
23437c478bd9Sstevel@tonic-gate 	volatile uint8_t mtype = 0;
23447c478bd9Sstevel@tonic-gate 	uchar_t mwaiters;
23457c478bd9Sstevel@tonic-gate 	volatile int error = 0;
23467c478bd9Sstevel@tonic-gate 	int time_error;
23477c478bd9Sstevel@tonic-gate 	clock_t tim = -1;
23487c478bd9Sstevel@tonic-gate 	volatile int locked = 0;
23497c478bd9Sstevel@tonic-gate 	volatile int mlocked = 0;
23507c478bd9Sstevel@tonic-gate 	volatile int watched = 0;
23517c478bd9Sstevel@tonic-gate 	volatile int mwatched = 0;
23527c478bd9Sstevel@tonic-gate 	label_t ljb;
23537c478bd9Sstevel@tonic-gate 	volatile int no_lwpchan = 1;
23547c478bd9Sstevel@tonic-gate 	int imm_timeout = 0;
23557c478bd9Sstevel@tonic-gate 	int try_flag;
23567c478bd9Sstevel@tonic-gate 	uint32_t rwstate;
23577c478bd9Sstevel@tonic-gate 	int acquired = 0;
23587c478bd9Sstevel@tonic-gate 
23597c478bd9Sstevel@tonic-gate 	/* We only check rw because the mutex is included in it. */
23607c478bd9Sstevel@tonic-gate 	if ((caddr_t)rw >= p->p_as->a_userlimit)
23617c478bd9Sstevel@tonic-gate 		return (set_errno(EFAULT));
23627c478bd9Sstevel@tonic-gate 
236307a48826SRoger A. Faulkner 	/*
236407a48826SRoger A. Faulkner 	 * Put the lwp in an orderly state for debugging,
236507a48826SRoger A. Faulkner 	 * in case we are stopped while sleeping, below.
236607a48826SRoger A. Faulkner 	 */
236707a48826SRoger A. Faulkner 	prstop(PR_REQUESTED, 0);
236807a48826SRoger A. Faulkner 
23697c478bd9Sstevel@tonic-gate 	/* We must only report this error if we are about to sleep (later). */
23707c478bd9Sstevel@tonic-gate 	timedwait = (caddr_t)tsp;
23717c478bd9Sstevel@tonic-gate 	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
23727c478bd9Sstevel@tonic-gate 	    lwpt.lwpt_imm_timeout) {
23737c478bd9Sstevel@tonic-gate 		imm_timeout = 1;
23747c478bd9Sstevel@tonic-gate 		timedwait = NULL;
23757c478bd9Sstevel@tonic-gate 	}
23767c478bd9Sstevel@tonic-gate 
23777c478bd9Sstevel@tonic-gate 	(void) new_mstate(t, LMS_USER_LOCK);
23787c478bd9Sstevel@tonic-gate 
	/* Any pagefault in the userland accesses below lands here. */
23797c478bd9Sstevel@tonic-gate 	if (on_fault(&ljb)) {
23807c478bd9Sstevel@tonic-gate 		if (no_lwpchan) {
23817c478bd9Sstevel@tonic-gate 			error = EFAULT;
23827c478bd9Sstevel@tonic-gate 			goto out_nodrop;
23837c478bd9Sstevel@tonic-gate 		}
23847c478bd9Sstevel@tonic-gate 		if (mlocked) {
23857c478bd9Sstevel@tonic-gate 			mlocked = 0;
23867c478bd9Sstevel@tonic-gate 			lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
23877c478bd9Sstevel@tonic-gate 		}
23887c478bd9Sstevel@tonic-gate 		if (locked) {
23897c478bd9Sstevel@tonic-gate 			locked = 0;
23907c478bd9Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
23917c478bd9Sstevel@tonic-gate 		}
23927c478bd9Sstevel@tonic-gate 		/*
23937c478bd9Sstevel@tonic-gate 		 * Set up another on_fault() for a possible fault
23947c478bd9Sstevel@tonic-gate 		 * on the user lock accessed at "out_drop".
23957c478bd9Sstevel@tonic-gate 		 */
23967c478bd9Sstevel@tonic-gate 		if (on_fault(&ljb)) {
23977c478bd9Sstevel@tonic-gate 			if (mlocked) {
23987c478bd9Sstevel@tonic-gate 				mlocked = 0;
23997c478bd9Sstevel@tonic-gate 				lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
24007c478bd9Sstevel@tonic-gate 			}
24017c478bd9Sstevel@tonic-gate 			error = EFAULT;
24027c478bd9Sstevel@tonic-gate 			goto out_nodrop;
24037c478bd9Sstevel@tonic-gate 		}
24047c478bd9Sstevel@tonic-gate 		error = EFAULT;
24057c478bd9Sstevel@tonic-gate 		goto out_nodrop;
24067c478bd9Sstevel@tonic-gate 	}
24077c478bd9Sstevel@tonic-gate 
24087c478bd9Sstevel@tonic-gate 	/* Process rd_wr (including sanity check). */
24097c478bd9Sstevel@tonic-gate 	try_flag = (rd_wr & TRY_FLAG);
24107c478bd9Sstevel@tonic-gate 	rd_wr &= ~TRY_FLAG;
24117c478bd9Sstevel@tonic-gate 	if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) {
24127c478bd9Sstevel@tonic-gate 		error = EINVAL;
24137c478bd9Sstevel@tonic-gate 		goto out_nodrop;
24147c478bd9Sstevel@tonic-gate 	}
24157c478bd9Sstevel@tonic-gate 
24168118ecd5Sraf 	/*
24178118ecd5Sraf 	 * Force Copy-on-write if necessary and ensure that the
24188118ecd5Sraf 	 * synchronization object resides in read/write memory.
24198118ecd5Sraf 	 * Cause an EFAULT return now if this is not so.
24208118ecd5Sraf 	 */
24217c478bd9Sstevel@tonic-gate 	mp = &rw->mutex;
24227c478bd9Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
24237c478bd9Sstevel@tonic-gate 	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
24248118ecd5Sraf 	suword8_noerr(&mp->mutex_type, mtype);
24258118ecd5Sraf 	suword16_noerr(&rw->rwlock_type, type);
24268118ecd5Sraf 
24278118ecd5Sraf 	/* We can only continue for simple USYNC_PROCESS locks. */
24287c478bd9Sstevel@tonic-gate 	if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) {
24297c478bd9Sstevel@tonic-gate 		error = EINVAL;
24307c478bd9Sstevel@tonic-gate 		goto out_nodrop;
24317c478bd9Sstevel@tonic-gate 	}
24327c478bd9Sstevel@tonic-gate 
24337c478bd9Sstevel@tonic-gate 	/* Convert user level mutex, "mp", to a unique lwpchan. */
24347c478bd9Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
24357c478bd9Sstevel@tonic-gate 	    &mlwpchan, LWPCHAN_MPPOOL)) {
24367c478bd9Sstevel@tonic-gate 		error = EFAULT;
24377c478bd9Sstevel@tonic-gate 		goto out_nodrop;
24387c478bd9Sstevel@tonic-gate 	}
24397c478bd9Sstevel@tonic-gate 
24407c478bd9Sstevel@tonic-gate 	/* Convert user level rwlock, "rw", to a unique lwpchan. */
24417c478bd9Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
24427c478bd9Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
24437c478bd9Sstevel@tonic-gate 		error = EFAULT;
24447c478bd9Sstevel@tonic-gate 		goto out_nodrop;
24457c478bd9Sstevel@tonic-gate 	}
24467c478bd9Sstevel@tonic-gate 
24477c478bd9Sstevel@tonic-gate 	no_lwpchan = 0;
24487c478bd9Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
24497c478bd9Sstevel@tonic-gate 	mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
24507c478bd9Sstevel@tonic-gate 
24517c478bd9Sstevel@tonic-gate 	/*
24527c478bd9Sstevel@tonic-gate 	 * lwpchan_lock() ensures that the calling LWP is put to sleep
24537c478bd9Sstevel@tonic-gate 	 * atomically with respect to a possible wakeup which is a result
24547c478bd9Sstevel@tonic-gate 	 * of lwp_rwlock_unlock().
24557c478bd9Sstevel@tonic-gate 	 *
24567c478bd9Sstevel@tonic-gate 	 * What's misleading is that the LWP is put to sleep after the
24577c478bd9Sstevel@tonic-gate 	 * rwlock's mutex is released. This is OK as long as the release
24587c478bd9Sstevel@tonic-gate 	 * operation is also done while holding mlwpchan. The LWP is then
24597c478bd9Sstevel@tonic-gate 	 * put to sleep when the possibility of pagefaulting or sleeping
24607c478bd9Sstevel@tonic-gate 	 * has been completely eliminated.
24617c478bd9Sstevel@tonic-gate 	 */
24627c478bd9Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
24637c478bd9Sstevel@tonic-gate 	locked = 1;
24647c478bd9Sstevel@tonic-gate 	lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
24657c478bd9Sstevel@tonic-gate 	mlocked = 1;
24667c478bd9Sstevel@tonic-gate 
24677c478bd9Sstevel@tonic-gate 	/*
24687c478bd9Sstevel@tonic-gate 	 * Fetch the current rwlock state.
24697c478bd9Sstevel@tonic-gate 	 *
247041efec22Sraf 	 * The possibility of spurious wake-ups or killed waiters means
247141efec22Sraf 	 * rwstate's URW_HAS_WAITERS bit may indicate false positives.
247241efec22Sraf 	 * We only fix these if they are important to us.
24737c478bd9Sstevel@tonic-gate 	 *
24747c478bd9Sstevel@tonic-gate 	 * Although various error states can be observed here (e.g. the lock
24757c478bd9Sstevel@tonic-gate 	 * is not held, but there are waiters) we assume these are application
24767c478bd9Sstevel@tonic-gate 	 * errors and so we take no corrective action.
24777c478bd9Sstevel@tonic-gate 	 */
24787c478bd9Sstevel@tonic-gate 	fuword32_noerr(&rw->rwlock_readers, &rwstate);
24797c478bd9Sstevel@tonic-gate 	/*
248041efec22Sraf 	 * We cannot legitimately get here from user-level
248141efec22Sraf 	 * without URW_HAS_WAITERS being set.
248241efec22Sraf 	 * Set it now to guard against user-level error.
24837c478bd9Sstevel@tonic-gate 	 */
248441efec22Sraf 	rwstate |= URW_HAS_WAITERS;
24857c478bd9Sstevel@tonic-gate 
24867c478bd9Sstevel@tonic-gate 	/*
248741efec22Sraf 	 * We can try only if the lock isn't held by a writer.
24887c478bd9Sstevel@tonic-gate 	 */
248941efec22Sraf 	if (!(rwstate & URW_WRITE_LOCKED)) {
24907c478bd9Sstevel@tonic-gate 		tp = lwp_queue_waiter(&lwpchan);
24917c478bd9Sstevel@tonic-gate 		if (tp == NULL) {
24927c478bd9Sstevel@tonic-gate 			/*
24937c478bd9Sstevel@tonic-gate 			 * Hmmm, rwstate indicates waiters but there are
24947c478bd9Sstevel@tonic-gate 			 * none queued. This could just be the result of a
249541efec22Sraf 			 * spurious wakeup, so let's ignore it.
249641efec22Sraf 			 *
249741efec22Sraf 			 * We now have a chance to acquire the lock
249841efec22Sraf 			 * uncontended, but this is the last chance for
249941efec22Sraf 			 * a writer to acquire the lock without blocking.
25007c478bd9Sstevel@tonic-gate 			 */
25017c478bd9Sstevel@tonic-gate 			if (rd_wr == READ_LOCK) {
25027c478bd9Sstevel@tonic-gate 				rwstate++;
25037c478bd9Sstevel@tonic-gate 				acquired = 1;
250441efec22Sraf 			} else if ((rwstate & URW_READERS_MASK) == 0) {
250541efec22Sraf 				rwstate |= URW_WRITE_LOCKED;
25067c478bd9Sstevel@tonic-gate 				acquired = 1;
25077c478bd9Sstevel@tonic-gate 			}
25087c478bd9Sstevel@tonic-gate 		} else if (rd_wr == READ_LOCK) {
25097c478bd9Sstevel@tonic-gate 			/*
25107c478bd9Sstevel@tonic-gate 			 * This is the last chance for a reader to acquire
25117c478bd9Sstevel@tonic-gate 			 * the lock now, but it can only do so if there is
25127c478bd9Sstevel@tonic-gate 			 * no writer of equal or greater priority at the
25137c478bd9Sstevel@tonic-gate 			 * head of the queue.
25147c478bd9Sstevel@tonic-gate 			 *
25157c478bd9Sstevel@tonic-gate 			 * It is also just possible that there is a reader
25167c478bd9Sstevel@tonic-gate 			 * at the head of the queue. This may be the result
25177c478bd9Sstevel@tonic-gate 			 * of a spurious wakeup or an application failure.
25187c478bd9Sstevel@tonic-gate 			 * In this case we only acquire the lock if we have
25197c478bd9Sstevel@tonic-gate 			 * equal or greater priority. It is not our job to
25207c478bd9Sstevel@tonic-gate 			 * release spurious waiters.
25217c478bd9Sstevel@tonic-gate 			 */
25227c478bd9Sstevel@tonic-gate 			pri_t our_pri = DISP_PRIO(t);
25237c478bd9Sstevel@tonic-gate 			pri_t his_pri = DISP_PRIO(tp);
25247c478bd9Sstevel@tonic-gate 
25257c478bd9Sstevel@tonic-gate 			if ((our_pri > his_pri) || ((our_pri == his_pri) &&
25267c478bd9Sstevel@tonic-gate 			    !(tp->t_writer & TRW_WANT_WRITE))) {
25277c478bd9Sstevel@tonic-gate 				rwstate++;
25287c478bd9Sstevel@tonic-gate 				acquired = 1;
25297c478bd9Sstevel@tonic-gate 			}
25307c478bd9Sstevel@tonic-gate 		}
25317c478bd9Sstevel@tonic-gate 	}
25327c478bd9Sstevel@tonic-gate 
25337c478bd9Sstevel@tonic-gate 	if (acquired || try_flag || time_error) {
25347c478bd9Sstevel@tonic-gate 		/*
253541efec22Sraf 		 * We're not going to block this time.
25367c478bd9Sstevel@tonic-gate 		 */
25377c478bd9Sstevel@tonic-gate 		suword32_noerr(&rw->rwlock_readers, rwstate);
25387c478bd9Sstevel@tonic-gate 		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
25397c478bd9Sstevel@tonic-gate 		locked = 0;
25407c478bd9Sstevel@tonic-gate 
25417c478bd9Sstevel@tonic-gate 		if (acquired) {
25427c478bd9Sstevel@tonic-gate 			/*
25437c478bd9Sstevel@tonic-gate 			 * Got the lock!
25447c478bd9Sstevel@tonic-gate 			 */
25457c478bd9Sstevel@tonic-gate 			error = 0;
25467c478bd9Sstevel@tonic-gate 
25477c478bd9Sstevel@tonic-gate 		} else if (try_flag) {
25487c478bd9Sstevel@tonic-gate 			/*
25497c478bd9Sstevel@tonic-gate 			 * We didn't get the lock and we're about to block.
25507c478bd9Sstevel@tonic-gate 			 * If we're doing a trylock, return EBUSY instead.
25517c478bd9Sstevel@tonic-gate 			 */
25527c478bd9Sstevel@tonic-gate 			error = EBUSY;
25537c478bd9Sstevel@tonic-gate 
25547c478bd9Sstevel@tonic-gate 		} else if (time_error) {
25557c478bd9Sstevel@tonic-gate 			/*
25567c478bd9Sstevel@tonic-gate 			 * The SUSV3 POSIX spec is very clear that we should
25577c478bd9Sstevel@tonic-gate 			 * get no error from validating the timer (above)
25587c478bd9Sstevel@tonic-gate 			 * until we would actually sleep.
25597c478bd9Sstevel@tonic-gate 			 */
25607c478bd9Sstevel@tonic-gate 			error = time_error;
25617c478bd9Sstevel@tonic-gate 		}
25627c478bd9Sstevel@tonic-gate 
25637c478bd9Sstevel@tonic-gate 		goto out_drop;
25647c478bd9Sstevel@tonic-gate 	}
25657c478bd9Sstevel@tonic-gate 
25667c478bd9Sstevel@tonic-gate 	/*
25677c478bd9Sstevel@tonic-gate 	 * We're about to block, so indicate what kind of waiter we are.
25687c478bd9Sstevel@tonic-gate 	 */
25697c478bd9Sstevel@tonic-gate 	t->t_writer = 0;
257041efec22Sraf 	if (rd_wr == WRITE_LOCK)
25717c478bd9Sstevel@tonic-gate 		t->t_writer = TRW_WANT_WRITE;
25727c478bd9Sstevel@tonic-gate 	suword32_noerr(&rw->rwlock_readers, rwstate);
25737c478bd9Sstevel@tonic-gate 
25747c478bd9Sstevel@tonic-gate 	/*
25757c478bd9Sstevel@tonic-gate 	 * Unlock the rwlock's mutex (pagefaults are possible here).
25767c478bd9Sstevel@tonic-gate 	 */
2577db94676fSRoger A. Faulkner 	set_owner_pid(mp, 0, 0);
25787c478bd9Sstevel@tonic-gate 	ulock_clear(&mp->mutex_lockw);
25797c478bd9Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
25807c478bd9Sstevel@tonic-gate 	if (mwaiters != 0) {
25817c478bd9Sstevel@tonic-gate 		/*
25827c478bd9Sstevel@tonic-gate 		 * Given the locking of mlwpchan around the release of
25837c478bd9Sstevel@tonic-gate 		 * the mutex and checking for waiters, the following
25847c478bd9Sstevel@tonic-gate 		 * call to lwp_release() can fail ONLY if the lock
25857c478bd9Sstevel@tonic-gate 		 * acquirer is interrupted after setting the waiter bit,
25867c478bd9Sstevel@tonic-gate 		 * calling lwp_block() and releasing mlwpchan.
25877c478bd9Sstevel@tonic-gate 		 * In this case, it could get pulled off the LWP sleep
25887c478bd9Sstevel@tonic-gate 		 * queue (via setrun()) before the following call to
25897c478bd9Sstevel@tonic-gate 		 * lwp_release() occurs, and the lock requestor will
25907c478bd9Sstevel@tonic-gate 		 * update the waiter bit correctly by re-evaluating it.
25917c478bd9Sstevel@tonic-gate 		 */
259231db3c26Sraf 		if (lwp_release(&mlwpchan, &mwaiters, 0))
25937c478bd9Sstevel@tonic-gate 			suword8_noerr(&mp->mutex_waiters, mwaiters);
25947c478bd9Sstevel@tonic-gate 	}
25957c478bd9Sstevel@tonic-gate 	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
25967c478bd9Sstevel@tonic-gate 	mlocked = 0;
25977c478bd9Sstevel@tonic-gate 	no_fault();
25987c478bd9Sstevel@tonic-gate 
25997c478bd9Sstevel@tonic-gate 	if (mwatched) {
26007c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
26017c478bd9Sstevel@tonic-gate 		mwatched = 0;
26027c478bd9Sstevel@tonic-gate 	}
26037c478bd9Sstevel@tonic-gate 	if (watched) {
26047c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
26057c478bd9Sstevel@tonic-gate 		watched = 0;
26067c478bd9Sstevel@tonic-gate 	}
26077c478bd9Sstevel@tonic-gate 
26087c478bd9Sstevel@tonic-gate 	if (timedwait) {
26097c478bd9Sstevel@tonic-gate 		/*
26107c478bd9Sstevel@tonic-gate 		 * If we successfully queue the timeout,
26117c478bd9Sstevel@tonic-gate 		 * then don't drop t_delay_lock until
26127c478bd9Sstevel@tonic-gate 		 * we are on the sleep queue (below).
26137c478bd9Sstevel@tonic-gate 		 */
26147c478bd9Sstevel@tonic-gate 		mutex_enter(&t->t_delay_lock);
26157c478bd9Sstevel@tonic-gate 		if (lwp_timer_enqueue(&lwpt) != 0) {
26167c478bd9Sstevel@tonic-gate 			mutex_exit(&t->t_delay_lock);
26177c478bd9Sstevel@tonic-gate 			imm_timeout = 1;
26187c478bd9Sstevel@tonic-gate 			timedwait = NULL;
26197c478bd9Sstevel@tonic-gate 		}
26207c478bd9Sstevel@tonic-gate 	}
26217c478bd9Sstevel@tonic-gate 	t->t_flag |= T_WAITCVSEM;
26227c478bd9Sstevel@tonic-gate 	lwp_block(&lwpchan);
26237c478bd9Sstevel@tonic-gate 
26247c478bd9Sstevel@tonic-gate 	/*
26257c478bd9Sstevel@tonic-gate 	 * Nothing should happen to cause the LWP to go to sleep until after
26267c478bd9Sstevel@tonic-gate 	 * it returns from swtch().
26277c478bd9Sstevel@tonic-gate 	 */
26287c478bd9Sstevel@tonic-gate 	if (timedwait)
26297c478bd9Sstevel@tonic-gate 		mutex_exit(&t->t_delay_lock);
26307c478bd9Sstevel@tonic-gate 	locked = 0;
26317c478bd9Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2632fd6545c7Sraf 	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
26337c478bd9Sstevel@tonic-gate 		setrun(t);
26347c478bd9Sstevel@tonic-gate 	swtch();
26357c478bd9Sstevel@tonic-gate 
26367c478bd9Sstevel@tonic-gate 	/*
26377c478bd9Sstevel@tonic-gate 	 * We're back, but we need to work out why. Were we interrupted? Did
26387c478bd9Sstevel@tonic-gate 	 * we timeout? Were we granted the lock?
26397c478bd9Sstevel@tonic-gate 	 */
26407c478bd9Sstevel@tonic-gate 	error = EAGAIN;
26417c478bd9Sstevel@tonic-gate 	acquired = (t->t_writer & TRW_LOCK_GRANTED);
26427c478bd9Sstevel@tonic-gate 	t->t_writer = 0;
26437c478bd9Sstevel@tonic-gate 	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
26447c478bd9Sstevel@tonic-gate 	if (timedwait)
26457c478bd9Sstevel@tonic-gate 		tim = lwp_timer_dequeue(&lwpt);
26467c478bd9Sstevel@tonic-gate 	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
26477c478bd9Sstevel@tonic-gate 		error = EINTR;
26487c478bd9Sstevel@tonic-gate 	else if (imm_timeout || (timedwait && tim == -1))
26497c478bd9Sstevel@tonic-gate 		error = ETIME;
26507c478bd9Sstevel@tonic-gate 	lwp->lwp_asleep = 0;
26517c478bd9Sstevel@tonic-gate 	lwp->lwp_sysabort = 0;
26527c478bd9Sstevel@tonic-gate 	setallwatch();
26537c478bd9Sstevel@tonic-gate 
26547c478bd9Sstevel@tonic-gate 	/*
26557c478bd9Sstevel@tonic-gate 	 * If we were granted the lock we don't care about EINTR or ETIME.
26567c478bd9Sstevel@tonic-gate 	 */
26577c478bd9Sstevel@tonic-gate 	if (acquired)
26587c478bd9Sstevel@tonic-gate 		error = 0;
26597c478bd9Sstevel@tonic-gate 
26607c478bd9Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
26617c478bd9Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
26627c478bd9Sstevel@tonic-gate 
26637c478bd9Sstevel@tonic-gate 	if (error)
26647c478bd9Sstevel@tonic-gate 		return (set_errno(error));
26657c478bd9Sstevel@tonic-gate 	return (0);
26667c478bd9Sstevel@tonic-gate 
26677c478bd9Sstevel@tonic-gate out_drop:
26687c478bd9Sstevel@tonic-gate 	/*
26697c478bd9Sstevel@tonic-gate 	 * Make sure that the user level lock is dropped before returning
26707c478bd9Sstevel@tonic-gate 	 * to the caller.
26717c478bd9Sstevel@tonic-gate 	 */
26727c478bd9Sstevel@tonic-gate 	if (!mlocked) {
26737c478bd9Sstevel@tonic-gate 		lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
26747c478bd9Sstevel@tonic-gate 		mlocked = 1;
26757c478bd9Sstevel@tonic-gate 	}
2676db94676fSRoger A. Faulkner 	set_owner_pid(mp, 0, 0);
26777c478bd9Sstevel@tonic-gate 	ulock_clear(&mp->mutex_lockw);
26787c478bd9Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
26797c478bd9Sstevel@tonic-gate 	if (mwaiters != 0) {
26807c478bd9Sstevel@tonic-gate 		/*
26817c478bd9Sstevel@tonic-gate 		 * See comment above on lock clearing and lwp_release()
26827c478bd9Sstevel@tonic-gate 		 * success/failure.
26837c478bd9Sstevel@tonic-gate 		 */
268431db3c26Sraf 		if (lwp_release(&mlwpchan, &mwaiters, 0))
26857c478bd9Sstevel@tonic-gate 			suword8_noerr(&mp->mutex_waiters, mwaiters);
26867c478bd9Sstevel@tonic-gate 	}
26877c478bd9Sstevel@tonic-gate 	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
26887c478bd9Sstevel@tonic-gate 	mlocked = 0;
26897c478bd9Sstevel@tonic-gate 
26907c478bd9Sstevel@tonic-gate out_nodrop:
26917c478bd9Sstevel@tonic-gate 	no_fault();
26927c478bd9Sstevel@tonic-gate 	if (mwatched)
26937c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
26947c478bd9Sstevel@tonic-gate 	if (watched)
26957c478bd9Sstevel@tonic-gate 		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
26967c478bd9Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
26977c478bd9Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
26987c478bd9Sstevel@tonic-gate 	if (error)
26997c478bd9Sstevel@tonic-gate 		return (set_errno(error));
27007c478bd9Sstevel@tonic-gate 	return (0);
27017c478bd9Sstevel@tonic-gate }
27027c478bd9Sstevel@tonic-gate 
/*
 * Unlock a user-level readers/writer lock.
 *
 * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(),
 * we never drop the lock.
 *
 * Returns 0 on success; otherwise sets errno (EFAULT if the lock is not in
 * valid user memory, EINVAL if it is not a simple USYNC_PROCESS lock) via
 * set_errno().
 */
static int
lwp_rwlock_unlock(lwp_rwlock_t *rw)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	lwpchan_t lwpchan;
	/*
	 * These locals are volatile because they are read after the
	 * on_fault() longjmp and must not be cached in registers.
	 */
	volatile uint16_t type = 0;
	volatile int error = 0;
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile int no_lwpchan = 1;
	uint32_t rwstate;

	/* We only check rw because the mutex is included in it. */
	if ((caddr_t)rw >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	if (on_fault(&ljb)) {
		/*
		 * A fault while touching userland memory lands here;
		 * release the lwpchan lock if we were holding it.
		 */
		if (no_lwpchan) {
			error = EFAULT;
			goto out_nodrop;
		}
		if (locked) {
			locked = 0;
			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
		}
		error = EFAULT;
		goto out_nodrop;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
	suword16_noerr(&rw->rwlock_type, type);

	/* We can only continue for simple USYNC_PROCESS locks. */
	if (type != USYNC_PROCESS) {
		error = EINVAL;
		goto out_nodrop;
	}

	/* Convert user level rwlock, "rw", to a unique lwpchan. */
	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
	    &lwpchan, LWPCHAN_CVPOOL)) {
		error = EFAULT;
		goto out_nodrop;
	}

	no_lwpchan = 0;
	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);

	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 1;

	/*
	 * We can resolve multiple readers (except the last reader) here.
	 * For the last reader or a writer we need lwp_rwlock_release(),
	 * to which we also delegate the task of copying the new rwstate
	 * back to userland (see the comment there).
	 */
	fuword32_noerr(&rw->rwlock_readers, &rwstate);
	if (rwstate & URW_WRITE_LOCKED)
		lwp_rwlock_release(&lwpchan, rw);
	else if ((rwstate & URW_READERS_MASK) > 0) {
		/* Drop one reader; the last reader must wake waiters. */
		rwstate--;
		if ((rwstate & URW_READERS_MASK) == 0)
			lwp_rwlock_release(&lwpchan, rw);
		else
			suword32_noerr(&rw->rwlock_readers, rwstate);
	}

	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
	locked = 0;
	error = 0;

out_nodrop:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
27947c478bd9Sstevel@tonic-gate 
27957c478bd9Sstevel@tonic-gate int
lwp_rwlock_sys(int subcode,lwp_rwlock_t * rwlp,timespec_t * tsp)27967c478bd9Sstevel@tonic-gate lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp)
27977c478bd9Sstevel@tonic-gate {
27987c478bd9Sstevel@tonic-gate 	switch (subcode) {
27997c478bd9Sstevel@tonic-gate 	case 0:
28007c478bd9Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK));
28017c478bd9Sstevel@tonic-gate 	case 1:
28027c478bd9Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK));
28037c478bd9Sstevel@tonic-gate 	case 2:
28047c478bd9Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY));
28057c478bd9Sstevel@tonic-gate 	case 3:
28067c478bd9Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY));
28077c478bd9Sstevel@tonic-gate 	case 4:
28087c478bd9Sstevel@tonic-gate 		return (lwp_rwlock_unlock(rwlp));
28097c478bd9Sstevel@tonic-gate 	}
28107c478bd9Sstevel@tonic-gate 	return (set_errno(EINVAL));
28117c478bd9Sstevel@tonic-gate }
28127c478bd9Sstevel@tonic-gate 
28137c478bd9Sstevel@tonic-gate /*
28147c478bd9Sstevel@tonic-gate  * Return the owner of the user-level s-object.
28157c478bd9Sstevel@tonic-gate  * Since we can't really do this, return NULL.
28167c478bd9Sstevel@tonic-gate  */
28177c478bd9Sstevel@tonic-gate /* ARGSUSED */
28187c478bd9Sstevel@tonic-gate static kthread_t *
lwpsobj_owner(caddr_t sobj)28197c478bd9Sstevel@tonic-gate lwpsobj_owner(caddr_t sobj)
28207c478bd9Sstevel@tonic-gate {
28217c478bd9Sstevel@tonic-gate 	return ((kthread_t *)NULL);
28227c478bd9Sstevel@tonic-gate }
28237c478bd9Sstevel@tonic-gate 
/*
 * Wake up a thread asleep on a user-level synchronization
 * object.
 *
 * The caller must hold the thread lock (asserted below).  Panics if the
 * thread is not actually on an lwpchan sleep queue.
 */
static void
lwp_unsleep(kthread_t *t)
{
	ASSERT(THREAD_LOCK_HELD(t));
	/* t_wchan0 != NULL indicates a user-level (lwpchan) sleep. */
	if (t->t_wchan0 != NULL) {
		sleepq_head_t *sqh;
		sleepq_t *sqp = t->t_sleepq;

		if (sqp != NULL) {
			/* Sanity: the thread's lwpchan hashes to its queue. */
			sqh = lwpsqhash(&t->t_lwpchan);
			ASSERT(&sqh->sq_queue == sqp);
			/* Remove the thread from the sleep queue ... */
			sleepq_unsleep(t);
			disp_lock_exit_high(&sqh->sq_lock);
			/* ... and let its scheduling class make it runnable. */
			CL_SETRUN(t);
			return;
		}
	}
	panic("lwp_unsleep: thread %p not on sleepq", (void *)t);
}
28477c478bd9Sstevel@tonic-gate 
28487c478bd9Sstevel@tonic-gate /*
28497c478bd9Sstevel@tonic-gate  * Change the priority of a thread asleep on a user-level
28507c478bd9Sstevel@tonic-gate  * synchronization object. To maintain proper priority order,
28517c478bd9Sstevel@tonic-gate  * we:
28527c478bd9Sstevel@tonic-gate  *	o dequeue the thread.
28537c478bd9Sstevel@tonic-gate  *	o change its priority.
28547c478bd9Sstevel@tonic-gate  *	o re-enqueue the thread.
28557c478bd9Sstevel@tonic-gate  * Assumption: the thread is locked on entry.
28567c478bd9Sstevel@tonic-gate  */
28577c478bd9Sstevel@tonic-gate static void
lwp_change_pri(kthread_t * t,pri_t pri,pri_t * t_prip)28587c478bd9Sstevel@tonic-gate lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip)
28597c478bd9Sstevel@tonic-gate {
28607c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(t));
28617c478bd9Sstevel@tonic-gate 	if (t->t_wchan0 != NULL) {
28627c478bd9Sstevel@tonic-gate 		sleepq_t   *sqp = t->t_sleepq;
28637c478bd9Sstevel@tonic-gate 
28647c478bd9Sstevel@tonic-gate 		sleepq_dequeue(t);
28657c478bd9Sstevel@tonic-gate 		*t_prip = pri;
28667c478bd9Sstevel@tonic-gate 		sleepq_insert(sqp, t);
28677c478bd9Sstevel@tonic-gate 	} else
28687c478bd9Sstevel@tonic-gate 		panic("lwp_change_pri: %p not on a sleep queue", (void *)t);
28697c478bd9Sstevel@tonic-gate }
28707c478bd9Sstevel@tonic-gate 
/*
 * Clean up a left-over process-shared robust mutex.
 *
 * 'ent' is an lwpchan cache entry describing the mutex; 'lockflg' is the
 * LOCK_* flag (e.g. owner-dead/unmapped) to record in the mutex via
 * lwp_clear_mutex().  Only (USYNC_PROCESS | LOCK_ROBUST) mutexes are
 * handled; everything else is ignored.
 * NOTE(review): presumably invoked while tearing down a process's lwpchan
 * cache (exit/exec) — confirm against the caller.
 */
static void
lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg)
{
	uint16_t flag;
	uchar_t waiters;
	label_t ljb;
	pid_t owner_pid;
	lwp_mutex_t *lp;
	/* volatile: examined after the on_fault() longjmp */
	volatile int locked = 0;
	volatile int watched = 0;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if ((ent->lwpchan_type & (USYNC_PROCESS | LOCK_ROBUST))
	    != (USYNC_PROCESS | LOCK_ROBUST))
		return;

	lp = (lwp_mutex_t *)ent->lwpchan_addr;
	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (on_fault(&ljb)) {
		/* Fault touching userland: drop whatever we hold and bail. */
		if (locked)
			lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		goto out;
	}

	fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid);

	if (UPIMUTEX(ent->lwpchan_type)) {
		/* Priority-inheritance mutex: state lives in the UPI chain. */
		lwpchan_t lwpchan = ent->lwpchan_lwpchan;
		upib_t *upibp = &UPI_CHAIN(lwpchan);

		/* Nothing to clean up unless this process is the owner. */
		if (owner_pid != curproc->p_pid)
			goto out;
		mutex_enter(&upibp->upib_lock);
		upimutex = upi_get(upibp, &lwpchan);
		if (upimutex == NULL || upimutex->upi_owner != curthread) {
			mutex_exit(&upibp->upib_lock);
			goto out;
		}
		mutex_exit(&upibp->upib_lock);
		upilocked = 1;
		/* Mark the mutex per lockflg, clear it, and release. */
		flag = lwp_clear_mutex(lp, lockflg);
		suword8_noerr(&lp->mutex_lockw, 0);
		upimutex_unlock((upimutex_t *)upimutex, flag);
	} else {
		lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
		locked = 1;
		/*
		 * Clear the spinners count because one of our
		 * threads could have been spinning for this lock
		 * at user level when the process was suddenly killed.
		 * There is no harm in this since user-level libc code
		 * will adapt to the sudden change in the spinner count.
		 */
		suword8_noerr(&lp->mutex_spinners, 0);
		if (owner_pid != curproc->p_pid) {
			/*
			 * We are not the owner.  There may or may not be one.
			 * If there are waiters, we wake up one or all of them.
			 * It doesn't hurt to wake them up in error since
			 * they will just retry the lock and go to sleep
			 * again if necessary.
			 */
			fuword8_noerr(&lp->mutex_waiters, &waiters);
			if (waiters != 0) {	/* there are waiters */
				fuword16_noerr(&lp->mutex_flag, &flag);
				if (flag & LOCK_NOTRECOVERABLE) {
					/* No one can recover it: wake all. */
					lwp_release_all(&ent->lwpchan_lwpchan);
					suword8_noerr(&lp->mutex_waiters, 0);
				} else if (lwp_release(&ent->lwpchan_lwpchan,
				    &waiters, 0)) {
					suword8_noerr(&lp->mutex_waiters,
					    waiters);
				}
			}
		} else {
			/*
			 * We are the owner.  Release it.
			 */
			(void) lwp_clear_mutex(lp, lockflg);
			ulock_clear(&lp->mutex_lockw);
			fuword8_noerr(&lp->mutex_waiters, &waiters);
			if (waiters &&
			    lwp_release(&ent->lwpchan_lwpchan, &waiters, 0))
				suword8_noerr(&lp->mutex_waiters, waiters);
		}
		lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
	}
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
}
29697c478bd9Sstevel@tonic-gate 
/*
 * Register a process-shared robust mutex in the lwpchan cache.
 *
 * 'lp' is the user-level mutex; 'uaddr' is passed through to
 * lwpchan_get_mapping() (NOTE(review): presumably the user-level
 * registration address — confirm against lwpchan_get_mapping()).
 * Returns 0 on success; sets errno to EFAULT for bad user memory or
 * EINVAL if the mutex is not both USYNC_PROCESS and LOCK_ROBUST.
 */
int
lwp_mutex_register(lwp_mutex_t *lp, caddr_t uaddr)
{
	int error = 0;
	/* volatile: read after the on_fault() longjmp */
	volatile int watched;
	label_t ljb;
	uint8_t type;
	lwpchan_t lwpchan;

	if ((caddr_t)lp >= (caddr_t)USERLIMIT)
		return (set_errno(EFAULT));

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (on_fault(&ljb)) {
		error = EFAULT;
	} else {
		/*
		 * Force Copy-on-write if necessary and ensure that the
		 * synchronization object resides in read/write memory.
		 * Cause an EFAULT return now if this is not so.
		 */
		fuword8_noerr(&lp->mutex_type, &type);
		suword8_noerr(&lp->mutex_type, type);
		/* Only process-shared robust mutexes may be registered. */
		if ((type & (USYNC_PROCESS|LOCK_ROBUST))
		    != (USYNC_PROCESS|LOCK_ROBUST)) {
			error = EINVAL;
		} else if (!lwpchan_get_mapping(curproc->p_as, (caddr_t)lp,
		    uaddr, type, &lwpchan, LWPCHAN_MPPOOL)) {
			error = EFAULT;
		}
	}
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
30127c478bd9Sstevel@tonic-gate 
/*
 * There is a user-level robust lock registration in libc.
 * Mark it as invalid by storing -1 into the location of the pointer.
 *
 * The store width matches the caller's data model: a full word for
 * native processes, 32 bits for 32-bit processes on a 64-bit kernel.
 * Store failures are deliberately ignored (best effort).
 */
static void
lwp_mutex_unregister(void *uaddr)
{
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(uaddr, (ulong_t)-1);
#ifdef _SYSCALL32_IMPL
	} else {
		(void) suword32(uaddr, (uint32_t)-1);
#endif
	}
}
3028c242ec1bSRoger A. Faulkner 
/*
 * Try to acquire a user-level mutex without blocking.
 *
 * 'owner' is recorded in the mutex via set_owner_pid() on success
 * (NOTE(review): presumably the user-level owner identity, e.g. the
 * ulwp address — confirm against set_owner_pid() and libc).
 * Returns 0 on success; EBUSY if the lock is held; EOWNERDEAD /
 * ELOCKUNMAPPED / ENOTRECOVERABLE for robust-mutex conditions;
 * EFAULT for bad user memory.  Errors are returned via set_errno().
 */
int
lwp_mutex_trylock(lwp_mutex_t *lp, uintptr_t owner)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	int error = 0;
	/* volatile: examined after the on_fault() longjmp */
	volatile int locked = 0;
	volatile int watched = 0;
	label_t ljb;
	volatile uint8_t type = 0;
	uint16_t flag;
	lwpchan_t lwpchan;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	/* Account this time as user-lock time for microstate accounting. */
	(void) new_mstate(t, LMS_USER_LOCK);

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}
	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);
	if (UPIMUTEX(type)) {
		/* Priority-inheritance mutexes take a separate path. */
		no_fault();
		error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL);
		if (error == 0 || error == EOWNERDEAD ||
		    error == ELOCKUNMAPPED) {
			/*
			 * The lock was acquired (error may still report the
			 * owner-dead/unmapped condition).  This inner 'locked'
			 * deliberately shadows the outer one; it is nonzero
			 * only in the EOWNERDEAD/ELOCKUNMAPPED cases.
			 */
			volatile int locked = error != 0;
			if (on_fault(&ljb)) {
				/*
				 * Faulted writing the owner field: in the
				 * owner-dead cases drop the lock and report
				 * the unlock result, otherwise report EFAULT.
				 */
				if (locked != 0)
					error = lwp_upimutex_unlock(lp, type);
				else
					error = EFAULT;
				goto upierr;
			}
			set_owner_pid(lp, owner,
			    (type & USYNC_PROCESS)? p->p_pid : 0);
			no_fault();
		}

upierr:
		if (error)
			return (set_errno(error));
		return (0);
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		/* A robust mutex marked unrecoverable can never be taken. */
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & LOCK_NOTRECOVERABLE) {
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
			error =  ENOTRECOVERABLE;
			goto out;
		}
	}

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (!ulock_try(&lp->mutex_lockw))
		error = EBUSY;
	else {
		set_owner_pid(lp, owner, (type & USYNC_PROCESS)? p->p_pid : 0);
		if (type & LOCK_ROBUST) {
			/*
			 * We got the lock, but report the owner-dead or
			 * unmapped condition so libc can attempt recovery.
			 */
			fuword16_noerr(&lp->mutex_flag, &flag);
			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
				if (flag & LOCK_OWNERDEAD)
					error = EOWNERDEAD;
				else if (type & USYNC_PROCESS_ROBUST)
					error = ELOCKUNMAPPED;
				else
					error = EOWNERDEAD;
			}
		}
	}
	locked = 0;
	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:

	/* Restore microstate if still accounted as user-lock time. */
	if (t->t_mstate == LMS_USER_LOCK)
		(void) new_mstate(t, LMS_SYSTEM);

	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
31317c478bd9Sstevel@tonic-gate 
/*
 * Unlock the mutex and unblock lwps that are trying to acquire this mutex.
 * A blocked lwp resumes and retries acquiring the lock.
 *
 * Returns 0 on success; sets errno to EFAULT for bad user memory, or to
 * whatever lwp_upimutex_unlock() reports for PI mutexes.
 */
int
lwp_mutex_unlock(lwp_mutex_t *lp)
{
	proc_t *p = ttoproc(curthread);
	lwpchan_t lwpchan;
	uchar_t waiters;
	/* volatile: examined after the on_fault() longjmp */
	volatile int locked = 0;
	volatile int watched = 0;
	volatile uint8_t type = 0;
	label_t ljb;
	uint16_t flag;
	int error = 0;

	if ((caddr_t)lp >= p->p_as->a_userlimit)
		return (set_errno(EFAULT));

	if (on_fault(&ljb)) {
		if (locked)
			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
		error = EFAULT;
		goto out;
	}

	/*
	 * Force Copy-on-write if necessary and ensure that the
	 * synchronization object resides in read/write memory.
	 * Cause an EFAULT return now if this is not so.
	 */
	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
	suword8_noerr(&lp->mutex_type, type);

	/* Priority-inheritance mutexes take a separate path. */
	if (UPIMUTEX(type)) {
		no_fault();
		error = lwp_upimutex_unlock(lp, type);
		if (error)
			return (set_errno(error));
		return (0);
	}

	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
	locked = 1;
	if (type & LOCK_ROBUST) {
		/*
		 * Unlocking while still marked owner-dead/unmapped means
		 * the state was never made consistent: the mutex becomes
		 * permanently unrecoverable.
		 */
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
			flag |= LOCK_NOTRECOVERABLE;
			suword16_noerr(&lp->mutex_flag, flag);
		}
	}
	set_owner_pid(lp, 0, 0);
	ulock_clear(&lp->mutex_lockw);
	/*
	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
	 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
	 * may fail.  If it fails, do not write into the waiter bit.
	 * The call to lwp_release() might fail due to one of three reasons:
	 *
	 * 	1. due to the thread which set the waiter bit not actually
	 *	   sleeping since it got the lock on the re-try. The waiter
	 *	   bit will then be correctly updated by that thread. This
	 *	   window may be closed by reading the wait bit again here
	 *	   and not calling lwp_release() at all if it is zero.
	 *	2. the thread which set the waiter bit and went to sleep
	 *	   was woken up by a signal. This time, the waiter recomputes
	 *	   the wait bit in the return with EINTR code.
	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
	 *	   memory that has been re-used after the lock was dropped.
	 *	   In this case, writing into the waiter bit would cause data
	 *	   corruption.
	 */
	fuword8_noerr(&lp->mutex_waiters, &waiters);
	if (waiters) {
		if ((type & LOCK_ROBUST) &&
		    (flag & LOCK_NOTRECOVERABLE)) {
			/* Wake everyone so they can all see the failure. */
			lwp_release_all(&lwpchan);
			suword8_noerr(&lp->mutex_waiters, 0);
		} else if (lwp_release(&lwpchan, &waiters, 0)) {
			suword8_noerr(&lp->mutex_waiters, waiters);
		}
	}

	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
out:
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
	if (error)
		return (set_errno(error));
	return (0);
}
3233