/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#define	atomic_cas_64	_atomic_cas_64

#include "lint.h"
#include "thr_uberdata.h"
#include <sys/rtpriocntl.h>
#include <sys/sdt.h>
#include <atomic.h>

#if defined(THREAD_DEBUG)
#define	INCR32(x)	(((x) != UINT32_MAX)? (x)++ : 0)
#define	INCR(x)		((x)++)
#define	DECR(x)		((x)--)
#define	MAXINCR(m, x)	((m < ++x)? (m = x) : 0)
#else
#define	INCR32(x)
#define	INCR(x)
#define	DECR(x)
#define	MAXINCR(m, x)
#endif

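/*
 * Note on the statistics macros above: INCR32() saturates rather than
 * wrapping, so a counter that reaches UINT32_MAX simply stays there,
 * and all four macros compile away to nothing unless THREAD_DEBUG is
 * defined.
 */
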
/*
 * This mutex is initialized to be held by lwp#1.
 * It is used to block a thread that has returned from a mutex_lock()
 * of a LOCK_PRIO_INHERIT mutex with an unrecoverable error.
 */
mutex_t	stall_mutex = DEFAULTMUTEX;

static int shared_mutex_held(mutex_t *);
static int mutex_queuelock_adaptive(mutex_t *);
static void mutex_wakeup_all(mutex_t *);

/*
 * Lock statistics support functions.
 */
void
record_begin_hold(tdb_mutex_stats_t *msp)
{
	tdb_incr(msp->mutex_lock);
	msp->mutex_begin_hold = gethrtime();
}

hrtime_t
record_hold_time(tdb_mutex_stats_t *msp)
{
	hrtime_t now = gethrtime();

	if (msp->mutex_begin_hold)
		msp->mutex_hold_time += now - msp->mutex_begin_hold;
	msp->mutex_begin_hold = 0;
	return (now);
}

/*
 * Called once at library initialization.
 */
void
mutex_setup(void)
{
	if (set_lock_byte(&stall_mutex.mutex_lockw))
		thr_panic("mutex_setup() cannot acquire stall_mutex");
	stall_mutex.mutex_owner = (uintptr_t)curthread;
}

/*
 * The default spin count of 1000 is experimentally determined.
 * On sun4u machines with any number of processors it could be raised
 * to 10,000 but that (experimentally) makes almost no difference.
 * The environment variable:
 *	_THREAD_ADAPTIVE_SPIN=count
 * can be used to override and set the count in the range [0 .. 1,000,000].
 */
int	thread_adaptive_spin = 1000;
uint_t	thread_max_spinners = 100;
int	thread_queue_verify = 0;
static	int	ncpus;

/*
 * Distinguish spinning for queue locks from spinning for regular locks.
 * We try harder to acquire queue locks by spinning.
 * The environment variable:
 *	_THREAD_QUEUE_SPIN=count
 * can be used to override and set the count in the range [0 .. 1,000,000].
 */
int	thread_queue_spin = 10000;

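/*
 * Illustrative usage of the tunables above (shell syntax; the program
 * name is hypothetical):
 *	$ _THREAD_ADAPTIVE_SPIN=5000 _THREAD_QUEUE_SPIN=20000 ./myprog
 */
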
#define	ALL_ATTRIBUTES				\
	(LOCK_RECURSIVE | LOCK_ERRORCHECK |	\
	LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT |	\
	LOCK_ROBUST)

/*
 * 'type' can be one of USYNC_THREAD, USYNC_PROCESS, or USYNC_PROCESS_ROBUST,
 * augmented by zero or more of the flags:
 *	LOCK_RECURSIVE
 *	LOCK_ERRORCHECK
 *	LOCK_PRIO_INHERIT
 *	LOCK_PRIO_PROTECT
 *	LOCK_ROBUST
 */
#pragma weak _private_mutex_init = __mutex_init
#pragma weak mutex_init = __mutex_init
#pragma weak _mutex_init = __mutex_init
/* ARGSUSED2 */
int
__mutex_init(mutex_t *mp, int type, void *arg)
{
	int basetype = (type & ~ALL_ATTRIBUTES);
	const pcclass_t *pccp;
	int error = 0;
	int ceil;

	if (basetype == USYNC_PROCESS_ROBUST) {
		/*
		 * USYNC_PROCESS_ROBUST is a deprecated historical type.
		 * We change it into (USYNC_PROCESS | LOCK_ROBUST) but
		 * retain the USYNC_PROCESS_ROBUST flag so we can return
		 * ELOCKUNMAPPED when necessary (only USYNC_PROCESS_ROBUST
		 * mutexes will ever draw ELOCKUNMAPPED).
		 */
		type |= (USYNC_PROCESS | LOCK_ROBUST);
		basetype = USYNC_PROCESS;
	}

	if (type & LOCK_PRIO_PROTECT)
		pccp = get_info_by_policy(SCHED_FIFO);
	if ((basetype != USYNC_THREAD && basetype != USYNC_PROCESS) ||
	    (type & (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT))
	    == (LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT) ||
	    ((type & LOCK_PRIO_PROTECT) &&
	    ((ceil = *(int *)arg) < pccp->pcc_primin ||
	    ceil > pccp->pcc_primax))) {
		error = EINVAL;
	} else if (type & LOCK_ROBUST) {
		/*
		 * Callers of mutex_init() with the LOCK_ROBUST attribute
		 * are required to pass an initially all-zero mutex.
		 * Multiple calls to mutex_init() are allowed; all but
		 * the first return EBUSY.  A call to mutex_init() is
		 * allowed to make an inconsistent robust lock consistent
		 * (for historical usage, even though the proper interface
		 * for this is mutex_consistent()).  Note that we use
		 * atomic_or_16() to set the LOCK_INITED flag so as
		 * not to disturb surrounding bits (LOCK_OWNERDEAD, etc).
		 */
		extern void _atomic_or_16(volatile uint16_t *, uint16_t);
		if (!(mp->mutex_flag & LOCK_INITED)) {
			mp->mutex_type = (uint8_t)type;
			_atomic_or_16(&mp->mutex_flag, LOCK_INITED);
			mp->mutex_magic = MUTEX_MAGIC;
		} else if (type != mp->mutex_type ||
		    ((type & LOCK_PRIO_PROTECT) && mp->mutex_ceiling != ceil)) {
			error = EINVAL;
		} else if (__mutex_consistent(mp) != 0) {
			error = EBUSY;
		}
		/* register a process robust mutex with the kernel */
		if (basetype == USYNC_PROCESS)
			register_lock(mp);
	} else {
		(void) _memset(mp, 0, sizeof (*mp));
		mp->mutex_type = (uint8_t)type;
		mp->mutex_flag = LOCK_INITED;
		mp->mutex_magic = MUTEX_MAGIC;
	}

	if (error == 0 && (type & LOCK_PRIO_PROTECT)) {
		mp->mutex_ceiling = ceil;
	}

	return (error);
}

/*
 * Delete mp from the list of ceiling mutexes owned by curthread.
 * Return 1 if the head of the chain was updated.
 */
int
_ceil_mylist_del(mutex_t *mp)
{
	ulwp_t *self = curthread;
	mxchain_t **mcpp;
	mxchain_t *mcp;

	for (mcpp = &self->ul_mxchain;
	    (mcp = *mcpp) != NULL;
	    mcpp = &mcp->mxchain_next) {
		if (mcp->mxchain_mx == mp) {
			*mcpp = mcp->mxchain_next;
			lfree(mcp, sizeof (*mcp));
			return (mcpp == &self->ul_mxchain);
		}
	}
	return (0);
}

/*
 * Add mp to the list of ceiling mutexes owned by curthread.
 * Return ENOMEM if no memory could be allocated.
 */
int
_ceil_mylist_add(mutex_t *mp)
{
	ulwp_t *self = curthread;
	mxchain_t *mcp;

	if ((mcp = lmalloc(sizeof (*mcp))) == NULL)
		return (ENOMEM);
	mcp->mxchain_mx = mp;
	mcp->mxchain_next = self->ul_mxchain;
	self->ul_mxchain = mcp;
	return (0);
}

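/*
 * Note: ul_mxchain is maintained LIFO; _ceil_mylist_add() above pushes
 * each newly acquired ceiling mutex at the head, so the head is always
 * the most recently acquired ceiling lock.  _ceil_prio_waive(), below,
 * relies on this when it inherits from the head of the chain.
 */
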
/*
 * Helper function for _ceil_prio_inherit() and _ceil_prio_waive(), below.
 */
static void
set_rt_priority(ulwp_t *self, int prio)
{
	pcparms_t pcparm;

	pcparm.pc_cid = self->ul_rtclassid;
	((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = RT_NOCHANGE;
	((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
	(void) _private_priocntl(P_LWPID, self->ul_lwpid, PC_SETPARMS, &pcparm);
}

/*
 * Inherit priority from ceiling.
 * This changes the effective priority, not the assigned priority.
 */
void
_ceil_prio_inherit(int prio)
{
	ulwp_t *self = curthread;

	self->ul_epri = prio;
	set_rt_priority(self, prio);
}

/*
 * Waive inherited ceiling priority.  Inherit from head of owned ceiling locks
 * if holding at least one ceiling lock.  If no ceiling locks are held at this
 * point, disinherit completely, reverting back to assigned priority.
 */
void
_ceil_prio_waive(void)
{
	ulwp_t *self = curthread;
	mxchain_t *mcp = self->ul_mxchain;
	int prio;

	if (mcp == NULL) {
		prio = self->ul_pri;
		self->ul_epri = 0;
	} else {
		prio = mcp->mxchain_mx->mutex_ceiling;
		self->ul_epri = prio;
	}
	set_rt_priority(self, prio);
}

/*
 * Clear the lock byte.  Retain the waiters byte and the spinners byte.
 * Return the old value of the lock word.
 */
static uint32_t
clear_lockbyte(volatile uint32_t *lockword)
{
	uint32_t old;
	uint32_t new;

	do {
		old = *lockword;
		new = old & ~LOCKMASK;
	} while (atomic_cas_32(lockword, old, new) != old);

	return (old);
}

/*
 * Same as clear_lockbyte(), but operates on mutex_lockword64.
 * The mutex_ownerpid field is cleared along with the lock byte.
 */
static uint64_t
clear_lockbyte64(volatile uint64_t *lockword64)
{
	uint64_t old;
	uint64_t new;

	do {
		old = *lockword64;
		new = old & ~LOCKMASK64;
	} while (atomic_cas_64(lockword64, old, new) != old);

	return (old);
}

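/*
 * The *64 operations above and below manipulate mutex_lockword64,
 * which overlays the mutex_ownerpid field and the 32-bit lock word
 * (this is what the PIDSHIFT/LOCKBYTE64 arithmetic assumes), so a
 * single atomic_cas_64 can update the lock byte and the owner pid
 * together.
 */
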
/*
 * Similar to set_lock_byte(), which only tries to set the lock byte.
 * Here, we attempt to set the lock byte AND the mutex_ownerpid,
 * keeping the remaining bytes constant.
 */
static int
set_lock_byte64(volatile uint64_t *lockword64, pid_t ownerpid)
{
	uint64_t old;
	uint64_t new;

	old = *lockword64 & ~LOCKMASK64;
	new = old | ((uint64_t)(uint_t)ownerpid << PIDSHIFT) | LOCKBYTE64;
	if (atomic_cas_64(lockword64, old, new) == old)
		return (LOCKCLEAR);

	return (LOCKSET);
}

/*
 * Increment the spinners count in the mutex lock word.
 * Return 0 on success.  Return -1 if the count would overflow.
 */
static int
spinners_incr(volatile uint32_t *lockword, uint8_t max_spinners)
{
	uint32_t old;
	uint32_t new;

	do {
		old = *lockword;
		if (((old & SPINNERMASK) >> SPINNERSHIFT) >= max_spinners)
			return (-1);
		new = old + (1 << SPINNERSHIFT);
	} while (atomic_cas_32(lockword, old, new) != old);

	return (0);
}

/*
 * Decrement the spinners count in the mutex lock word.
 * Return the new value of the lock word.
 */
static uint32_t
spinners_decr(volatile uint32_t *lockword)
{
	uint32_t old;
	uint32_t new;

	do {
		new = old = *lockword;
		if (new & SPINNERMASK)
			new -= (1 << SPINNERSHIFT);
	} while (atomic_cas_32(lockword, old, new) != old);

	return (new);
}

/*
 * Non-preemptive spin locks.  Used by queue_lock().
 * No lock statistics are gathered for these locks.
 * No DTrace probes are provided for these locks.
 */
void
spin_lock_set(mutex_t *mp)
{
	ulwp_t *self = curthread;

	no_preempt(self);
	if (set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		return;
	}
	/*
	 * Spin for a while, attempting to acquire the lock.
	 */
	INCR32(self->ul_spin_lock_spin);
	if (mutex_queuelock_adaptive(mp) == 0 ||
	    set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		return;
	}
	/*
	 * Try harder if we were previously at a no-preemption level.
	 */
	if (self->ul_preempt > 1) {
		INCR32(self->ul_spin_lock_spin2);
		if (mutex_queuelock_adaptive(mp) == 0 ||
		    set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			return;
		}
	}
	/*
	 * Give up and block in the kernel for the mutex.
	 */
	INCR32(self->ul_spin_lock_sleep);
	(void) ___lwp_mutex_timedlock(mp, NULL);
	mp->mutex_owner = (uintptr_t)self;
}

void
spin_lock_clear(mutex_t *mp)
{
	ulwp_t *self = curthread;

	mp->mutex_owner = 0;
	if (atomic_swap_32(&mp->mutex_lockword, 0) & WAITERMASK) {
		(void) ___lwp_mutex_wakeup(mp, 0);
		INCR32(self->ul_spin_lock_wakeup);
	}
	preempt(self);
}

/*
 * Allocate the sleep queue hash table.
 */
void
queue_alloc(void)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	queue_head_t *qp;
	void *data;
	int i;

	/*
	 * No locks are needed; we call here only when single-threaded.
	 */
	ASSERT(self == udp->ulwp_one);
	ASSERT(!udp->uberflags.uf_mt);
	if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t),
	    PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0))
	    == MAP_FAILED)
		thr_panic("cannot allocate thread queue_head table");
	udp->queue_head = qp = (queue_head_t *)data;
	for (i = 0; i < 2 * QHASHSIZE; qp++, i++) {
		qp->qh_type = (i < QHASHSIZE)? MX : CV;
		qp->qh_lock.mutex_flag = LOCK_INITED;
		qp->qh_lock.mutex_magic = MUTEX_MAGIC;
		qp->qh_hlist = &qp->qh_def_root;
#if defined(THREAD_DEBUG)
		qp->qh_hlen = 1;
		qp->qh_hmax = 1;
#endif
	}
}

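/*
 * Note: queue_alloc() sizes the table at 2 * QHASHSIZE queue heads;
 * the first QHASHSIZE entries serve mutexes (MX) and the second half
 * serves condition variables (CV).  QUEUE_HASH() and the QVERIFY()
 * checks below depend on this split.
 */
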
#if defined(THREAD_DEBUG)

/*
 * Debugging: verify correctness of a sleep queue.
 */
void
QVERIFY(queue_head_t *qp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	queue_root_t *qrp;
	ulwp_t *ulwp;
	ulwp_t *prev;
	uint_t index;
	uint32_t cnt;
	char qtype;
	void *wchan;

	ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE);
	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
	for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) {
		cnt++;
		ASSERT((qrp->qr_head != NULL && qrp->qr_tail != NULL) ||
		    (qrp->qr_head == NULL && qrp->qr_tail == NULL));
	}
	ASSERT(qp->qh_hlen == cnt && qp->qh_hmax >= cnt);
	qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV;
	ASSERT(qp->qh_type == qtype);
	if (!thread_queue_verify)
		return;
	/* real expensive stuff, only for _THREAD_QUEUE_VERIFY */
	for (cnt = 0, qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next) {
		for (prev = NULL, ulwp = qrp->qr_head; ulwp != NULL;
		    prev = ulwp, ulwp = ulwp->ul_link) {
			cnt++;
			if (ulwp->ul_writer)
				ASSERT(prev == NULL || prev->ul_writer);
			ASSERT(ulwp->ul_qtype == qtype);
			ASSERT(ulwp->ul_wchan != NULL);
			ASSERT(ulwp->ul_sleepq == qp);
			wchan = ulwp->ul_wchan;
			ASSERT(qrp->qr_wchan == wchan);
			index = QUEUE_HASH(wchan, qtype);
			ASSERT(&udp->queue_head[index] == qp);
		}
		ASSERT(qrp->qr_tail == prev);
	}
	ASSERT(qp->qh_qlen == cnt);
}

#else	/* THREAD_DEBUG */

#define	QVERIFY(qp)

#endif	/* THREAD_DEBUG */

/*
 * Acquire a queue head.
 */
queue_head_t *
queue_lock(void *wchan, int qtype)
{
	uberdata_t *udp = curthread->ul_uberdata;
	queue_head_t *qp;
	queue_root_t *qrp;

	ASSERT(qtype == MX || qtype == CV);

	/*
	 * It is possible that we could be called while still single-threaded.
	 * If so, we call queue_alloc() to allocate the queue_head[] array.
	 */
	if ((qp = udp->queue_head) == NULL) {
		queue_alloc();
		qp = udp->queue_head;
	}
	qp += QUEUE_HASH(wchan, qtype);
	spin_lock_set(&qp->qh_lock);
	for (qrp = qp->qh_hlist; qrp != NULL; qrp = qrp->qr_next)
		if (qrp->qr_wchan == wchan)
			break;
	if (qrp == NULL && qp->qh_def_root.qr_head == NULL) {
		/* the default queue root is available; use it */
		qrp = &qp->qh_def_root;
		qrp->qr_wchan = wchan;
		ASSERT(qrp->qr_next == NULL);
		ASSERT(qrp->qr_tail == NULL &&
		    qrp->qr_rtcount == 0 && qrp->qr_qlen == 0);
	}
	qp->qh_wchan = wchan;	/* valid until queue_unlock() is called */
	qp->qh_root = qrp;	/* valid until queue_unlock() is called */
	INCR32(qp->qh_lockcount);
	QVERIFY(qp);
	return (qp);
}

/*
 * Release a queue head.
 */
void
queue_unlock(queue_head_t *qp)
{
	QVERIFY(qp);
	spin_lock_clear(&qp->qh_lock);
}

/*
 * For rwlock queueing, we must queue writers ahead of readers of the
 * same priority.  We do this by making writers appear to have a half
 * point higher priority for purposes of priority comparisons below.
 */
#define	CMP_PRIO(ulwp)	((real_priority(ulwp) << 1) + (ulwp)->ul_writer)

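/*
 * Worked example: a writer at real priority 10 compares as
 * (10 << 1) + 1 == 21 while a reader at the same priority compares
 * as 20, so the writer sorts ahead of the reader but still behind
 * any thread of real priority 11 (which compares as 22 or 23).
 */
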
void
enqueue(queue_head_t *qp, ulwp_t *ulwp, int force_fifo)
{
	queue_root_t *qrp;
	ulwp_t **ulwpp;
	ulwp_t *next;
	int pri = CMP_PRIO(ulwp);

	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
	ASSERT(ulwp->ul_sleepq != qp);

	if ((qrp = qp->qh_root) == NULL) {
		/* use the thread's queue root for the linkage */
		qrp = &ulwp->ul_queue_root;
		qrp->qr_next = qp->qh_hlist;
		qrp->qr_prev = NULL;
		qrp->qr_head = NULL;
		qrp->qr_tail = NULL;
		qrp->qr_wchan = qp->qh_wchan;
		qrp->qr_rtcount = 0;
		qrp->qr_qlen = 0;
		qrp->qr_qmax = 0;
		qp->qh_hlist->qr_prev = qrp;
		qp->qh_hlist = qrp;
		qp->qh_root = qrp;
		MAXINCR(qp->qh_hmax, qp->qh_hlen);
	}

	/*
	 * LIFO queue ordering is unfair and can lead to starvation,
	 * but it gives better performance for heavily contended locks.
	 * We use thread_queue_fifo (range is 0..8) to determine
	 * the frequency of FIFO vs LIFO queuing:
	 *	0 : every 256th time	(almost always LIFO)
	 *	1 : every 128th time
	 *	2 : every 64th time
	 *	3 : every 32nd time
	 *	4 : every 16th time	(the default value, mostly LIFO)
	 *	5 : every 8th time
	 *	6 : every 4th time
	 *	7 : every 2nd time
	 *	8 : every time		(never LIFO, always FIFO)
	 * Note that there is always some degree of FIFO ordering.
	 * This breaks livelock conditions that occur in applications
	 * that are written assuming (incorrectly) that threads acquire
	 * locks fairly, that is, in roughly round-robin order.
	 * In any event, the queue is maintained in kernel priority order.
	 *
	 * If force_fifo is non-zero, FIFO queueing is forced.
	 * SUSV3 requires this for semaphores.
	 */
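	/*
	 * Arithmetic behind the table above: with the default
	 * ul_queue_fifo value of 4, (++qh_qcnt << 4) & 0xff is zero
	 * exactly once every 16 increments, giving "every 16th time";
	 * with the value 8 the shifted count is always 0 mod 256, so
	 * the FIFO branch is taken every time.
	 */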
	if (qrp->qr_head == NULL) {
		/*
		 * The queue is empty.  LIFO/FIFO doesn't matter.
		 */
		ASSERT(qrp->qr_tail == NULL);
		ulwpp = &qrp->qr_head;
	} else if (force_fifo |
	    (((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0)) {
		/*
		 * Enqueue after the last thread whose priority is greater
		 * than or equal to the priority of the thread being queued.
		 * Attempt first to go directly onto the tail of the queue.
		 */
		if (pri <= CMP_PRIO(qrp->qr_tail))
			ulwpp = &qrp->qr_tail->ul_link;
		else {
			for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL;
			    ulwpp = &next->ul_link)
				if (pri > CMP_PRIO(next))
					break;
		}
	} else {
		/*
		 * Enqueue before the first thread whose priority is less
		 * than or equal to the priority of the thread being queued.
		 * Hopefully we can go directly onto the head of the queue.
		 */
		for (ulwpp = &qrp->qr_head; (next = *ulwpp) != NULL;
		    ulwpp = &next->ul_link)
			if (pri >= CMP_PRIO(next))
				break;
	}
	if ((ulwp->ul_link = *ulwpp) == NULL)
		qrp->qr_tail = ulwp;
	*ulwpp = ulwp;

	ulwp->ul_sleepq = qp;
	ulwp->ul_wchan = qp->qh_wchan;
	ulwp->ul_qtype = qp->qh_type;
	if ((ulwp->ul_schedctl != NULL &&
	    ulwp->ul_schedctl->sc_cid == ulwp->ul_rtclassid) |
	    ulwp->ul_pilocks) {
		ulwp->ul_rtqueued = 1;
		qrp->qr_rtcount++;
	}
	MAXINCR(qrp->qr_qmax, qrp->qr_qlen);
	MAXINCR(qp->qh_qmax, qp->qh_qlen);
}

/*
 * Helper function for queue_slot() and queue_slot_rt().
 * Try to find a non-suspended thread on the queue.
 */
static ulwp_t **
queue_slot_runnable(ulwp_t **ulwpp, ulwp_t **prevp, int rt)
{
	ulwp_t *ulwp;
	ulwp_t **foundpp = NULL;
	int priority = -1;
	ulwp_t *prev;
	int tpri;

	for (prev = NULL;
	    (ulwp = *ulwpp) != NULL;
	    prev = ulwp, ulwpp = &ulwp->ul_link) {
		if (ulwp->ul_stop)	/* skip suspended threads */
			continue;
		tpri = rt? CMP_PRIO(ulwp) : 0;
		if (tpri > priority) {
			foundpp = ulwpp;
			*prevp = prev;
			priority = tpri;
			if (!rt)
				break;
		}
	}
	return (foundpp);
}

/*
 * For real-time, we search the entire queue because the dispatch
 * (kernel) priorities may have changed since enqueueing.
 */
static ulwp_t **
queue_slot_rt(ulwp_t **ulwpp_org, ulwp_t **prevp)
{
	ulwp_t **ulwpp = ulwpp_org;
	ulwp_t *ulwp = *ulwpp;
	ulwp_t **foundpp = ulwpp;
	int priority = CMP_PRIO(ulwp);
	ulwp_t *prev;
	int tpri;

	for (prev = ulwp, ulwpp = &ulwp->ul_link;
	    (ulwp = *ulwpp) != NULL;
	    prev = ulwp, ulwpp = &ulwp->ul_link) {
		tpri = CMP_PRIO(ulwp);
		if (tpri > priority) {
			foundpp = ulwpp;
			*prevp = prev;
			priority = tpri;
		}
	}
	ulwp = *foundpp;

	/*
	 * Try not to return a suspended thread.
	 * This mimics the old libthread's behavior.
	 */
	if (ulwp->ul_stop &&
	    (ulwpp = queue_slot_runnable(ulwpp_org, prevp, 1)) != NULL) {
		foundpp = ulwpp;
		ulwp = *foundpp;
	}
	ulwp->ul_rt = 1;
	return (foundpp);
}

ulwp_t **
queue_slot(queue_head_t *qp, ulwp_t **prevp, int *more)
{
	queue_root_t *qrp;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	int rt;

	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));

	if ((qrp = qp->qh_root) == NULL || (ulwp = qrp->qr_head) == NULL) {
		*more = 0;
		return (NULL);		/* no lwps on the queue */
	}
	rt = (qrp->qr_rtcount != 0);
	*prevp = NULL;
	if (ulwp->ul_link == NULL) {	/* only one lwp on the queue */
		*more = 0;
		ulwp->ul_rt = rt;
		return (&qrp->qr_head);
	}
	*more = 1;

	if (rt)		/* real-time queue */
		return (queue_slot_rt(&qrp->qr_head, prevp));
	/*
	 * Try not to return a suspended thread.
	 * This mimics the old libthread's behavior.
	 */
	if (ulwp->ul_stop &&
	    (ulwpp = queue_slot_runnable(&qrp->qr_head, prevp, 0)) != NULL) {
		ulwp = *ulwpp;
		ulwp->ul_rt = 0;
		return (ulwpp);
	}
	/*
	 * The common case; just pick the first thread on the queue.
	 */
	ulwp->ul_rt = 0;
	return (&qrp->qr_head);
}

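/*
 * Note on the convention used by queue_slot() and its helpers above:
 * they return a pointer to the link slot (&qrp->qr_head or the
 * predecessor's &ul_link) rather than the thread itself, so a caller
 * can unlink the chosen thread in place; *prevp carries the
 * predecessor that queue_unlink() needs to repair qr_tail.
 */
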
/*
 * Common code for unlinking an lwp from a user-level sleep queue.
 */
void
queue_unlink(queue_head_t *qp, ulwp_t **ulwpp, ulwp_t *prev)
{
	queue_root_t *qrp = qp->qh_root;
	queue_root_t *nqrp;
	ulwp_t *ulwp = *ulwpp;
	ulwp_t *next;

	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
	ASSERT(qp->qh_wchan != NULL && ulwp->ul_wchan == qp->qh_wchan);

	DECR(qp->qh_qlen);
	DECR(qrp->qr_qlen);
	if (ulwp->ul_rtqueued) {
		ulwp->ul_rtqueued = 0;
		qrp->qr_rtcount--;
	}
	next = ulwp->ul_link;
	*ulwpp = next;
	ulwp->ul_link = NULL;
	if (qrp->qr_tail == ulwp)
		qrp->qr_tail = prev;
	if (qrp == &ulwp->ul_queue_root) {
		/*
		 * We can't continue to use the unlinked thread's
		 * queue root for the linkage.
		 */
		queue_root_t *qr_next = qrp->qr_next;
		queue_root_t *qr_prev = qrp->qr_prev;

		if (qrp->qr_tail) {
			/* switch to using the last thread's queue root */
			ASSERT(qrp->qr_qlen != 0);
			nqrp = &qrp->qr_tail->ul_queue_root;
			*nqrp = *qrp;
			if (qr_next)
				qr_next->qr_prev = nqrp;
			if (qr_prev)
				qr_prev->qr_next = nqrp;
			else
				qp->qh_hlist = nqrp;
			qp->qh_root = nqrp;
		} else {
			/* empty queue root; just delete from the hash list */
			ASSERT(qrp->qr_qlen == 0);
			if (qr_next)
				qr_next->qr_prev = qr_prev;
			if (qr_prev)
				qr_prev->qr_next = qr_next;
			else
				qp->qh_hlist = qr_next;
			qp->qh_root = NULL;
			DECR(qp->qh_hlen);
		}
	}
}

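/*
 * Remove and return the thread selected by queue_slot(), clearing its
 * sleep-queue linkage.  *more is set by queue_slot() to indicate
 * whether other waiters remain.
 */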
ulwp_t *
dequeue(queue_head_t *qp, int *more)
{
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev;

	if ((ulwpp = queue_slot(qp, &prev, more)) == NULL)
		return (NULL);
	ulwp = *ulwpp;
	queue_unlink(qp, ulwpp, prev);
	ulwp->ul_sleepq = NULL;
	ulwp->ul_wchan = NULL;
	return (ulwp);
}

/*
 * Return a pointer to the highest priority thread sleeping on wchan.
 */
ulwp_t *
queue_waiter(queue_head_t *qp)
{
	ulwp_t **ulwpp;
	ulwp_t *prev;
	int more;

	if ((ulwpp = queue_slot(qp, &prev, &more)) == NULL)
		return (NULL);
	return (*ulwpp);
}

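/*
 * Remove the calling thread from the sleep queue it is blocked on.
 * Return nonzero if waiters remain on the queue root, else zero.
 */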
int
dequeue_self(queue_head_t *qp)
{
	ulwp_t *self = curthread;
	queue_root_t *qrp;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev;
	int found = 0;

	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));

	/* find self on the sleep queue */
	if ((qrp = qp->qh_root) != NULL) {
		for (prev = NULL, ulwpp = &qrp->qr_head;
		    (ulwp = *ulwpp) != NULL;
		    prev = ulwp, ulwpp = &ulwp->ul_link) {
			if (ulwp == self) {
				queue_unlink(qp, ulwpp, prev);
				self->ul_cvmutex = NULL;
				self->ul_sleepq = NULL;
				self->ul_wchan = NULL;
				found = 1;
				break;
			}
		}
	}

	if (!found)
		thr_panic("dequeue_self(): curthread not found on queue");

	return ((qrp = qp->qh_root) != NULL && qrp->qr_head != NULL);
}

/*
 * Called from call_user_handler() and _thrp_suspend() to take
 * ourself off of our sleep queue so we can grab locks.
 */
void
unsleep_self(void)
{
	ulwp_t *self = curthread;
	queue_head_t *qp;

	/*
	 * Calling enter_critical()/exit_critical() here would lead
	 * to recursion.  Just manipulate self->ul_critical directly.
	 */
	self->ul_critical++;
	while (self->ul_sleepq != NULL) {
		qp = queue_lock(self->ul_wchan, self->ul_qtype);
		/*
		 * We may have been moved from a CV queue to a
		 * mutex queue while we were attempting queue_lock().
		 * If so, just loop around and try again.
		 * dequeue_self() clears self->ul_sleepq.
		 */
		if (qp == self->ul_sleepq)
			(void) dequeue_self(qp);
		queue_unlock(qp);
	}
	self->ul_writer = 0;
	self->ul_critical--;
}

/*
 * Common code for calling the ___lwp_mutex_timedlock() system call.
 * Returns with mutex_owner and mutex_ownerpid set correctly.
 */
static int
mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	hrtime_t begin_sleep;
	int acquired;
	int error;

	self->ul_sp = stkptr();
	self->ul_wchan = mp;
	if (__td_event_report(self, TD_SLEEP, udp)) {
		self->ul_td_evbuf.eventnum = TD_SLEEP;
		self->ul_td_evbuf.eventdata = mp;
		tdb_event(TD_SLEEP, udp);
	}
	if (msp) {
		tdb_incr(msp->mutex_sleep);
		begin_sleep = gethrtime();
	}

	DTRACE_PROBE1(plockstat, mutex__block, mp);

	for (;;) {
		/*
		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
		 * means we successfully acquired the lock.
		 */
		if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0 &&
		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
			acquired = 0;
			break;
		}

		if (mtype & USYNC_PROCESS) {
			/*
			 * Defend against forkall().  We may be the child,
			 * in which case we don't actually own the mutex.
			 */
			enter_critical(self);
			if (mp->mutex_ownerpid == udp->pid) {
				mp->mutex_owner = (uintptr_t)self;
				exit_critical(self);
				acquired = 1;
				break;
			}
			exit_critical(self);
		} else {
			mp->mutex_owner = (uintptr_t)self;
			acquired = 1;
			break;
		}
	}
	if (msp)
		msp->mutex_sleep_time += gethrtime() - begin_sleep;
	self->ul_wchan = NULL;
	self->ul_sp = 0;

	if (acquired) {
		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
	} else {
		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
	}

	return (error);
}

/*
 * Common code for calling the ___lwp_mutex_trylock() system call.
 * Returns with mutex_owner and mutex_ownerpid set correctly.
 */
int
mutex_trylock_kernel(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	int error;
	int acquired;

	for (;;) {
		/*
		 * A return value of EOWNERDEAD or ELOCKUNMAPPED
		 * means we successfully acquired the lock.
		 */
		if ((error = ___lwp_mutex_trylock(mp)) != 0 &&
		    error != EOWNERDEAD && error != ELOCKUNMAPPED) {
			acquired = 0;
			break;
		}

		if (mtype & USYNC_PROCESS) {
			/*
			 * Defend against forkall().  We may be the child,
			 * in which case we don't actually own the mutex.
			 */
			enter_critical(self);
			if (mp->mutex_ownerpid == udp->pid) {
				mp->mutex_owner = (uintptr_t)self;
				exit_critical(self);
				acquired = 1;
				break;
			}
			exit_critical(self);
		} else {
			mp->mutex_owner = (uintptr_t)self;
			acquired = 1;
			break;
		}
	}

	if (acquired) {
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
	} else if (error != EBUSY) {
		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
	}

	return (error);
}

volatile sc_shared_t *
setup_schedctl(void)
{
	ulwp_t *self = curthread;
	volatile sc_shared_t *scp;
	sc_shared_t *tmp;

	if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */
	    !self->ul_vfork &&			/* not a child of vfork() */
	    !self->ul_schedctl_called) {	/* haven't been called before */
		enter_critical(self);
		self->ul_schedctl_called = &self->ul_uberdata->uberflags;
		if ((tmp = __schedctl()) != (sc_shared_t *)(-1))
			self->ul_schedctl = scp = tmp;
		exit_critical(self);
	}
	/*
	 * Unless the call to setup_schedctl() is surrounded
	 * by enter_critical()/exit_critical(), the address
	 * we are returning could be invalid due to a forkall()
	 * having occurred in another thread.
	 */
	return (scp);
}

/*
 * Interfaces from libsched, incorporated into libc.
 * libsched.so.1 is now a filter library onto libc.
 */
#pragma weak schedctl_lookup = _schedctl_init
#pragma weak _schedctl_lookup = _schedctl_init
#pragma weak schedctl_init = _schedctl_init
schedctl_t *
_schedctl_init(void)
{
	volatile sc_shared_t *scp = setup_schedctl();
	return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl);
}

#pragma weak schedctl_exit = _schedctl_exit
void
_schedctl_exit(void)
{
}

/*
 * Contract private interface for java.
 * Set up the schedctl data if it doesn't exist yet.
 * Return a pointer to the pointer to the schedctl data.
 */
volatile sc_shared_t *volatile *
_thr_schedctl(void)
{
	ulwp_t *self = curthread;
	volatile sc_shared_t *volatile *ptr;

	if (self->ul_vfork)
		return (NULL);
	if (*(ptr = &self->ul_schedctl) == NULL)
		(void) setup_schedctl();
	return (ptr);
}

/*
 * Block signals and attempt to block preemption.
 * no_preempt()/preempt() must be used in pairs but can be nested.
 */
void
no_preempt(ulwp_t *self)
{
	volatile sc_shared_t *scp;

	if (self->ul_preempt++ == 0) {
		enter_critical(self);
		if ((scp = self->ul_schedctl) != NULL ||
		    (scp = setup_schedctl()) != NULL) {
			/*
			 * Save the pre-existing preempt value.
			 */
			self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt;
			scp->sc_preemptctl.sc_nopreempt = 1;
		}
	}
}

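/*
 * Illustrative pairing (the usual pattern in this file):
 *	no_preempt(self);
 *	... update queue or lock state ...
 *	preempt(self);
 * Calls may nest; only the outermost no_preempt()/preempt() pair
 * saves and restores sc_nopreempt.
 */
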
/*
 * Undo the effects of no_preempt().
 */
void
preempt(ulwp_t *self)
{
	volatile sc_shared_t *scp;

	ASSERT(self->ul_preempt > 0);
	if (--self->ul_preempt == 0) {
		if ((scp = self->ul_schedctl) != NULL) {
			/*
			 * Restore the pre-existing preempt value.
			 */
			scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt;
			if (scp->sc_preemptctl.sc_yield &&
			    scp->sc_preemptctl.sc_nopreempt == 0) {
				lwp_yield();
				if (scp->sc_preemptctl.sc_yield) {
					/*
					 * Shouldn't happen.  This is either
					 * a race condition or the thread
					 * just entered the real-time class.
					 */
					lwp_yield();
					scp->sc_preemptctl.sc_yield = 0;
				}
			}
		}
		exit_critical(self);
	}
}

/*
 * If a call to preempt() would cause the current thread to yield or to
 * take deferred actions in exit_critical(), then unpark the specified
 * lwp so it can run while we delay.  Return the original lwpid if the
 * unpark was not performed, else return zero.  The tests are a repeat
 * of some of the tests in preempt(), above.  This is a statistical
 * optimization solely for cond_sleep_queue(), below.
 */
static lwpid_t
preempt_unpark(ulwp_t *self, lwpid_t lwpid)
{
	volatile sc_shared_t *scp = self->ul_schedctl;

	ASSERT(self->ul_preempt == 1 && self->ul_critical > 0);
	if ((scp != NULL && scp->sc_preemptctl.sc_yield) ||
	    (self->ul_curplease && self->ul_critical == 1)) {
		(void) __lwp_unpark(lwpid);
		lwpid = 0;
	}
	return (lwpid);
}

/*
 * Spin for a while (if 'tryhard' is true), trying to grab the lock.
 * If this fails, return EBUSY and let the caller deal with it.
 * If this succeeds, return 0 with mutex_owner set to curthread.
 */
static int
mutex_trylock_adaptive(mutex_t *mp, int tryhard)
{
	ulwp_t *self = curthread;
	int error = EBUSY;
	ulwp_t *ulwp;
	volatile sc_shared_t *scp;
	volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw;
	volatile uint64_t *ownerp = (volatile uint64_t *)&mp->mutex_owner;
	uint32_t new_lockword;
	int count = 0;
	int max_count;
	uint8_t max_spinners;

	ASSERT(!(mp->mutex_type & USYNC_PROCESS));

	if (MUTEX_OWNER(mp) == self)
		return (EBUSY);

	/* short-cut, not definitive (see below) */
	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		error = ENOTRECOVERABLE;
		goto done;
	}

	/*
	 * Make one attempt to acquire the lock before
	 * incurring the overhead of the spin loop.
	 */
	if (set_lock_byte(lockp) == 0) {
		*ownerp = (uintptr_t)self;
		error = 0;
		goto done;
	}
	if (!tryhard)
		goto done;
	if (ncpus == 0)
		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
	if ((max_spinners = self->ul_max_spinners) >= ncpus)
		max_spinners = ncpus - 1;
	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
	if (max_count == 0)
		goto done;

	/*
	 * This spin loop is unfair to lwps that have already dropped into
	 * the kernel to sleep.  They will starve on a highly-contended mutex.
	 * This is just too bad.  The adaptive spin algorithm is intended
	 * to allow programs with highly-contended locks (that is, broken
	 * programs) to execute with reasonable speed despite their contention.
	 * Being fair would reduce the speed of such programs, and well-written
	 * programs will not suffer in any case.
	 */
	enter_critical(self);
	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) {
		exit_critical(self);
		goto done;
	}
	DTRACE_PROBE1(plockstat, mutex__spin, mp);
	for (count = 1; ; count++) {
		if (*lockp == 0 && set_lock_byte(lockp) == 0) {
			*ownerp = (uintptr_t)self;
			error = 0;
			break;
		}
		if (count == max_count)
			break;
		SMT_PAUSE();
		/*
		 * Stop spinning if the mutex owner is not running on
		 * a processor; it will not drop the lock any time soon
		 * and we would just be wasting time to keep spinning.
		 *
		 * Note that we are looking at another thread (ulwp_t)
		 * without ensuring that the other thread does not exit.
		 * The scheme relies on ulwp_t structures never being
		 * deallocated by the library (the library employs a free
		 * list of ulwp_t structs that are reused when new threads
		 * are created) and on schedctl shared memory never being
		 * deallocated once created via __schedctl().
		 *
		 * Thus, the worst that can happen when the spinning thread
		 * looks at the owner's schedctl data is that it is looking
		 * at some other thread's schedctl data.  This almost never
		 * happens and is benign when it does.
		 */
		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
		    ((scp = ulwp->ul_schedctl) == NULL ||
		    scp->sc_state != SC_ONPROC))
			break;
	}
	new_lockword = spinners_decr(&mp->mutex_lockword);
	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
		/*
		 * We haven't yet acquired the lock, the lock
		 * is free, and there are no other spinners.
		 * Make one final attempt to acquire the lock.
		 *
		 * This isn't strictly necessary since mutex_lock_queue()
		 * (the next action this thread will take if it doesn't
		 * acquire the lock here) makes one attempt to acquire
		 * the lock before putting the thread to sleep.
		 *
		 * If the next action for this thread (on failure here)
		 * were not to call mutex_lock_queue(), this would be
		 * necessary for correctness, to avoid ending up with an
		 * unheld mutex with waiters but no one to wake them up.
		 */
		if (set_lock_byte(lockp) == 0) {
			*ownerp = (uintptr_t)self;
			error = 0;
		}
		count++;
	}
	exit_critical(self);

done:
	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		/*
		 * We shouldn't own the mutex.
		 * Just clear the lock; everyone has already been woken up.
		 */
		mp->mutex_owner = 0;
		(void) clear_lockbyte(&mp->mutex_lockword);
		error = ENOTRECOVERABLE;
	}

	if (error) {
		if (count) {
			DTRACE_PROBE2(plockstat, mutex__spun, 0, count);
		}
		if (error != EBUSY) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
		}
	} else {
		if (count) {
			DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
		}
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
		if (mp->mutex_flag & LOCK_OWNERDEAD) {
			ASSERT(mp->mutex_type & LOCK_ROBUST);
			error = EOWNERDEAD;
		}
	}

	return (error);
}

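/*
 * Illustrative sketch (not part of libc): the spin-then-sleep pattern
 * the function above is designed for, as used by the lock paths later
 * in this file.  A failed hard attempt falls back to the sleep queue;
 * a MUTEX_TRY caller passes tryhard == 0 and simply gets EBUSY back.
 *
 *	if (mutex_trylock_adaptive(mp, 1) != 0)		// spin hard
 *		error = mutex_lock_queue(self, msp, mp, tsp);
 *	...
 *	error = mutex_trylock_adaptive(mp, 0);		// one attempt only
 */
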
/*
 * Same as mutex_trylock_adaptive(), except specifically for queue locks.
 * The owner field is not set here; the caller (spin_lock_set()) sets it.
 */
static int
mutex_queuelock_adaptive(mutex_t *mp)
{
	ulwp_t *ulwp;
	volatile sc_shared_t *scp;
	volatile uint8_t *lockp;
	volatile uint64_t *ownerp;
	int count = curthread->ul_queue_spin;

	ASSERT(mp->mutex_type == USYNC_THREAD);

	if (count == 0)
		return (EBUSY);

	lockp = (volatile uint8_t *)&mp->mutex_lockw;
	ownerp = (volatile uint64_t *)&mp->mutex_owner;
	while (--count >= 0) {
		if (*lockp == 0 && set_lock_byte(lockp) == 0)
			return (0);
		SMT_PAUSE();
		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
		    ((scp = ulwp->ul_schedctl) == NULL ||
		    scp->sc_state != SC_ONPROC))
			break;
	}

	return (EBUSY);
}

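/*
 * Illustrative sketch (not part of libc): per the comment above, the
 * caller owns the owner field.  A spin_lock_set()-style caller would
 * look roughly like this; the kernel fallback shown is hypothetical.
 *
 *	if (mutex_queuelock_adaptive(mp) != 0)
 *		...fall back to the kernel and sleep...
 *	mp->mutex_owner = (uintptr_t)curthread;	// caller sets the owner
 */
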
/*
 * Like mutex_trylock_adaptive(), but for process-shared mutexes.
 * Spin for a while (if 'tryhard' is true), trying to grab the lock.
 * If this fails, return EBUSY and let the caller deal with it.
 * If this succeeds, return 0 with mutex_owner set to curthread
 * and mutex_ownerpid set to the current pid.
 */
static int
mutex_trylock_process(mutex_t *mp, int tryhard)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int error = EBUSY;
	volatile uint64_t *lockp = (volatile uint64_t *)&mp->mutex_lockword64;
	uint32_t new_lockword;
	int count = 0;
	int max_count;
	uint8_t max_spinners;

	ASSERT(mp->mutex_type & USYNC_PROCESS);

	if (shared_mutex_held(mp))
		return (EBUSY);

	/* short-cut, not definitive (see below) */
	if (mp->mutex_flag & LOCK_NOTRECOVERABLE) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		error = ENOTRECOVERABLE;
		goto done;
	}

	/*
	 * Make one attempt to acquire the lock before
	 * incurring the overhead of the spin loop.
	 */
	enter_critical(self);
	if (set_lock_byte64(lockp, udp->pid) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		/* mp->mutex_ownerpid was set by set_lock_byte64() */
		exit_critical(self);
		error = 0;
		goto done;
	}
	exit_critical(self);
	if (!tryhard)
		goto done;
	if (ncpus == 0)
		ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
	if ((max_spinners = self->ul_max_spinners) >= ncpus)
		max_spinners = ncpus - 1;
	max_count = (max_spinners != 0)? self->ul_adaptive_spin : 0;
	if (max_count == 0)
		goto done;

	/*
	 * This is a process-shared mutex.
	 * We cannot know if the owner is running on a processor.
	 * We just spin and hope that it is on a processor.
	 */
	enter_critical(self);
	if (spinners_incr(&mp->mutex_lockword, max_spinners) == -1) {
		exit_critical(self);
		goto done;
	}
	DTRACE_PROBE1(plockstat, mutex__spin, mp);
	for (count = 1; ; count++) {
		if ((*lockp & LOCKMASK64) == 0 &&
		    set_lock_byte64(lockp, udp->pid) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			/* mp->mutex_ownerpid was set by set_lock_byte64() */
			error = 0;
			break;
		}
		if (count == max_count)
			break;
		SMT_PAUSE();
	}
	new_lockword = spinners_decr(&mp->mutex_lockword);
	if (error && (new_lockword & (LOCKMASK | SPINNERMASK)) == 0) {
		/*
		 * We haven't yet acquired the lock, the lock
		 * is free, and there are no other spinners.
		 * Make one final attempt to acquire the lock.
		 *
		 * This isn't strictly necessary since mutex_lock_kernel()
		 * (the next action this thread will take if it doesn't
		 * acquire the lock here) makes one attempt to acquire
		 * the lock before putting the thread to sleep.
		 *
		 * If the next action for this thread (on failure here)
		 * were not to call mutex_lock_kernel(), this would be
		 * necessary for correctness, to avoid ending up with an
		 * unheld mutex with waiters but no one to wake them up.
		 */
		if (set_lock_byte64(lockp, udp->pid) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			/* mp->mutex_ownerpid was set by set_lock_byte64() */
			error = 0;
		}
		count++;
	}
	exit_critical(self);

done:
	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		/*
		 * We shouldn't own the mutex.
		 * Just clear the lock; everyone has already been woken up.
		 */
		mp->mutex_owner = 0;
		/* mp->mutex_ownerpid is cleared by clear_lockbyte64() */
		(void) clear_lockbyte64(&mp->mutex_lockword64);
		error = ENOTRECOVERABLE;
	}

	if (error) {
		if (count) {
			DTRACE_PROBE2(plockstat, mutex__spun, 0, count);
		}
		if (error != EBUSY) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
		}
	} else {
		if (count) {
			DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
		}
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
		if (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
			ASSERT(mp->mutex_type & LOCK_ROBUST);
			if (mp->mutex_flag & LOCK_OWNERDEAD)
				error = EOWNERDEAD;
			else if (mp->mutex_type & USYNC_PROCESS_ROBUST)
				error = ELOCKUNMAPPED;
			else
				error = EOWNERDEAD;
		}
	}

	return (error);
}

/*
 * Mutex wakeup code for releasing a USYNC_THREAD mutex.
 * Returns the lwpid of the thread that was dequeued, if any.
 * The caller of mutex_wakeup() must call __lwp_unpark(lwpid)
 * to wake up the specified lwp.
 */
static lwpid_t
mutex_wakeup(mutex_t *mp)
{
	lwpid_t lwpid = 0;
	int more;
	queue_head_t *qp;
	ulwp_t *ulwp;

	/*
	 * Dequeue a waiter from the sleep queue.  Don't touch the mutex
	 * waiters bit if no one was found on the queue because the mutex
	 * might have been deallocated or reallocated for another purpose.
	 */
	qp = queue_lock(mp, MX);
	if ((ulwp = dequeue(qp, &more)) != NULL) {
		lwpid = ulwp->ul_lwpid;
		mp->mutex_waiters = more;
	}
	queue_unlock(qp);
	return (lwpid);
}

/*
 * Mutex wakeup code for releasing all waiters on a USYNC_THREAD mutex.
 */
static void
mutex_wakeup_all(mutex_t *mp)
{
	queue_head_t *qp;
	queue_root_t *qrp;
	int nlwpid = 0;
	int maxlwps = MAXLWPS;
	ulwp_t *ulwp;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	/*
	 * Walk the list of waiters and prepare to wake up all of them.
	 * The waiters flag has already been cleared from the mutex.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.  If we run out of space in our
	 * on-stack buffer, we need to allocate more but we can't call
	 * lmalloc() because we are holding a queue lock when the overflow
	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
	 * either because the application may have allocated a small
	 * stack and we don't want to overrun the stack.  So we call
	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
	 * system call directly since that path acquires no locks.
	 */
	qp = queue_lock(mp, MX);
	for (;;) {
		if ((qrp = qp->qh_root) == NULL ||
		    (ulwp = qrp->qr_head) == NULL)
			break;
		ASSERT(ulwp->ul_wchan == mp);
		queue_unlink(qp, &qrp->qr_head, NULL);
		ulwp->ul_sleepq = NULL;
		ulwp->ul_wchan = NULL;
		if (nlwpid == maxlwps)
			lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
		lwpid[nlwpid++] = ulwp->ul_lwpid;
	}

	if (nlwpid == 0) {
		queue_unlock(qp);
	} else {
		mp->mutex_waiters = 0;
		no_preempt(curthread);
		queue_unlock(qp);
		if (nlwpid == 1)
			(void) __lwp_unpark(lwpid[0]);
		else
			(void) __lwp_unpark_all(lwpid, nlwpid);
		preempt(curthread);
	}

	if (lwpid != buffer)
		(void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
}

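/*
 * Illustrative sketch (not part of libc): the shape of the buffer-growth
 * strategy described above.  This is a simplified, hypothetical rendering
 * of what an alloc_lwpids()-style helper must do -- obtain anonymous
 * memory straight from mmap() (which takes no user-level locks), copy the
 * collected ids over, and release any previously mapped buffer; the
 * caller unmaps the final buffer if it is not the original on-stack one.
 * Error handling is elided.
 *
 *	lwpid_t *
 *	grow_lwpids(lwpid_t *old, int nlwpid, int *maxlwpsp)	// hypothetical
 *	{
 *		int newmax = 2 * *maxlwpsp;
 *		lwpid_t *new = mmap(NULL, newmax * sizeof (lwpid_t),
 *		    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
 *
 *		(void) memcpy(new, old, nlwpid * sizeof (lwpid_t));
 *		if (*maxlwpsp != MAXLWPS)	// old was itself mapped
 *			(void) munmap((void *)old,
 *			    *maxlwpsp * sizeof (lwpid_t));
 *		*maxlwpsp = newmax;
 *		return (new);
 *	}
 */
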
/*
 * Release a process-private mutex.
 * As an optimization, if there are waiters but there are also spinners
 * attempting to acquire the mutex, then don't bother waking up a waiter;
 * one of the spinners will acquire the mutex soon and it would be a waste
 * of resources to wake up some thread just to have it spin for a while
 * and then possibly go back to sleep.  See mutex_trylock_adaptive().
 */
static lwpid_t
mutex_unlock_queue(mutex_t *mp, int release_all)
{
	lwpid_t lwpid = 0;
	uint32_t old_lockword;

	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
	mp->mutex_owner = 0;
	old_lockword = clear_lockbyte(&mp->mutex_lockword);
	if ((old_lockword & WAITERMASK) &&
	    (release_all || (old_lockword & SPINNERMASK) == 0)) {
		ulwp_t *self = curthread;
		no_preempt(self);	/* ensure a prompt wakeup */
		if (release_all)
			mutex_wakeup_all(mp);
		else
			lwpid = mutex_wakeup(mp);
		if (lwpid == 0)
			preempt(self);
	}
	return (lwpid);
}

/*
 * Like mutex_unlock_queue(), but for process-shared mutexes.
 */
static void
mutex_unlock_process(mutex_t *mp, int release_all)
{
	uint64_t old_lockword64;

	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
	mp->mutex_owner = 0;
	/* mp->mutex_ownerpid is cleared by clear_lockbyte64() */
	old_lockword64 = clear_lockbyte64(&mp->mutex_lockword64);
	if ((old_lockword64 & WAITERMASK64) &&
	    (release_all || (old_lockword64 & SPINNERMASK64) == 0)) {
		ulwp_t *self = curthread;
		no_preempt(self);	/* ensure a prompt wakeup */
		(void) ___lwp_mutex_wakeup(mp, release_all);
		preempt(self);
	}
}

void
stall(void)
{
	for (;;)
		(void) mutex_lock_kernel(&stall_mutex, NULL, NULL);
}

/*
 * Acquire a USYNC_THREAD mutex via user-level sleep queues.
 * We failed set_lock_byte(&mp->mutex_lockw) before coming here.
 * If successful, returns with mutex_owner set correctly.
 */
int
mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp,
	timespec_t *tsp)
{
	uberdata_t *udp = curthread->ul_uberdata;
	queue_head_t *qp;
	hrtime_t begin_sleep;
	int error = 0;

	self->ul_sp = stkptr();
	if (__td_event_report(self, TD_SLEEP, udp)) {
		self->ul_wchan = mp;
		self->ul_td_evbuf.eventnum = TD_SLEEP;
		self->ul_td_evbuf.eventdata = mp;
		tdb_event(TD_SLEEP, udp);
	}
	if (msp) {
		tdb_incr(msp->mutex_sleep);
		begin_sleep = gethrtime();
	}

	DTRACE_PROBE1(plockstat, mutex__block, mp);

	/*
	 * Put ourself on the sleep queue, and while we are
	 * unable to grab the lock, go park in the kernel.
	 * Take ourself off the sleep queue after we acquire the lock.
	 * The waiter bit can be set/cleared only while holding the queue lock.
	 */
	qp = queue_lock(mp, MX);
	enqueue(qp, self, 0);
	mp->mutex_waiters = 1;
	for (;;) {
		if (set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			mp->mutex_waiters = dequeue_self(qp);
			break;
		}
		set_parking_flag(self, 1);
		queue_unlock(qp);
		/*
		 * __lwp_park() will return the residual time in tsp
		 * if we are unparked before the timeout expires.
		 */
		error = __lwp_park(tsp, 0);
		set_parking_flag(self, 0);
		/*
		 * We could have taken a signal or suspended ourself.
		 * If we did, then we removed ourself from the queue.
		 * Someone else may have removed us from the queue
		 * as a consequence of mutex_unlock().  We may have
		 * gotten a timeout from __lwp_park().  Or we may still
		 * be on the queue and this is just a spurious wakeup.
		 */
		qp = queue_lock(mp, MX);
		if (self->ul_sleepq == NULL) {
			if (error) {
				mp->mutex_waiters = queue_waiter(qp)? 1 : 0;
				if (error != EINTR)
					break;
				error = 0;
			}
			if (set_lock_byte(&mp->mutex_lockw) == 0) {
				mp->mutex_owner = (uintptr_t)self;
				break;
			}
			enqueue(qp, self, 0);
			mp->mutex_waiters = 1;
		}
		ASSERT(self->ul_sleepq == qp &&
		    self->ul_qtype == MX &&
		    self->ul_wchan == mp);
		if (error) {
			if (error != EINTR) {
				mp->mutex_waiters = dequeue_self(qp);
				break;
			}
			error = 0;
		}
	}
	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
	    self->ul_wchan == NULL);
	self->ul_sp = 0;
	queue_unlock(qp);

	if (msp)
		msp->mutex_sleep_time += gethrtime() - begin_sleep;

	ASSERT(error == 0 || error == EINVAL || error == ETIME);

	if (error == 0 && (mp->mutex_flag & LOCK_NOTRECOVERABLE)) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		/*
		 * We shouldn't own the mutex.
		 * Just clear the lock; everyone has already been woken up.
		 */
		mp->mutex_owner = 0;
		(void) clear_lockbyte(&mp->mutex_lockword);
		error = ENOTRECOVERABLE;
	}

	if (error) {
		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
		DTRACE_PROBE2(plockstat, mutex__error, mp, error);
	} else {
		DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
		if (mp->mutex_flag & LOCK_OWNERDEAD) {
			ASSERT(mp->mutex_type & LOCK_ROBUST);
			error = EOWNERDEAD;
		}
	}

	return (error);
}

static int
mutex_recursion(mutex_t *mp, int mtype, int try)
{
	ASSERT(mutex_is_held(mp));
	ASSERT(mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK));
	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);

	if (mtype & LOCK_RECURSIVE) {
		if (mp->mutex_rcount == RECURSION_MAX) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, EAGAIN);
			return (EAGAIN);
		}
		mp->mutex_rcount++;
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0);
		return (0);
	}
	if (try == MUTEX_LOCK) {
		DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
		return (EDEADLK);
	}
	return (EBUSY);
}

/*
 * Register this USYNC_PROCESS|LOCK_ROBUST mutex with the kernel so
 * it can apply LOCK_OWNERDEAD|LOCK_UNMAPPED if it becomes necessary.
 * We use tdb_hash_lock here and in the synch object tracking code in
 * the tdb_agent.c file.  There is no conflict between these two usages.
 */
void
register_lock(mutex_t *mp)
{
	uberdata_t *udp = curthread->ul_uberdata;
	uint_t hash = LOCK_HASH(mp);
	robust_t *rlp;
	robust_t **rlpp;
	robust_t **table;

	if ((table = udp->robustlocks) == NULL) {
		lmutex_lock(&udp->tdb_hash_lock);
		if ((table = udp->robustlocks) == NULL) {
			table = lmalloc(LOCKHASHSZ * sizeof (robust_t *));
			_membar_producer();
			udp->robustlocks = table;
		}
		lmutex_unlock(&udp->tdb_hash_lock);
	}
	_membar_consumer();

	/*
	 * First search the registered table with no locks held.
	 * This is safe because the table never shrinks
	 * and we can only get a false negative.
	 */
	for (rlp = table[hash]; rlp != NULL; rlp = rlp->robust_next) {
		if (rlp->robust_lock == mp)	/* already registered */
			return;
	}

	/*
	 * The lock was not found.
	 * Repeat the operation with tdb_hash_lock held.
	 */
	lmutex_lock(&udp->tdb_hash_lock);

	for (rlpp = &table[hash];
	    (rlp = *rlpp) != NULL;
	    rlpp = &rlp->robust_next) {
		if (rlp->robust_lock == mp) {	/* already registered */
			lmutex_unlock(&udp->tdb_hash_lock);
			return;
		}
	}

	/*
	 * The lock has never been registered.
	 * Register it now and add it to the table.
	 */
	(void) ___lwp_mutex_register(mp);
	rlp = lmalloc(sizeof (*rlp));
	rlp->robust_lock = mp;
	_membar_producer();
	*rlpp = rlp;

	lmutex_unlock(&udp->tdb_hash_lock);
}

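/*
 * Illustrative sketch (not part of libc): the publish/consume discipline
 * used twice in register_lock() above, reduced to its essentials.  The
 * writer fully initializes the object, issues _membar_producer(), and
 * only then stores the pointer that makes it visible; a lock-free reader
 * pairs that with _membar_consumer() after loading the pointer, so it
 * never observes the pointer before the object's contents.
 *
 *	// writer (under the lock)		// lock-free reader
 *	obj->field = value;			p = shared_ptr;
 *	_membar_producer();			_membar_consumer();
 *	shared_ptr = obj;			use(p->field);
 */
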
/*
 * This is called in the child of fork()/forkall() to start over
 * with a clean slate.  (Each process must register its own locks.)
 * No locks are needed because all other threads are suspended or gone.
 */
void
unregister_locks(void)
{
	uberdata_t *udp = curthread->ul_uberdata;
	uint_t hash;
	robust_t **table;
	robust_t *rlp;
	robust_t *next;

	if ((table = udp->robustlocks) != NULL) {
		for (hash = 0; hash < LOCKHASHSZ; hash++) {
			rlp = table[hash];
			while (rlp != NULL) {
				next = rlp->robust_next;
				lfree(rlp, sizeof (*rlp));
				rlp = next;
			}
		}
		lfree(table, LOCKHASHSZ * sizeof (robust_t *));
		udp->robustlocks = NULL;
	}
}

/*
 * Returns with mutex_owner set correctly.
 */
int
mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
	int error = 0;
	int noceil = try & MUTEX_NOCEIL;
	uint8_t ceil;
	int myprio;

	try &= ~MUTEX_NOCEIL;
	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);

	if (!self->ul_schedctl_called)
		(void) setup_schedctl();

	if (msp && try == MUTEX_TRY)
		tdb_incr(msp->mutex_try);

	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp))
		return (mutex_recursion(mp, mtype, try));

	if (self->ul_error_detection && try == MUTEX_LOCK &&
	    tsp == NULL && mutex_is_held(mp))
		lock_error(mp, "mutex_lock", NULL, NULL);

	if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) {
		update_sched(self);
		if (self->ul_cid != self->ul_rtclassid) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, EPERM);
			return (EPERM);
		}
		ceil = mp->mutex_ceiling;
		myprio = self->ul_epri? self->ul_epri : self->ul_pri;
		if (myprio > ceil) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, EINVAL);
			return (EINVAL);
		}
		if ((error = _ceil_mylist_add(mp)) != 0) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
			return (error);
		}
		if (myprio < ceil)
			_ceil_prio_inherit(ceil);
	}

	if ((mtype & (USYNC_PROCESS | LOCK_ROBUST))
	    == (USYNC_PROCESS | LOCK_ROBUST))
		register_lock(mp);

	if (mtype & LOCK_PRIO_INHERIT) {
		/* go straight to the kernel */
		if (try == MUTEX_TRY)
			error = mutex_trylock_kernel(mp);
		else	/* MUTEX_LOCK */
			error = mutex_lock_kernel(mp, tsp, msp);
		/*
		 * The kernel never sets or clears the lock byte
		 * for LOCK_PRIO_INHERIT mutexes.
		 * Set it here for consistency.
		 */
		switch (error) {
		case 0:
			self->ul_pilocks++;
			mp->mutex_lockw = LOCKSET;
			break;
		case EOWNERDEAD:
		case ELOCKUNMAPPED:
			self->ul_pilocks++;
			mp->mutex_lockw = LOCKSET;
			/* FALLTHROUGH */
		case ENOTRECOVERABLE:
			ASSERT(mtype & LOCK_ROBUST);
			break;
		case EDEADLK:
			if (try == MUTEX_LOCK)
				stall();
			error = EBUSY;
			break;
		}
	} else if (mtype & USYNC_PROCESS) {
		error = mutex_trylock_process(mp, try == MUTEX_LOCK);
		if (error == EBUSY && try == MUTEX_LOCK)
			error = mutex_lock_kernel(mp, tsp, msp);
	} else {	/* USYNC_THREAD */
		error = mutex_trylock_adaptive(mp, try == MUTEX_LOCK);
		if (error == EBUSY && try == MUTEX_LOCK)
			error = mutex_lock_queue(self, msp, mp, tsp);
	}

	switch (error) {
	case 0:
	case EOWNERDEAD:
	case ELOCKUNMAPPED:
		if (mtype & LOCK_ROBUST)
			remember_lock(mp);
		if (msp)
			record_begin_hold(msp);
		break;
	default:
		if ((mtype & LOCK_PRIO_PROTECT) && noceil == 0) {
			(void) _ceil_mylist_del(mp);
			if (myprio < ceil)
				_ceil_prio_waive();
		}
		if (try == MUTEX_TRY) {
			if (msp)
				tdb_incr(msp->mutex_try_fail);
			if (__td_event_report(self, TD_LOCK_TRY, udp)) {
				self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
				tdb_event(TD_LOCK_TRY, udp);
			}
		}
		break;
	}

	return (error);
}

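/*
 * Worked example (not part of libc) of the priority-ceiling checks in
 * mutex_lock_internal() above.  Suppose a real-time thread at priority
 * 10 locks a LOCK_PRIO_PROTECT mutex whose ceiling is 20: 10 <= 20, so
 * the lock is granted and _ceil_prio_inherit(20) boosts the thread to
 * the ceiling until it unlocks.  A thread at priority 30 gets EINVAL,
 * since its priority exceeds the ceiling; a thread that is not in the
 * real-time class at all gets EPERM before the ceiling is even examined.
 */
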
int
fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	/*
	 * We know that USYNC_PROCESS is set in mtype and that
	 * zero, one, or both of the flags LOCK_RECURSIVE and
	 * LOCK_ERRORCHECK are set, and that no other flags are set.
	 */
	ASSERT((mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0);
	enter_critical(self);
	if (set_lock_byte64(&mp->mutex_lockword64, udp->pid) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		/* mp->mutex_ownerpid was set by set_lock_byte64() */
		exit_critical(self);
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
		return (0);
	}
	exit_critical(self);

	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && shared_mutex_held(mp))
		return (mutex_recursion(mp, mtype, try));

	if (try == MUTEX_LOCK) {
		if (mutex_trylock_process(mp, 1) == 0)
			return (0);
		return (mutex_lock_kernel(mp, tsp, NULL));
	}

	if (__td_event_report(self, TD_LOCK_TRY, udp)) {
		self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
		tdb_event(TD_LOCK_TRY, udp);
	}
	return (EBUSY);
}

static int
mutex_lock_impl(mutex_t *mp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	int mtype = mp->mutex_type;
	uberflags_t *gflags;

	/*
	 * Optimize the case of USYNC_THREAD, including
	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
	 * no error detection, no lock statistics,
	 * and the process has only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
	    self->ul_uberdata->uberflags.uf_all) == 0) {
		/*
		 * Only one thread exists so we don't need an atomic operation.
		 */
		if (mp->mutex_lockw == 0) {
			mp->mutex_lockw = LOCKSET;
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			return (0);
		}
		if (mtype && MUTEX_OWNER(mp) == self)
			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
		/*
		 * We have reached a deadlock, probably because the
		 * process is executing non-async-signal-safe code in
		 * a signal handler and is attempting to acquire a lock
		 * that it already owns.  This is not surprising, given
		 * bad programming practices over the years that have
		 * resulted in applications calling printf() and such
		 * in their signal handlers.  Unless the user has told
		 * us that the signal handlers are safe by setting:
		 *	export _THREAD_ASYNC_SAFE=1
		 * we return EDEADLK rather than actually deadlocking.
		 */
		if (tsp == NULL &&
		    MUTEX_OWNER(mp) == self && !self->ul_async_safe) {
			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
			return (EDEADLK);
		}
	}

	/*
	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
	 * no error detection, and no lock statistics.
	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
	 */
	if ((gflags = self->ul_schedctl_called) != NULL &&
	    (gflags->uf_trs_ted |
	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
		if (mtype & USYNC_PROCESS)
			return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK));
		if (set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			return (0);
		}
		if (mtype && MUTEX_OWNER(mp) == self)
			return (mutex_recursion(mp, mtype, MUTEX_LOCK));
		if (mutex_trylock_adaptive(mp, 1) != 0)
			return (mutex_lock_queue(self, NULL, mp, tsp));
		return (0);
	}

	/* else do it the long way */
	return (mutex_lock_internal(mp, tsp, MUTEX_LOCK));
}

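/*
 * Illustrative sketch (not part of libc): the application bug that the
 * EDEADLK path in mutex_lock_impl() above anticipates.  The handler
 * below is hypothetical; printf() takes an internal lock, so if the
 * signal arrives while the main flow of control already holds that
 * lock, the process would self-deadlock were it not for the EDEADLK
 * return (absent _THREAD_ASYNC_SAFE=1 in the environment).
 *
 *	void
 *	handler(int sig)
 *	{
 *		// printf() is not async-signal-safe
 *		(void) printf("got signal %d\n", sig);
 *	}
 */
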
/*
 * Of the following function names (all the same function, of course),
 * only _private_mutex_lock() is not exported from libc.  This means
 * that calling _private_mutex_lock() within libc will not invoke the
 * dynamic linker.  This is critical for any code called in the child
 * of vfork() (via posix_spawn()) because invoking the dynamic linker
 * in such a case would corrupt the parent's address space.  There are
 * other places in libc where avoiding the dynamic linker is necessary.
 * Of course, _private_mutex_lock() can be called in cases not requiring
 * the avoidance of the dynamic linker too, and often is.
 */
#pragma weak _private_mutex_lock = __mutex_lock
#pragma weak mutex_lock = __mutex_lock
#pragma weak _mutex_lock = __mutex_lock
#pragma weak pthread_mutex_lock = __mutex_lock
#pragma weak _pthread_mutex_lock = __mutex_lock
int
__mutex_lock(mutex_t *mp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (mutex_lock_impl(mp, NULL));
}

#pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock
int
_pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
	error = mutex_lock_impl(mp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

#pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np
int
_pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	tslocal = *reltime;
	error = mutex_lock_impl(mp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

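/*
 * Illustrative sketch (not part of libc): application-level use of the
 * two timed-lock flavors above.  pthread_mutex_timedlock() takes an
 * absolute CLOCK_REALTIME deadline; the _np variant takes a relative
 * interval directly.
 *
 *	struct timespec ts;
 *
 *	(void) clock_gettime(CLOCK_REALTIME, &ts);
 *	ts.tv_sec += 5;			// deadline: now + 5 seconds
 *	if (pthread_mutex_timedlock(&m, &ts) == ETIMEDOUT)
 *		...give up...
 *
 *	ts.tv_sec = 5;
 *	ts.tv_nsec = 0;			// interval: 5 seconds
 *	if (pthread_mutex_reltimedlock_np(&m, &ts) == ETIMEDOUT)
 *		...give up...
 */
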
#pragma weak _private_mutex_trylock = __mutex_trylock
#pragma weak mutex_trylock = __mutex_trylock
#pragma weak _mutex_trylock = __mutex_trylock
#pragma weak pthread_mutex_trylock = __mutex_trylock
#pragma weak _pthread_mutex_trylock = __mutex_trylock
int
__mutex_trylock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	uberflags_t *gflags;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	/*
	 * Optimize the case of USYNC_THREAD, including
	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
	 * no error detection, no lock statistics,
	 * and the process has only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
	    udp->uberflags.uf_all) == 0) {
		/*
		 * Only one thread exists so we don't need an atomic operation.
		 */
		if (mp->mutex_lockw == 0) {
			mp->mutex_lockw = LOCKSET;
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			return (0);
		}
		if (mtype && MUTEX_OWNER(mp) == self)
			return (mutex_recursion(mp, mtype, MUTEX_TRY));
		return (EBUSY);
	}

	/*
	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
	 * no error detection, and no lock statistics.
	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
	 */
	if ((gflags = self->ul_schedctl_called) != NULL &&
	    (gflags->uf_trs_ted |
	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
		if (mtype & USYNC_PROCESS)
			return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY));
		if (set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			return (0);
		}
		if (mtype && MUTEX_OWNER(mp) == self)
			return (mutex_recursion(mp, mtype, MUTEX_TRY));
		if (__td_event_report(self, TD_LOCK_TRY, udp)) {
			self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
			tdb_event(TD_LOCK_TRY, udp);
		}
		return (EBUSY);
	}

	/* else do it the long way */
	return (mutex_lock_internal(mp, NULL, MUTEX_TRY));
}

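/*
 * Illustrative sketch (not part of libc): typical application-level use
 * of the trylock entry point above -- attempt the fast path, and do
 * other useful work on EBUSY rather than blocking.
 *
 *	if (pthread_mutex_trylock(&m) == 0) {
 *		...use the protected resource...
 *		(void) pthread_mutex_unlock(&m);
 *	} else {
 *		...EBUSY: do something else and retry later...
 *	}
 */
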
int
mutex_unlock_internal(mutex_t *mp, int retain_robust_flags)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	tdb_mutex_stats_t *msp;
	int error = 0;
	int release_all;
	lwpid_t lwpid;

	if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp))
		return (EPERM);

	if (self->ul_error_detection && !mutex_is_held(mp))
		lock_error(mp, "mutex_unlock", NULL, NULL);

	if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
		mp->mutex_rcount--;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
		return (0);
	}

	if ((msp = MUTEX_STATS(mp, udp)) != NULL)
		(void) record_hold_time(msp);

	if (!retain_robust_flags && !(mtype & LOCK_PRIO_INHERIT) &&
	    (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) {
		ASSERT(mp->mutex_type & LOCK_ROBUST);
		mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		mp->mutex_flag |= LOCK_NOTRECOVERABLE;
	}
	release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0);

	if (mtype & LOCK_PRIO_INHERIT) {
		no_preempt(self);
		mp->mutex_owner = 0;
		/* mp->mutex_ownerpid is cleared by ___lwp_mutex_unlock() */
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		mp->mutex_lockw = LOCKCLEAR;
		self->ul_pilocks--;
		error = ___lwp_mutex_unlock(mp);
		preempt(self);
	} else if (mtype & USYNC_PROCESS) {
		mutex_unlock_process(mp, release_all);
	} else {	/* USYNC_THREAD */
		if ((lwpid = mutex_unlock_queue(mp, release_all)) != 0) {
			(void) __lwp_unpark(lwpid);
			preempt(self);
		}
	}

	if (mtype & LOCK_ROBUST)
		forget_lock(mp);

	if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp))
		_ceil_prio_waive();

	return (error);
}

#pragma weak _private_mutex_unlock = __mutex_unlock
#pragma weak mutex_unlock = __mutex_unlock
#pragma weak _mutex_unlock = __mutex_unlock
#pragma weak pthread_mutex_unlock = __mutex_unlock
#pragma weak _pthread_mutex_unlock = __mutex_unlock
int
__mutex_unlock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	int mtype = mp->mutex_type;
	uberflags_t *gflags;
	lwpid_t lwpid;
	short el;

	/*
	 * Optimize the case of USYNC_THREAD, including
	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
	 * no error detection, no lock statistics,
	 * and the process has only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
	    self->ul_uberdata->uberflags.uf_all) == 0) {
		if (mtype) {
			/*
			 * At this point we know that one or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
		}
		/*
		 * Only one thread exists so we don't need an atomic operation.
		 * Also, there can be no waiters.
		 */
		mp->mutex_owner = 0;
		mp->mutex_lockword = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		return (0);
	}

	/*
	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
	 * no error detection, and no lock statistics.
	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
	 */
	if ((gflags = self->ul_schedctl_called) != NULL) {
		if (((el = gflags->uf_trs_ted) | mtype) == 0) {
fast_unlock:
			if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) {
				(void) __lwp_unpark(lwpid);
				preempt(self);
			}
			return (0);
		}
		if (el)		/* error detection or lock statistics */
			goto slow_unlock;
		if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
			/*
			 * At this point we know that one or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
			goto fast_unlock;
		}
		if ((mtype &
		    ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
			/*
			 * At this point we know that zero, one, or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and
			 * that the USYNC_PROCESS flag is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
			mutex_unlock_process(mp, 0);
			return (0);
		}
	}

	/* else do it the long way */
slow_unlock:
	return (mutex_unlock_internal(mp, 0));
}

/*
 * Internally to the library, almost all mutex lock/unlock actions
 * go through these lmutex_ functions, to protect critical regions.
 * We replicate a bit of code from __mutex_lock() and __mutex_unlock()
 * to make these functions faster since we know that the mutex type
 * of all internal locks is USYNC_THREAD.  We also know that internal
 * locking can never fail, so we panic if it does.
 */
void
lmutex_lock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	ASSERT(mp->mutex_type == USYNC_THREAD);

	enter_critical(self);
	/*
	 * Optimize the case of no lock statistics and only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (udp->uberflags.uf_all == 0) {
		/*
		 * Only one thread exists; the mutex must be free.
24747c478bd9Sstevel@tonic-gate */ 24757c478bd9Sstevel@tonic-gate ASSERT(mp->mutex_lockw == 0); 24767c478bd9Sstevel@tonic-gate mp->mutex_lockw = LOCKSET; 24777c478bd9Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 24787c478bd9Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 24797c478bd9Sstevel@tonic-gate } else { 24807c478bd9Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 24817c478bd9Sstevel@tonic-gate 24827c478bd9Sstevel@tonic-gate if (!self->ul_schedctl_called) 24837c478bd9Sstevel@tonic-gate (void) setup_schedctl(); 24847c478bd9Sstevel@tonic-gate 24857c478bd9Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) == 0) { 24867c478bd9Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 24877c478bd9Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 248816b01779Sraf } else if (mutex_trylock_adaptive(mp, 1) != 0) { 24897c478bd9Sstevel@tonic-gate (void) mutex_lock_queue(self, msp, mp, NULL); 24907c478bd9Sstevel@tonic-gate } 24917c478bd9Sstevel@tonic-gate 24927c478bd9Sstevel@tonic-gate if (msp) 24937c478bd9Sstevel@tonic-gate record_begin_hold(msp); 24947c478bd9Sstevel@tonic-gate } 24957c478bd9Sstevel@tonic-gate } 24967c478bd9Sstevel@tonic-gate 24977c478bd9Sstevel@tonic-gate void 24987c478bd9Sstevel@tonic-gate lmutex_unlock(mutex_t *mp) 24997c478bd9Sstevel@tonic-gate { 25007c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 25017c478bd9Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 25027c478bd9Sstevel@tonic-gate 25037c478bd9Sstevel@tonic-gate ASSERT(mp->mutex_type == USYNC_THREAD); 25047c478bd9Sstevel@tonic-gate 25057c478bd9Sstevel@tonic-gate /* 25067c478bd9Sstevel@tonic-gate * Optimize the case of no lock statistics and only a single thread. 25077c478bd9Sstevel@tonic-gate * (Most likely a traditional single-threaded application.) 25087c478bd9Sstevel@tonic-gate */ 25097c478bd9Sstevel@tonic-gate if (udp->uberflags.uf_all == 0) { 25107c478bd9Sstevel@tonic-gate /* 25117c478bd9Sstevel@tonic-gate * Only one thread exists so there can be no waiters. 25127c478bd9Sstevel@tonic-gate */ 25137c478bd9Sstevel@tonic-gate mp->mutex_owner = 0; 25147c478bd9Sstevel@tonic-gate mp->mutex_lockword = 0; 25157c478bd9Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 25167c478bd9Sstevel@tonic-gate } else { 25177c478bd9Sstevel@tonic-gate tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp); 25187c478bd9Sstevel@tonic-gate lwpid_t lwpid; 25197c478bd9Sstevel@tonic-gate 25207c478bd9Sstevel@tonic-gate if (msp) 25217c478bd9Sstevel@tonic-gate (void) record_hold_time(msp); 2522883492d5Sraf if ((lwpid = mutex_unlock_queue(mp, 0)) != 0) { 25237c478bd9Sstevel@tonic-gate (void) __lwp_unpark(lwpid); 25247c478bd9Sstevel@tonic-gate preempt(self); 25257c478bd9Sstevel@tonic-gate } 25267c478bd9Sstevel@tonic-gate } 25277c478bd9Sstevel@tonic-gate exit_critical(self); 25287c478bd9Sstevel@tonic-gate } 25297c478bd9Sstevel@tonic-gate 2530f841f6adSraf /* 2531f841f6adSraf * For specialized code in libc, like the asynchronous i/o code, 2532f841f6adSraf * the following sig_*() locking primitives are used in order 2533f841f6adSraf * to make the code asynchronous signal safe. Signals are 2534f841f6adSraf * deferred while locks acquired by these functions are held. 
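 *
 * For application code that wants the same discipline, the portable
 * equivalent of the internal sigoff()/sigon() counters is
 * pthread_sigmask(); a minimal sketch (m is the caller's own mutex):
 *
 *	sigset_t set, oset;
 *
 *	(void) sigfillset(&set);
 *	(void) pthread_sigmask(SIG_BLOCK, &set, &oset);
 *	(void) pthread_mutex_lock(&m);
 *	... critical section; signal delivery is deferred ...
 *	(void) pthread_mutex_unlock(&m);
 *	(void) pthread_sigmask(SIG_SETMASK, &oset, NULL);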
2535f841f6adSraf  */
2536f841f6adSraf void
2537f841f6adSraf sig_mutex_lock(mutex_t *mp)
2538f841f6adSraf {
2539f841f6adSraf 	sigoff(curthread);
2540f841f6adSraf 	(void) _private_mutex_lock(mp);
2541f841f6adSraf }
2542f841f6adSraf 
2543f841f6adSraf void
2544f841f6adSraf sig_mutex_unlock(mutex_t *mp)
2545f841f6adSraf {
2546f841f6adSraf 	(void) _private_mutex_unlock(mp);
2547f841f6adSraf 	sigon(curthread);
2548f841f6adSraf }
2549f841f6adSraf 
2550f841f6adSraf int
2551f841f6adSraf sig_mutex_trylock(mutex_t *mp)
2552f841f6adSraf {
2553f841f6adSraf 	int error;
2554f841f6adSraf 
2555f841f6adSraf 	sigoff(curthread);
2556f841f6adSraf 	if ((error = _private_mutex_trylock(mp)) != 0)
2557f841f6adSraf 		sigon(curthread);
2558f841f6adSraf 	return (error);
2559f841f6adSraf }
2560f841f6adSraf 
2561f841f6adSraf /*
2562f841f6adSraf  * sig_cond_wait() is a cancellation point.
2563f841f6adSraf  */
2564f841f6adSraf int
2565f841f6adSraf sig_cond_wait(cond_t *cv, mutex_t *mp)
2566f841f6adSraf {
2567f841f6adSraf 	int error;
2568f841f6adSraf 
2569f841f6adSraf 	ASSERT(curthread->ul_sigdefer != 0);
2570f841f6adSraf 	_private_testcancel();
2571a574db85Sraf 	error = __cond_wait(cv, mp);
2572f841f6adSraf 	if (error == EINTR && curthread->ul_cursig) {
2573f841f6adSraf 		sig_mutex_unlock(mp);
2574f841f6adSraf 		/* take the deferred signal here */
2575f841f6adSraf 		sig_mutex_lock(mp);
2576f841f6adSraf 	}
2577f841f6adSraf 	_private_testcancel();
2578f841f6adSraf 	return (error);
2579f841f6adSraf }
2580f841f6adSraf 
2581f841f6adSraf /*
2582f841f6adSraf  * sig_cond_reltimedwait() is a cancellation point.
2583f841f6adSraf  */
2584f841f6adSraf int
2585f841f6adSraf sig_cond_reltimedwait(cond_t *cv, mutex_t *mp, const timespec_t *ts)
2586f841f6adSraf {
2587f841f6adSraf 	int error;
2588f841f6adSraf 
2589f841f6adSraf 	ASSERT(curthread->ul_sigdefer != 0);
2590f841f6adSraf 	_private_testcancel();
2591a574db85Sraf 	error = __cond_reltimedwait(cv, mp, ts);
2592f841f6adSraf 	if (error == EINTR && curthread->ul_cursig) {
2593f841f6adSraf 		sig_mutex_unlock(mp);
2594f841f6adSraf 		/* take the deferred signal here */
2595f841f6adSraf 		sig_mutex_lock(mp);
2596f841f6adSraf 	}
2597f841f6adSraf 	_private_testcancel();
2598f841f6adSraf 	return (error);
2599f841f6adSraf }
2600f841f6adSraf 
2601a574db85Sraf /*
2602a574db85Sraf  * For specialized code in libc, like the stdio code,
2603a574db85Sraf  * the following cancel_safe_*() locking primitives are used in
2604a574db85Sraf  * order to make the code cancellation-safe. Cancellation is
2605a574db85Sraf  * deferred while locks acquired by these functions are held.
2606a574db85Sraf  */
2607a574db85Sraf void
2608a574db85Sraf cancel_safe_mutex_lock(mutex_t *mp)
2609a574db85Sraf {
2610a574db85Sraf 	(void) _private_mutex_lock(mp);
2611a574db85Sraf 	curthread->ul_libc_locks++;
2612a574db85Sraf }
2613a574db85Sraf 
2614a574db85Sraf int
2615a574db85Sraf cancel_safe_mutex_trylock(mutex_t *mp)
2616a574db85Sraf {
2617a574db85Sraf 	int error;
2618a574db85Sraf 
2619a574db85Sraf 	if ((error = _private_mutex_trylock(mp)) == 0)
2620a574db85Sraf 		curthread->ul_libc_locks++;
2621a574db85Sraf 	return (error);
2622a574db85Sraf }
2623a574db85Sraf 
2624a574db85Sraf void
2625a574db85Sraf cancel_safe_mutex_unlock(mutex_t *mp)
2626a574db85Sraf {
2627a574db85Sraf 	ulwp_t *self = curthread;
2628a574db85Sraf 
2629a574db85Sraf 	ASSERT(self->ul_libc_locks != 0);
2630a574db85Sraf 
2631a574db85Sraf 	(void) _private_mutex_unlock(mp);
2632a574db85Sraf 
2633a574db85Sraf 	/*
2634a574db85Sraf 	 * Decrement the count of locks held by cancel_safe_mutex_lock().
2635a574db85Sraf * If we are then in a position to terminate cleanly and 2636a574db85Sraf * if there is a pending cancellation and cancellation 2637a574db85Sraf * is not disabled and we received EINTR from a recent 2638a574db85Sraf * system call then perform the cancellation action now. 2639a574db85Sraf */ 2640a574db85Sraf if (--self->ul_libc_locks == 0 && 2641a574db85Sraf !(self->ul_vfork | self->ul_nocancel | 2642a574db85Sraf self->ul_critical | self->ul_sigdefer) && 2643a574db85Sraf cancel_active()) 2644a574db85Sraf _pthread_exit(PTHREAD_CANCELED); 2645a574db85Sraf } 2646a574db85Sraf 26477c478bd9Sstevel@tonic-gate static int 26487c478bd9Sstevel@tonic-gate shared_mutex_held(mutex_t *mparg) 26497c478bd9Sstevel@tonic-gate { 26507c478bd9Sstevel@tonic-gate /* 2651883492d5Sraf * The 'volatile' is necessary to make sure the compiler doesn't 2652883492d5Sraf * reorder the tests of the various components of the mutex. 2653883492d5Sraf * They must be tested in this order: 2654883492d5Sraf * mutex_lockw 2655883492d5Sraf * mutex_owner 2656883492d5Sraf * mutex_ownerpid 2657883492d5Sraf * This relies on the fact that everywhere mutex_lockw is cleared, 2658883492d5Sraf * mutex_owner and mutex_ownerpid are cleared before mutex_lockw 2659883492d5Sraf * is cleared, and that everywhere mutex_lockw is set, mutex_owner 2660883492d5Sraf * and mutex_ownerpid are set after mutex_lockw is set, and that 2661883492d5Sraf * mutex_lockw is set or cleared with a memory barrier. 26627c478bd9Sstevel@tonic-gate */ 26637c478bd9Sstevel@tonic-gate volatile mutex_t *mp = (volatile mutex_t *)mparg; 26647c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 26657c478bd9Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 26667c478bd9Sstevel@tonic-gate 2667883492d5Sraf return (MUTEX_OWNED(mp, self) && mp->mutex_ownerpid == udp->pid); 26687c478bd9Sstevel@tonic-gate } 26697c478bd9Sstevel@tonic-gate 26707c478bd9Sstevel@tonic-gate /* 26717c478bd9Sstevel@tonic-gate * Some crufty old programs define their own version of _mutex_held() 26727c478bd9Sstevel@tonic-gate * to be simply return(1). This breaks internal libc logic, so we 26737c478bd9Sstevel@tonic-gate * define a private version for exclusive use by libc, mutex_is_held(), 26747c478bd9Sstevel@tonic-gate * and also a new public function, __mutex_held(), to be used in new 26757c478bd9Sstevel@tonic-gate * code to circumvent these crufty old programs. 
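 * For example, new code wanting a debugging check can write
 * (m being any mutex the caller believes it holds):
 *
 *	assert(__mutex_held(&m));
 *
 * and get a genuine answer even when the application has interposed
 * an always-true _mutex_held() of its own.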
26767c478bd9Sstevel@tonic-gate */ 26777c478bd9Sstevel@tonic-gate #pragma weak mutex_held = mutex_is_held 26787c478bd9Sstevel@tonic-gate #pragma weak _mutex_held = mutex_is_held 26797c478bd9Sstevel@tonic-gate #pragma weak __mutex_held = mutex_is_held 26807c478bd9Sstevel@tonic-gate int 2681883492d5Sraf mutex_is_held(mutex_t *mparg) 26827c478bd9Sstevel@tonic-gate { 2683883492d5Sraf volatile mutex_t *mp = (volatile mutex_t *)mparg; 2684883492d5Sraf 2685883492d5Sraf if (mparg->mutex_type & USYNC_PROCESS) 2686883492d5Sraf return (shared_mutex_held(mparg)); 26877c478bd9Sstevel@tonic-gate return (MUTEX_OWNED(mp, curthread)); 26887c478bd9Sstevel@tonic-gate } 26897c478bd9Sstevel@tonic-gate 26907c478bd9Sstevel@tonic-gate #pragma weak _private_mutex_destroy = __mutex_destroy 26917c478bd9Sstevel@tonic-gate #pragma weak mutex_destroy = __mutex_destroy 26927c478bd9Sstevel@tonic-gate #pragma weak _mutex_destroy = __mutex_destroy 26937c478bd9Sstevel@tonic-gate #pragma weak pthread_mutex_destroy = __mutex_destroy 26947c478bd9Sstevel@tonic-gate #pragma weak _pthread_mutex_destroy = __mutex_destroy 26957c478bd9Sstevel@tonic-gate int 26967c478bd9Sstevel@tonic-gate __mutex_destroy(mutex_t *mp) 26977c478bd9Sstevel@tonic-gate { 2698883492d5Sraf if (mp->mutex_type & USYNC_PROCESS) 2699883492d5Sraf forget_lock(mp); 2700883492d5Sraf (void) _memset(mp, 0, sizeof (*mp)); 27017c478bd9Sstevel@tonic-gate tdb_sync_obj_deregister(mp); 27027c478bd9Sstevel@tonic-gate return (0); 27037c478bd9Sstevel@tonic-gate } 27047c478bd9Sstevel@tonic-gate 2705883492d5Sraf #pragma weak mutex_consistent = __mutex_consistent 2706883492d5Sraf #pragma weak _mutex_consistent = __mutex_consistent 2707883492d5Sraf #pragma weak pthread_mutex_consistent_np = __mutex_consistent 2708883492d5Sraf #pragma weak _pthread_mutex_consistent_np = __mutex_consistent 2709883492d5Sraf int 2710883492d5Sraf __mutex_consistent(mutex_t *mp) 2711883492d5Sraf { 2712883492d5Sraf /* 2713883492d5Sraf * Do this only for an inconsistent, initialized robust lock 2714883492d5Sraf * that we hold. For all other cases, return EINVAL. 2715883492d5Sraf */ 2716883492d5Sraf if (mutex_is_held(mp) && 2717883492d5Sraf (mp->mutex_type & LOCK_ROBUST) && 2718883492d5Sraf (mp->mutex_flag & LOCK_INITED) && 2719883492d5Sraf (mp->mutex_flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED))) { 2720883492d5Sraf mp->mutex_flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED); 2721883492d5Sraf mp->mutex_rcount = 0; 2722883492d5Sraf return (0); 2723883492d5Sraf } 2724883492d5Sraf return (EINVAL); 2725883492d5Sraf } 2726883492d5Sraf 27277c478bd9Sstevel@tonic-gate /* 27287c478bd9Sstevel@tonic-gate * Spin locks are separate from ordinary mutexes, 27297c478bd9Sstevel@tonic-gate * but we use the same data structure for them. 
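 *
 * For reference, typical application usage of this interface
 * (standard POSIX calls, nothing specific to this implementation):
 *
 *	pthread_spinlock_t lock;
 *
 *	(void) pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
 *	(void) pthread_spin_lock(&lock);
 *	... a very short critical section ...
 *	(void) pthread_spin_unlock(&lock);
 *	(void) pthread_spin_destroy(&lock);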
27307c478bd9Sstevel@tonic-gate */ 27317c478bd9Sstevel@tonic-gate 27327c478bd9Sstevel@tonic-gate #pragma weak pthread_spin_init = _pthread_spin_init 27337c478bd9Sstevel@tonic-gate int 27347c478bd9Sstevel@tonic-gate _pthread_spin_init(pthread_spinlock_t *lock, int pshared) 27357c478bd9Sstevel@tonic-gate { 27367c478bd9Sstevel@tonic-gate mutex_t *mp = (mutex_t *)lock; 27377c478bd9Sstevel@tonic-gate 27387c478bd9Sstevel@tonic-gate (void) _memset(mp, 0, sizeof (*mp)); 27397c478bd9Sstevel@tonic-gate if (pshared == PTHREAD_PROCESS_SHARED) 27407c478bd9Sstevel@tonic-gate mp->mutex_type = USYNC_PROCESS; 27417c478bd9Sstevel@tonic-gate else 27427c478bd9Sstevel@tonic-gate mp->mutex_type = USYNC_THREAD; 27437c478bd9Sstevel@tonic-gate mp->mutex_flag = LOCK_INITED; 27447c478bd9Sstevel@tonic-gate mp->mutex_magic = MUTEX_MAGIC; 27457c478bd9Sstevel@tonic-gate return (0); 27467c478bd9Sstevel@tonic-gate } 27477c478bd9Sstevel@tonic-gate 27487c478bd9Sstevel@tonic-gate #pragma weak pthread_spin_destroy = _pthread_spin_destroy 27497c478bd9Sstevel@tonic-gate int 27507c478bd9Sstevel@tonic-gate _pthread_spin_destroy(pthread_spinlock_t *lock) 27517c478bd9Sstevel@tonic-gate { 27527c478bd9Sstevel@tonic-gate (void) _memset(lock, 0, sizeof (*lock)); 27537c478bd9Sstevel@tonic-gate return (0); 27547c478bd9Sstevel@tonic-gate } 27557c478bd9Sstevel@tonic-gate 27567c478bd9Sstevel@tonic-gate #pragma weak pthread_spin_trylock = _pthread_spin_trylock 27577c478bd9Sstevel@tonic-gate int 27587c478bd9Sstevel@tonic-gate _pthread_spin_trylock(pthread_spinlock_t *lock) 27597c478bd9Sstevel@tonic-gate { 27607c478bd9Sstevel@tonic-gate mutex_t *mp = (mutex_t *)lock; 27617c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 27627c478bd9Sstevel@tonic-gate int error = 0; 27637c478bd9Sstevel@tonic-gate 27647c478bd9Sstevel@tonic-gate no_preempt(self); 27657c478bd9Sstevel@tonic-gate if (set_lock_byte(&mp->mutex_lockw) != 0) 27667c478bd9Sstevel@tonic-gate error = EBUSY; 27677c478bd9Sstevel@tonic-gate else { 27687c478bd9Sstevel@tonic-gate mp->mutex_owner = (uintptr_t)self; 27697c478bd9Sstevel@tonic-gate if (mp->mutex_type == USYNC_PROCESS) 27707c478bd9Sstevel@tonic-gate mp->mutex_ownerpid = self->ul_uberdata->pid; 27717c478bd9Sstevel@tonic-gate DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0); 27727c478bd9Sstevel@tonic-gate } 27737c478bd9Sstevel@tonic-gate preempt(self); 27747c478bd9Sstevel@tonic-gate return (error); 27757c478bd9Sstevel@tonic-gate } 27767c478bd9Sstevel@tonic-gate 27777c478bd9Sstevel@tonic-gate #pragma weak pthread_spin_lock = _pthread_spin_lock 27787c478bd9Sstevel@tonic-gate int 27797c478bd9Sstevel@tonic-gate _pthread_spin_lock(pthread_spinlock_t *lock) 27807c478bd9Sstevel@tonic-gate { 2781883492d5Sraf mutex_t *mp = (mutex_t *)lock; 2782883492d5Sraf ulwp_t *self = curthread; 2783883492d5Sraf volatile uint8_t *lockp = (volatile uint8_t *)&mp->mutex_lockw; 2784883492d5Sraf int count = 0; 2785883492d5Sraf 2786883492d5Sraf ASSERT(!self->ul_critical || self->ul_bindflags); 2787883492d5Sraf 2788883492d5Sraf DTRACE_PROBE1(plockstat, mutex__spin, mp); 27897c478bd9Sstevel@tonic-gate 27907c478bd9Sstevel@tonic-gate /* 27917c478bd9Sstevel@tonic-gate * We don't care whether the owner is running on a processor. 27927c478bd9Sstevel@tonic-gate * We just spin because that's what this interface requires. 
27937c478bd9Sstevel@tonic-gate */ 27947c478bd9Sstevel@tonic-gate for (;;) { 27957c478bd9Sstevel@tonic-gate if (*lockp == 0) { /* lock byte appears to be clear */ 2796883492d5Sraf no_preempt(self); 2797883492d5Sraf if (set_lock_byte(lockp) == 0) 2798883492d5Sraf break; 2799883492d5Sraf preempt(self); 28007c478bd9Sstevel@tonic-gate } 28015d1dd9a9Sraf if (count < INT_MAX) 28025d1dd9a9Sraf count++; 28037c478bd9Sstevel@tonic-gate SMT_PAUSE(); 28047c478bd9Sstevel@tonic-gate } 2805883492d5Sraf mp->mutex_owner = (uintptr_t)self; 2806883492d5Sraf if (mp->mutex_type == USYNC_PROCESS) 2807883492d5Sraf mp->mutex_ownerpid = self->ul_uberdata->pid; 2808883492d5Sraf preempt(self); 28095d1dd9a9Sraf if (count) { 28105d1dd9a9Sraf DTRACE_PROBE2(plockstat, mutex__spun, 1, count); 28115d1dd9a9Sraf } 2812883492d5Sraf DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count); 2813883492d5Sraf return (0); 28147c478bd9Sstevel@tonic-gate } 28157c478bd9Sstevel@tonic-gate 28167c478bd9Sstevel@tonic-gate #pragma weak pthread_spin_unlock = _pthread_spin_unlock 28177c478bd9Sstevel@tonic-gate int 28187c478bd9Sstevel@tonic-gate _pthread_spin_unlock(pthread_spinlock_t *lock) 28197c478bd9Sstevel@tonic-gate { 28207c478bd9Sstevel@tonic-gate mutex_t *mp = (mutex_t *)lock; 28217c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 28227c478bd9Sstevel@tonic-gate 28237c478bd9Sstevel@tonic-gate no_preempt(self); 28247c478bd9Sstevel@tonic-gate mp->mutex_owner = 0; 28257c478bd9Sstevel@tonic-gate mp->mutex_ownerpid = 0; 28267c478bd9Sstevel@tonic-gate DTRACE_PROBE2(plockstat, mutex__release, mp, 0); 282741efec22Sraf (void) atomic_swap_32(&mp->mutex_lockword, 0); 28287c478bd9Sstevel@tonic-gate preempt(self); 28297c478bd9Sstevel@tonic-gate return (0); 28307c478bd9Sstevel@tonic-gate } 28317c478bd9Sstevel@tonic-gate 28325d1dd9a9Sraf #define INITIAL_LOCKS 8 /* initial size of ul_heldlocks.array */ 2833883492d5Sraf 2834883492d5Sraf /* 2835883492d5Sraf * Find/allocate an entry for 'lock' in our array of held locks. 2836883492d5Sraf */ 2837883492d5Sraf static mutex_t ** 2838883492d5Sraf find_lock_entry(mutex_t *lock) 2839883492d5Sraf { 2840883492d5Sraf ulwp_t *self = curthread; 2841883492d5Sraf mutex_t **remembered = NULL; 2842883492d5Sraf mutex_t **lockptr; 2843883492d5Sraf uint_t nlocks; 2844883492d5Sraf 2845883492d5Sraf if ((nlocks = self->ul_heldlockcnt) != 0) 2846883492d5Sraf lockptr = self->ul_heldlocks.array; 2847883492d5Sraf else { 2848883492d5Sraf nlocks = 1; 2849883492d5Sraf lockptr = &self->ul_heldlocks.single; 2850883492d5Sraf } 2851883492d5Sraf 2852883492d5Sraf for (; nlocks; nlocks--, lockptr++) { 2853883492d5Sraf if (*lockptr == lock) 2854883492d5Sraf return (lockptr); 2855883492d5Sraf if (*lockptr == NULL && remembered == NULL) 2856883492d5Sraf remembered = lockptr; 2857883492d5Sraf } 2858883492d5Sraf if (remembered != NULL) { 2859883492d5Sraf *remembered = lock; 2860883492d5Sraf return (remembered); 2861883492d5Sraf } 2862883492d5Sraf 2863883492d5Sraf /* 2864883492d5Sraf * No entry available. Allocate more space, converting 2865883492d5Sraf * the single entry into an array of entries if necessary. 2866883492d5Sraf */ 2867883492d5Sraf if ((nlocks = self->ul_heldlockcnt) == 0) { 2868883492d5Sraf /* 2869883492d5Sraf * Initial allocation of the array. 2870883492d5Sraf * Convert the single entry into an array. 
2871883492d5Sraf */ 2872883492d5Sraf self->ul_heldlockcnt = nlocks = INITIAL_LOCKS; 2873883492d5Sraf lockptr = lmalloc(nlocks * sizeof (mutex_t *)); 2874883492d5Sraf /* 2875883492d5Sraf * The single entry becomes the first entry in the array. 2876883492d5Sraf */ 2877883492d5Sraf *lockptr = self->ul_heldlocks.single; 2878883492d5Sraf self->ul_heldlocks.array = lockptr; 2879883492d5Sraf /* 2880883492d5Sraf * Return the next available entry in the array. 2881883492d5Sraf */ 2882883492d5Sraf *++lockptr = lock; 2883883492d5Sraf return (lockptr); 2884883492d5Sraf } 2885883492d5Sraf /* 2886883492d5Sraf * Reallocate the array, double the size each time. 2887883492d5Sraf */ 2888883492d5Sraf lockptr = lmalloc(nlocks * 2 * sizeof (mutex_t *)); 2889883492d5Sraf (void) _memcpy(lockptr, self->ul_heldlocks.array, 2890883492d5Sraf nlocks * sizeof (mutex_t *)); 2891883492d5Sraf lfree(self->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 2892883492d5Sraf self->ul_heldlocks.array = lockptr; 2893883492d5Sraf self->ul_heldlockcnt *= 2; 2894883492d5Sraf /* 2895883492d5Sraf * Return the next available entry in the newly allocated array. 2896883492d5Sraf */ 2897883492d5Sraf *(lockptr += nlocks) = lock; 2898883492d5Sraf return (lockptr); 2899883492d5Sraf } 2900883492d5Sraf 2901883492d5Sraf /* 2902883492d5Sraf * Insert 'lock' into our list of held locks. 2903883492d5Sraf * Currently only used for LOCK_ROBUST mutexes. 2904883492d5Sraf */ 2905883492d5Sraf void 2906883492d5Sraf remember_lock(mutex_t *lock) 2907883492d5Sraf { 2908883492d5Sraf (void) find_lock_entry(lock); 2909883492d5Sraf } 2910883492d5Sraf 2911883492d5Sraf /* 2912883492d5Sraf * Remove 'lock' from our list of held locks. 2913883492d5Sraf * Currently only used for LOCK_ROBUST mutexes. 2914883492d5Sraf */ 2915883492d5Sraf void 2916883492d5Sraf forget_lock(mutex_t *lock) 2917883492d5Sraf { 2918883492d5Sraf *find_lock_entry(lock) = NULL; 2919883492d5Sraf } 2920883492d5Sraf 2921883492d5Sraf /* 2922883492d5Sraf * Free the array of held locks. 2923883492d5Sraf */ 2924883492d5Sraf void 2925883492d5Sraf heldlock_free(ulwp_t *ulwp) 2926883492d5Sraf { 2927883492d5Sraf uint_t nlocks; 2928883492d5Sraf 2929883492d5Sraf if ((nlocks = ulwp->ul_heldlockcnt) != 0) 2930883492d5Sraf lfree(ulwp->ul_heldlocks.array, nlocks * sizeof (mutex_t *)); 2931883492d5Sraf ulwp->ul_heldlockcnt = 0; 2932883492d5Sraf ulwp->ul_heldlocks.array = NULL; 2933883492d5Sraf } 2934883492d5Sraf 2935883492d5Sraf /* 2936883492d5Sraf * Mark all held LOCK_ROBUST mutexes LOCK_OWNERDEAD. 2937883492d5Sraf * Called from _thrp_exit() to deal with abandoned locks. 2938883492d5Sraf */ 2939883492d5Sraf void 2940883492d5Sraf heldlock_exit(void) 2941883492d5Sraf { 2942883492d5Sraf ulwp_t *self = curthread; 2943883492d5Sraf mutex_t **lockptr; 2944883492d5Sraf uint_t nlocks; 2945883492d5Sraf mutex_t *mp; 2946883492d5Sraf 2947883492d5Sraf if ((nlocks = self->ul_heldlockcnt) != 0) 2948883492d5Sraf lockptr = self->ul_heldlocks.array; 2949883492d5Sraf else { 2950883492d5Sraf nlocks = 1; 2951883492d5Sraf lockptr = &self->ul_heldlocks.single; 2952883492d5Sraf } 2953883492d5Sraf 2954883492d5Sraf for (; nlocks; nlocks--, lockptr++) { 2955883492d5Sraf /* 2956883492d5Sraf * The kernel takes care of transitioning held 2957883492d5Sraf * LOCK_PRIO_INHERIT mutexes to LOCK_OWNERDEAD. 2958883492d5Sraf * We avoid that case here. 
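		 *
		 * The next thread to acquire an abandoned lock gets
		 * EOWNERDEAD back from mutex_lock() and may repair the
		 * protected state and mark the lock usable again; a
		 * sketch of that consumer side, using the interfaces
		 * defined in this file:
		 *
		 *	if (mutex_lock(&m) == EOWNERDEAD) {
		 *		... make the state consistent ...
		 *		(void) mutex_consistent(&m);
		 *	}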
2959883492d5Sraf 		 */
2960883492d5Sraf 		if ((mp = *lockptr) != NULL &&
2961883492d5Sraf 		    mutex_is_held(mp) &&
2962883492d5Sraf 		    (mp->mutex_type & (LOCK_ROBUST | LOCK_PRIO_INHERIT)) ==
2963883492d5Sraf 		    LOCK_ROBUST) {
2964883492d5Sraf 			mp->mutex_rcount = 0;
2965883492d5Sraf 			if (!(mp->mutex_flag & LOCK_UNMAPPED))
2966883492d5Sraf 				mp->mutex_flag |= LOCK_OWNERDEAD;
2967883492d5Sraf 			(void) mutex_unlock_internal(mp, 1);
2968883492d5Sraf 		}
2969883492d5Sraf 	}
2970883492d5Sraf 
2971883492d5Sraf 	heldlock_free(self);
2972883492d5Sraf }
2973883492d5Sraf 
29747c478bd9Sstevel@tonic-gate #pragma weak cond_init = _cond_init
29757c478bd9Sstevel@tonic-gate /* ARGSUSED2 */
29767c478bd9Sstevel@tonic-gate int
29777c478bd9Sstevel@tonic-gate _cond_init(cond_t *cvp, int type, void *arg)
29787c478bd9Sstevel@tonic-gate {
29797c478bd9Sstevel@tonic-gate 	if (type != USYNC_THREAD && type != USYNC_PROCESS)
29807c478bd9Sstevel@tonic-gate 		return (EINVAL);
29817c478bd9Sstevel@tonic-gate 	(void) _memset(cvp, 0, sizeof (*cvp));
29827c478bd9Sstevel@tonic-gate 	cvp->cond_type = (uint16_t)type;
29837c478bd9Sstevel@tonic-gate 	cvp->cond_magic = COND_MAGIC;
29847c478bd9Sstevel@tonic-gate 	return (0);
29857c478bd9Sstevel@tonic-gate }
29867c478bd9Sstevel@tonic-gate 
29877c478bd9Sstevel@tonic-gate /*
29887c478bd9Sstevel@tonic-gate  * cond_sleep_queue(): utility function for cond_wait_queue().
29897c478bd9Sstevel@tonic-gate  *
29907c478bd9Sstevel@tonic-gate  * Go to sleep on a condvar sleep queue, expect to be waked up
29917c478bd9Sstevel@tonic-gate  * by someone calling cond_signal() or cond_broadcast() or due
29927c478bd9Sstevel@tonic-gate  * to receiving a UNIX signal or being cancelled, or just simply
29937c478bd9Sstevel@tonic-gate  * due to a spurious wakeup (like someone calling forkall()).
29947c478bd9Sstevel@tonic-gate  *
29957c478bd9Sstevel@tonic-gate  * The associated mutex is *not* reacquired before returning.
29967c478bd9Sstevel@tonic-gate  * That must be done by the caller of cond_sleep_queue().
29977c478bd9Sstevel@tonic-gate  */
2998883492d5Sraf static int
29997c478bd9Sstevel@tonic-gate cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
30007c478bd9Sstevel@tonic-gate {
30017c478bd9Sstevel@tonic-gate 	ulwp_t *self = curthread;
30027c478bd9Sstevel@tonic-gate 	queue_head_t *qp;
30037c478bd9Sstevel@tonic-gate 	queue_head_t *mqp;
30047c478bd9Sstevel@tonic-gate 	lwpid_t lwpid;
30057c478bd9Sstevel@tonic-gate 	int signalled;
30067c478bd9Sstevel@tonic-gate 	int error;
3007*d4204c85Sraf 	int cv_wake;
3008883492d5Sraf 	int release_all;
30097c478bd9Sstevel@tonic-gate 
30107c478bd9Sstevel@tonic-gate 	/*
30117c478bd9Sstevel@tonic-gate 	 * Put ourself on the CV sleep queue, unlock the mutex, then
30127c478bd9Sstevel@tonic-gate 	 * park ourself and unpark a candidate lwp to grab the mutex.
30137c478bd9Sstevel@tonic-gate 	 * We must go onto the CV sleep queue before dropping the
30147c478bd9Sstevel@tonic-gate 	 * mutex in order to guarantee atomicity of the operation.
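	 * Were the order reversed, a cond_signal() arriving between
	 * the unlock and the enqueue would find an empty queue and
	 * the wakeup would be lost:
	 *
	 *	waiter				signaller
	 *	------				---------
	 *	unlock mutex
	 *					lock mutex; cond_signal()
	 *					    sees no waiters
	 *	enqueue self and park		(wakeup already gone)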
30157c478bd9Sstevel@tonic-gate */ 30167c478bd9Sstevel@tonic-gate self->ul_sp = stkptr(); 30177c478bd9Sstevel@tonic-gate qp = queue_lock(cvp, CV); 3018*d4204c85Sraf enqueue(qp, self, 0); 30197c478bd9Sstevel@tonic-gate cvp->cond_waiters_user = 1; 30207c478bd9Sstevel@tonic-gate self->ul_cvmutex = mp; 3021*d4204c85Sraf self->ul_cv_wake = cv_wake = (tsp != NULL); 30227c478bd9Sstevel@tonic-gate self->ul_signalled = 0; 3023883492d5Sraf if (mp->mutex_flag & LOCK_OWNERDEAD) { 3024883492d5Sraf mp->mutex_flag &= ~LOCK_OWNERDEAD; 3025883492d5Sraf mp->mutex_flag |= LOCK_NOTRECOVERABLE; 3026883492d5Sraf } 3027883492d5Sraf release_all = ((mp->mutex_flag & LOCK_NOTRECOVERABLE) != 0); 3028883492d5Sraf lwpid = mutex_unlock_queue(mp, release_all); 30297c478bd9Sstevel@tonic-gate for (;;) { 30307c478bd9Sstevel@tonic-gate set_parking_flag(self, 1); 30317c478bd9Sstevel@tonic-gate queue_unlock(qp); 30327c478bd9Sstevel@tonic-gate if (lwpid != 0) { 30337c478bd9Sstevel@tonic-gate lwpid = preempt_unpark(self, lwpid); 30347c478bd9Sstevel@tonic-gate preempt(self); 30357c478bd9Sstevel@tonic-gate } 30367c478bd9Sstevel@tonic-gate /* 30377c478bd9Sstevel@tonic-gate * We may have a deferred signal present, 30387c478bd9Sstevel@tonic-gate * in which case we should return EINTR. 30397c478bd9Sstevel@tonic-gate * Also, we may have received a SIGCANCEL; if so 30407c478bd9Sstevel@tonic-gate * and we are cancelable we should return EINTR. 30417c478bd9Sstevel@tonic-gate * We force an immediate EINTR return from 30427c478bd9Sstevel@tonic-gate * __lwp_park() by turning our parking flag off. 30437c478bd9Sstevel@tonic-gate */ 30447c478bd9Sstevel@tonic-gate if (self->ul_cursig != 0 || 30457c478bd9Sstevel@tonic-gate (self->ul_cancelable && self->ul_cancel_pending)) 30467c478bd9Sstevel@tonic-gate set_parking_flag(self, 0); 30477c478bd9Sstevel@tonic-gate /* 30487c478bd9Sstevel@tonic-gate * __lwp_park() will return the residual time in tsp 30497c478bd9Sstevel@tonic-gate * if we are unparked before the timeout expires. 30507c478bd9Sstevel@tonic-gate */ 30517c478bd9Sstevel@tonic-gate error = __lwp_park(tsp, lwpid); 30527c478bd9Sstevel@tonic-gate set_parking_flag(self, 0); 30537c478bd9Sstevel@tonic-gate lwpid = 0; /* unpark the other lwp only once */ 30547c478bd9Sstevel@tonic-gate /* 30557c478bd9Sstevel@tonic-gate * We were waked up by cond_signal(), cond_broadcast(), 30567c478bd9Sstevel@tonic-gate * by an interrupt or timeout (EINTR or ETIME), 30577c478bd9Sstevel@tonic-gate * or we may just have gotten a spurious wakeup. 30587c478bd9Sstevel@tonic-gate */ 30597c478bd9Sstevel@tonic-gate qp = queue_lock(cvp, CV); 3060*d4204c85Sraf if (!cv_wake) 3061*d4204c85Sraf mqp = queue_lock(mp, MX); 30627c478bd9Sstevel@tonic-gate if (self->ul_sleepq == NULL) 30637c478bd9Sstevel@tonic-gate break; 30647c478bd9Sstevel@tonic-gate /* 30657c478bd9Sstevel@tonic-gate * We are on either the condvar sleep queue or the 30662be60c5eSraf * mutex sleep queue. Break out of the sleep if we 30672be60c5eSraf * were interrupted or we timed out (EINTR or ETIME). 30687c478bd9Sstevel@tonic-gate * Else this is a spurious wakeup; continue the loop. 
30697c478bd9Sstevel@tonic-gate */ 3070*d4204c85Sraf if (!cv_wake && self->ul_sleepq == mqp) { /* mutex queue */ 30712be60c5eSraf if (error) { 3072*d4204c85Sraf mp->mutex_waiters = dequeue_self(mqp); 30732be60c5eSraf break; 30742be60c5eSraf } 30752be60c5eSraf tsp = NULL; /* no more timeout */ 30762be60c5eSraf } else if (self->ul_sleepq == qp) { /* condvar queue */ 30777c478bd9Sstevel@tonic-gate if (error) { 3078*d4204c85Sraf cvp->cond_waiters_user = dequeue_self(qp); 30797c478bd9Sstevel@tonic-gate break; 30807c478bd9Sstevel@tonic-gate } 30817c478bd9Sstevel@tonic-gate /* 30827c478bd9Sstevel@tonic-gate * Else a spurious wakeup on the condvar queue. 30837c478bd9Sstevel@tonic-gate * __lwp_park() has already adjusted the timeout. 30847c478bd9Sstevel@tonic-gate */ 30857c478bd9Sstevel@tonic-gate } else { 30867c478bd9Sstevel@tonic-gate thr_panic("cond_sleep_queue(): thread not on queue"); 30877c478bd9Sstevel@tonic-gate } 3088*d4204c85Sraf if (!cv_wake) 3089*d4204c85Sraf queue_unlock(mqp); 30907c478bd9Sstevel@tonic-gate } 30917c478bd9Sstevel@tonic-gate 30927c478bd9Sstevel@tonic-gate self->ul_sp = 0; 3093*d4204c85Sraf self->ul_cv_wake = 0; 3094*d4204c85Sraf ASSERT(self->ul_cvmutex == NULL); 30957c478bd9Sstevel@tonic-gate ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL && 30967c478bd9Sstevel@tonic-gate self->ul_wchan == NULL); 30977c478bd9Sstevel@tonic-gate 30987c478bd9Sstevel@tonic-gate signalled = self->ul_signalled; 30997c478bd9Sstevel@tonic-gate self->ul_signalled = 0; 31007c478bd9Sstevel@tonic-gate queue_unlock(qp); 3101*d4204c85Sraf if (!cv_wake) 3102*d4204c85Sraf queue_unlock(mqp); 31037c478bd9Sstevel@tonic-gate 31047c478bd9Sstevel@tonic-gate /* 31057c478bd9Sstevel@tonic-gate * If we were concurrently cond_signal()d and any of: 31067c478bd9Sstevel@tonic-gate * received a UNIX signal, were cancelled, or got a timeout, 31077c478bd9Sstevel@tonic-gate * then perform another cond_signal() to avoid consuming it. 31087c478bd9Sstevel@tonic-gate */ 31097c478bd9Sstevel@tonic-gate if (error && signalled) 31107c478bd9Sstevel@tonic-gate (void) cond_signal_internal(cvp); 31117c478bd9Sstevel@tonic-gate 31127c478bd9Sstevel@tonic-gate return (error); 31137c478bd9Sstevel@tonic-gate } 31147c478bd9Sstevel@tonic-gate 31157c478bd9Sstevel@tonic-gate int 31165d1dd9a9Sraf cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 31177c478bd9Sstevel@tonic-gate { 31187c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 31197c478bd9Sstevel@tonic-gate int error; 3120883492d5Sraf int merror; 31217c478bd9Sstevel@tonic-gate 31227c478bd9Sstevel@tonic-gate /* 31237c478bd9Sstevel@tonic-gate * The old thread library was programmed to defer signals 31247c478bd9Sstevel@tonic-gate * while in cond_wait() so that the associated mutex would 31257c478bd9Sstevel@tonic-gate * be guaranteed to be held when the application signal 31267c478bd9Sstevel@tonic-gate * handler was invoked. 31277c478bd9Sstevel@tonic-gate * 31287c478bd9Sstevel@tonic-gate * We do not behave this way by default; the state of the 31297c478bd9Sstevel@tonic-gate * associated mutex in the signal handler is undefined. 31307c478bd9Sstevel@tonic-gate * 31317c478bd9Sstevel@tonic-gate * To accommodate applications that depend on the old 31327c478bd9Sstevel@tonic-gate * behavior, the _THREAD_COND_WAIT_DEFER environment 31337c478bd9Sstevel@tonic-gate * variable can be set to 1 and we will behave in the 31347c478bd9Sstevel@tonic-gate * old way with respect to cond_wait(). 
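	 * That is, running a legacy application as, say,
	 *
	 *	_THREAD_COND_WAIT_DEFER=1 ./legacy-app
	 *
	 * restores the old guarantee that a signal handler that
	 * interrupts cond_wait() runs with the associated mutex held.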
31357c478bd9Sstevel@tonic-gate */ 31367c478bd9Sstevel@tonic-gate if (self->ul_cond_wait_defer) 31377c478bd9Sstevel@tonic-gate sigoff(self); 31387c478bd9Sstevel@tonic-gate 31397c478bd9Sstevel@tonic-gate error = cond_sleep_queue(cvp, mp, tsp); 31407c478bd9Sstevel@tonic-gate 31417c478bd9Sstevel@tonic-gate /* 31427c478bd9Sstevel@tonic-gate * Reacquire the mutex. 31437c478bd9Sstevel@tonic-gate */ 31445d1dd9a9Sraf if ((merror = mutex_lock_impl(mp, NULL)) != 0) 3145883492d5Sraf error = merror; 31467c478bd9Sstevel@tonic-gate 31477c478bd9Sstevel@tonic-gate /* 31487c478bd9Sstevel@tonic-gate * Take any deferred signal now, after we have reacquired the mutex. 31497c478bd9Sstevel@tonic-gate */ 31507c478bd9Sstevel@tonic-gate if (self->ul_cond_wait_defer) 31517c478bd9Sstevel@tonic-gate sigon(self); 31527c478bd9Sstevel@tonic-gate 31537c478bd9Sstevel@tonic-gate return (error); 31547c478bd9Sstevel@tonic-gate } 31557c478bd9Sstevel@tonic-gate 31567c478bd9Sstevel@tonic-gate /* 31577c478bd9Sstevel@tonic-gate * cond_sleep_kernel(): utility function for cond_wait_kernel(). 31587c478bd9Sstevel@tonic-gate * See the comment ahead of cond_sleep_queue(), above. 31597c478bd9Sstevel@tonic-gate */ 3160883492d5Sraf static int 31617c478bd9Sstevel@tonic-gate cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 31627c478bd9Sstevel@tonic-gate { 31637c478bd9Sstevel@tonic-gate int mtype = mp->mutex_type; 31647c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 31657c478bd9Sstevel@tonic-gate int error; 31667c478bd9Sstevel@tonic-gate 3167883492d5Sraf if ((mtype & LOCK_PRIO_PROTECT) && _ceil_mylist_del(mp)) 3168883492d5Sraf _ceil_prio_waive(); 31697c478bd9Sstevel@tonic-gate 31707c478bd9Sstevel@tonic-gate self->ul_sp = stkptr(); 31717c478bd9Sstevel@tonic-gate self->ul_wchan = cvp; 31727c478bd9Sstevel@tonic-gate mp->mutex_owner = 0; 317331db3c26Sraf /* mp->mutex_ownerpid is cleared by ___lwp_cond_wait() */ 3174*d4204c85Sraf if (mtype & LOCK_PRIO_INHERIT) { 31757c478bd9Sstevel@tonic-gate mp->mutex_lockw = LOCKCLEAR; 3176*d4204c85Sraf self->ul_pilocks--; 3177*d4204c85Sraf } 31787c478bd9Sstevel@tonic-gate /* 31797c478bd9Sstevel@tonic-gate * ___lwp_cond_wait() returns immediately with EINTR if 31807c478bd9Sstevel@tonic-gate * set_parking_flag(self,0) is called on this lwp before it 31817c478bd9Sstevel@tonic-gate * goes to sleep in the kernel. sigacthandler() calls this 31827c478bd9Sstevel@tonic-gate * when a deferred signal is noted. This assures that we don't 31837c478bd9Sstevel@tonic-gate * get stuck in ___lwp_cond_wait() with all signals blocked 31847c478bd9Sstevel@tonic-gate * due to taking a deferred signal before going to sleep. 
31857c478bd9Sstevel@tonic-gate */ 31867c478bd9Sstevel@tonic-gate set_parking_flag(self, 1); 31877c478bd9Sstevel@tonic-gate if (self->ul_cursig != 0 || 31887c478bd9Sstevel@tonic-gate (self->ul_cancelable && self->ul_cancel_pending)) 31897c478bd9Sstevel@tonic-gate set_parking_flag(self, 0); 31907c478bd9Sstevel@tonic-gate error = ___lwp_cond_wait(cvp, mp, tsp, 1); 31917c478bd9Sstevel@tonic-gate set_parking_flag(self, 0); 31927c478bd9Sstevel@tonic-gate self->ul_sp = 0; 31937c478bd9Sstevel@tonic-gate self->ul_wchan = NULL; 31947c478bd9Sstevel@tonic-gate return (error); 31957c478bd9Sstevel@tonic-gate } 31967c478bd9Sstevel@tonic-gate 31977c478bd9Sstevel@tonic-gate int 31987c478bd9Sstevel@tonic-gate cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp) 31997c478bd9Sstevel@tonic-gate { 32007c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 32017c478bd9Sstevel@tonic-gate int error; 32027c478bd9Sstevel@tonic-gate int merror; 32037c478bd9Sstevel@tonic-gate 32047c478bd9Sstevel@tonic-gate /* 32057c478bd9Sstevel@tonic-gate * See the large comment in cond_wait_queue(), above. 32067c478bd9Sstevel@tonic-gate */ 32077c478bd9Sstevel@tonic-gate if (self->ul_cond_wait_defer) 32087c478bd9Sstevel@tonic-gate sigoff(self); 32097c478bd9Sstevel@tonic-gate 32107c478bd9Sstevel@tonic-gate error = cond_sleep_kernel(cvp, mp, tsp); 32117c478bd9Sstevel@tonic-gate 32127c478bd9Sstevel@tonic-gate /* 32137c478bd9Sstevel@tonic-gate * Override the return code from ___lwp_cond_wait() 32147c478bd9Sstevel@tonic-gate * with any non-zero return code from mutex_lock(). 32157c478bd9Sstevel@tonic-gate * This addresses robust lock failures in particular; 32167c478bd9Sstevel@tonic-gate * the caller must see the EOWNERDEAD or ENOTRECOVERABLE 32177c478bd9Sstevel@tonic-gate * errors in order to take corrective action. 32187c478bd9Sstevel@tonic-gate */ 32195d1dd9a9Sraf if ((merror = mutex_lock_impl(mp, NULL)) != 0) 32207c478bd9Sstevel@tonic-gate error = merror; 32217c478bd9Sstevel@tonic-gate 32227c478bd9Sstevel@tonic-gate /* 32237c478bd9Sstevel@tonic-gate * Take any deferred signal now, after we have reacquired the mutex. 
32247c478bd9Sstevel@tonic-gate 	 */
32257c478bd9Sstevel@tonic-gate 	if (self->ul_cond_wait_defer)
32267c478bd9Sstevel@tonic-gate 		sigon(self);
32277c478bd9Sstevel@tonic-gate 
32287c478bd9Sstevel@tonic-gate 	return (error);
32297c478bd9Sstevel@tonic-gate }
32307c478bd9Sstevel@tonic-gate 
32317c478bd9Sstevel@tonic-gate /*
32327c478bd9Sstevel@tonic-gate  * Common code for _cond_wait() and _cond_timedwait()
32337c478bd9Sstevel@tonic-gate  */
32347c478bd9Sstevel@tonic-gate int
32357c478bd9Sstevel@tonic-gate cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
32367c478bd9Sstevel@tonic-gate {
32377c478bd9Sstevel@tonic-gate 	int mtype = mp->mutex_type;
32387c478bd9Sstevel@tonic-gate 	hrtime_t begin_sleep = 0;
32397c478bd9Sstevel@tonic-gate 	ulwp_t *self = curthread;
32407c478bd9Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
32417c478bd9Sstevel@tonic-gate 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
32427c478bd9Sstevel@tonic-gate 	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
32437c478bd9Sstevel@tonic-gate 	uint8_t rcount;
32447c478bd9Sstevel@tonic-gate 	int error = 0;
32457c478bd9Sstevel@tonic-gate 
32467c478bd9Sstevel@tonic-gate 	/*
32477c478bd9Sstevel@tonic-gate 	 * The SUSV3 Posix spec for pthread_cond_timedwait() states:
32487c478bd9Sstevel@tonic-gate 	 * Except in the case of [ETIMEDOUT], all these error checks
32497c478bd9Sstevel@tonic-gate 	 * shall act as if they were performed immediately at the
32507c478bd9Sstevel@tonic-gate 	 * beginning of processing for the function and shall cause
32517c478bd9Sstevel@tonic-gate 	 * an error return, in effect, prior to modifying the state
32527c478bd9Sstevel@tonic-gate 	 * of the mutex specified by mutex or the condition variable
32537c478bd9Sstevel@tonic-gate 	 * specified by cond.
32547c478bd9Sstevel@tonic-gate 	 * Therefore, we must return EINVAL now if the timeout is invalid.
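	 * The usual caller error is an un-normalized timespec; code
	 * computing an absolute deadline must carry nanosecond overflow
	 * into tv_sec, as in this sketch (delta_nsec is the caller's
	 * relative offset):
	 *
	 *	ts.tv_nsec += delta_nsec;
	 *	if (ts.tv_nsec >= NANOSEC) {
	 *		ts.tv_sec += ts.tv_nsec / NANOSEC;
	 *		ts.tv_nsec %= NANOSEC;
	 *	}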
32557c478bd9Sstevel@tonic-gate */ 32567c478bd9Sstevel@tonic-gate if (tsp != NULL && 32577c478bd9Sstevel@tonic-gate (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC)) 32587c478bd9Sstevel@tonic-gate return (EINVAL); 32597c478bd9Sstevel@tonic-gate 32607c478bd9Sstevel@tonic-gate if (__td_event_report(self, TD_SLEEP, udp)) { 32617c478bd9Sstevel@tonic-gate self->ul_sp = stkptr(); 32627c478bd9Sstevel@tonic-gate self->ul_wchan = cvp; 32637c478bd9Sstevel@tonic-gate self->ul_td_evbuf.eventnum = TD_SLEEP; 32647c478bd9Sstevel@tonic-gate self->ul_td_evbuf.eventdata = cvp; 32657c478bd9Sstevel@tonic-gate tdb_event(TD_SLEEP, udp); 32667c478bd9Sstevel@tonic-gate self->ul_sp = 0; 32677c478bd9Sstevel@tonic-gate } 32687c478bd9Sstevel@tonic-gate if (csp) { 32697c478bd9Sstevel@tonic-gate if (tsp) 32707c478bd9Sstevel@tonic-gate tdb_incr(csp->cond_timedwait); 32717c478bd9Sstevel@tonic-gate else 32727c478bd9Sstevel@tonic-gate tdb_incr(csp->cond_wait); 32737c478bd9Sstevel@tonic-gate } 32747c478bd9Sstevel@tonic-gate if (msp) 32757c478bd9Sstevel@tonic-gate begin_sleep = record_hold_time(msp); 32767c478bd9Sstevel@tonic-gate else if (csp) 32777c478bd9Sstevel@tonic-gate begin_sleep = gethrtime(); 32787c478bd9Sstevel@tonic-gate 32797c478bd9Sstevel@tonic-gate if (self->ul_error_detection) { 32807c478bd9Sstevel@tonic-gate if (!mutex_is_held(mp)) 32817c478bd9Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, NULL); 32827c478bd9Sstevel@tonic-gate if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) 32837c478bd9Sstevel@tonic-gate lock_error(mp, "recursive mutex in cond_wait", 32845d1dd9a9Sraf cvp, NULL); 32857c478bd9Sstevel@tonic-gate if (cvp->cond_type & USYNC_PROCESS) { 3286883492d5Sraf if (!(mtype & USYNC_PROCESS)) 32877c478bd9Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, 32885d1dd9a9Sraf "condvar process-shared, " 32895d1dd9a9Sraf "mutex process-private"); 32907c478bd9Sstevel@tonic-gate } else { 3291883492d5Sraf if (mtype & USYNC_PROCESS) 32927c478bd9Sstevel@tonic-gate lock_error(mp, "cond_wait", cvp, 32935d1dd9a9Sraf "condvar process-private, " 32945d1dd9a9Sraf "mutex process-shared"); 32957c478bd9Sstevel@tonic-gate } 32967c478bd9Sstevel@tonic-gate } 32977c478bd9Sstevel@tonic-gate 32987c478bd9Sstevel@tonic-gate /* 32997c478bd9Sstevel@tonic-gate * We deal with recursive mutexes by completely 33007c478bd9Sstevel@tonic-gate * dropping the lock and restoring the recursion 33017c478bd9Sstevel@tonic-gate * count after waking up. This is arguably wrong, 33027c478bd9Sstevel@tonic-gate * but it obeys the principle of least astonishment. 
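	 * The practical consequence: a thread holding a recursive
	 * mutex more than once that calls cond_wait() releases the
	 * lock entirely, not one level, so other threads can enter
	 * the critical section while it sleeps.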
33037c478bd9Sstevel@tonic-gate */ 33047c478bd9Sstevel@tonic-gate rcount = mp->mutex_rcount; 33057c478bd9Sstevel@tonic-gate mp->mutex_rcount = 0; 3306883492d5Sraf if ((mtype & 3307883492d5Sraf (USYNC_PROCESS | LOCK_PRIO_INHERIT | LOCK_PRIO_PROTECT)) | 33087c478bd9Sstevel@tonic-gate (cvp->cond_type & USYNC_PROCESS)) 33097c478bd9Sstevel@tonic-gate error = cond_wait_kernel(cvp, mp, tsp); 33107c478bd9Sstevel@tonic-gate else 33115d1dd9a9Sraf error = cond_wait_queue(cvp, mp, tsp); 33127c478bd9Sstevel@tonic-gate mp->mutex_rcount = rcount; 33137c478bd9Sstevel@tonic-gate 33147c478bd9Sstevel@tonic-gate if (csp) { 33157c478bd9Sstevel@tonic-gate hrtime_t lapse = gethrtime() - begin_sleep; 33167c478bd9Sstevel@tonic-gate if (tsp == NULL) 33177c478bd9Sstevel@tonic-gate csp->cond_wait_sleep_time += lapse; 33187c478bd9Sstevel@tonic-gate else { 33197c478bd9Sstevel@tonic-gate csp->cond_timedwait_sleep_time += lapse; 33207c478bd9Sstevel@tonic-gate if (error == ETIME) 33217c478bd9Sstevel@tonic-gate tdb_incr(csp->cond_timedwait_timeout); 33227c478bd9Sstevel@tonic-gate } 33237c478bd9Sstevel@tonic-gate } 33247c478bd9Sstevel@tonic-gate return (error); 33257c478bd9Sstevel@tonic-gate } 33267c478bd9Sstevel@tonic-gate 33277c478bd9Sstevel@tonic-gate /* 3328a574db85Sraf * cond_wait() and _cond_wait() are cancellation points but __cond_wait() 3329a574db85Sraf * is not. Internally, libc calls the non-cancellation version. 3330a574db85Sraf * Other libraries need to use pthread_setcancelstate(), as appropriate, 3331a574db85Sraf * since __cond_wait() is not exported from libc. 33327c478bd9Sstevel@tonic-gate */ 33337c478bd9Sstevel@tonic-gate int 3334a574db85Sraf __cond_wait(cond_t *cvp, mutex_t *mp) 33357c478bd9Sstevel@tonic-gate { 33367c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 33377c478bd9Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 33387c478bd9Sstevel@tonic-gate uberflags_t *gflags; 33397c478bd9Sstevel@tonic-gate 33407c478bd9Sstevel@tonic-gate /* 33417c478bd9Sstevel@tonic-gate * Optimize the common case of USYNC_THREAD plus 33427c478bd9Sstevel@tonic-gate * no error detection, no lock statistics, and no event tracing. 33437c478bd9Sstevel@tonic-gate */ 33447c478bd9Sstevel@tonic-gate if ((gflags = self->ul_schedctl_called) != NULL && 33457c478bd9Sstevel@tonic-gate (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted | 33467c478bd9Sstevel@tonic-gate self->ul_td_events_enable | 33477c478bd9Sstevel@tonic-gate udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0) 33485d1dd9a9Sraf return (cond_wait_queue(cvp, mp, NULL)); 33497c478bd9Sstevel@tonic-gate 33507c478bd9Sstevel@tonic-gate /* 33517c478bd9Sstevel@tonic-gate * Else do it the long way. 33527c478bd9Sstevel@tonic-gate */ 33537c478bd9Sstevel@tonic-gate return (cond_wait_common(cvp, mp, NULL)); 33547c478bd9Sstevel@tonic-gate } 33557c478bd9Sstevel@tonic-gate 3356a574db85Sraf #pragma weak cond_wait = _cond_wait 33577c478bd9Sstevel@tonic-gate int 3358a574db85Sraf _cond_wait(cond_t *cvp, mutex_t *mp) 33597c478bd9Sstevel@tonic-gate { 33607c478bd9Sstevel@tonic-gate int error; 33617c478bd9Sstevel@tonic-gate 33627c478bd9Sstevel@tonic-gate _cancelon(); 3363a574db85Sraf error = __cond_wait(cvp, mp); 33647c478bd9Sstevel@tonic-gate if (error == EINTR) 33657c478bd9Sstevel@tonic-gate _canceloff(); 33667c478bd9Sstevel@tonic-gate else 33677c478bd9Sstevel@tonic-gate _canceloff_nocancel(); 33687c478bd9Sstevel@tonic-gate return (error); 33697c478bd9Sstevel@tonic-gate } 33707c478bd9Sstevel@tonic-gate 3371a574db85Sraf /* 3372a574db85Sraf * pthread_cond_wait() is a cancellation point. 
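 * On cancellation the mutex is (in effect) reacquired before the
 * first cancellation cleanup handler runs, so cancellable callers
 * conventionally push a handler to release it; standard POSIX usage,
 * with unlock_mutex being a caller-supplied helper:
 *
 *	pthread_cleanup_push(unlock_mutex, &m);
 *	while (!condition)
 *		(void) pthread_cond_wait(&cv, &m);
 *	pthread_cleanup_pop(1);		... pops and runs unlock_mutex ...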
3373a574db85Sraf */ 33747c478bd9Sstevel@tonic-gate #pragma weak pthread_cond_wait = _pthread_cond_wait 33757c478bd9Sstevel@tonic-gate int 33767c478bd9Sstevel@tonic-gate _pthread_cond_wait(cond_t *cvp, mutex_t *mp) 33777c478bd9Sstevel@tonic-gate { 33787c478bd9Sstevel@tonic-gate int error; 33797c478bd9Sstevel@tonic-gate 3380a574db85Sraf error = _cond_wait(cvp, mp); 33817c478bd9Sstevel@tonic-gate return ((error == EINTR)? 0 : error); 33827c478bd9Sstevel@tonic-gate } 33837c478bd9Sstevel@tonic-gate 33847c478bd9Sstevel@tonic-gate /* 3385a574db85Sraf * cond_timedwait() and _cond_timedwait() are cancellation points 3386a574db85Sraf * but __cond_timedwait() is not. 33877c478bd9Sstevel@tonic-gate */ 33887c478bd9Sstevel@tonic-gate int 3389a574db85Sraf __cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 33907c478bd9Sstevel@tonic-gate { 33917c478bd9Sstevel@tonic-gate clockid_t clock_id = cvp->cond_clockid; 33927c478bd9Sstevel@tonic-gate timespec_t reltime; 33937c478bd9Sstevel@tonic-gate int error; 33947c478bd9Sstevel@tonic-gate 33957c478bd9Sstevel@tonic-gate if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES) 33967c478bd9Sstevel@tonic-gate clock_id = CLOCK_REALTIME; 33977c478bd9Sstevel@tonic-gate abstime_to_reltime(clock_id, abstime, &reltime); 33987c478bd9Sstevel@tonic-gate error = cond_wait_common(cvp, mp, &reltime); 33997c478bd9Sstevel@tonic-gate if (error == ETIME && clock_id == CLOCK_HIGHRES) { 34007c478bd9Sstevel@tonic-gate /* 34017c478bd9Sstevel@tonic-gate * Don't return ETIME if we didn't really get a timeout. 34027c478bd9Sstevel@tonic-gate * This can happen if we return because someone resets 34037c478bd9Sstevel@tonic-gate * the system clock. Just return zero in this case, 34047c478bd9Sstevel@tonic-gate * giving a spurious wakeup but not a timeout. 34057c478bd9Sstevel@tonic-gate */ 34067c478bd9Sstevel@tonic-gate if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC + 34077c478bd9Sstevel@tonic-gate abstime->tv_nsec > gethrtime()) 34087c478bd9Sstevel@tonic-gate error = 0; 34097c478bd9Sstevel@tonic-gate } 34107c478bd9Sstevel@tonic-gate return (error); 34117c478bd9Sstevel@tonic-gate } 34127c478bd9Sstevel@tonic-gate 3413a574db85Sraf #pragma weak cond_timedwait = _cond_timedwait 34147c478bd9Sstevel@tonic-gate int 3415a574db85Sraf _cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 34167c478bd9Sstevel@tonic-gate { 34177c478bd9Sstevel@tonic-gate int error; 34187c478bd9Sstevel@tonic-gate 34197c478bd9Sstevel@tonic-gate _cancelon(); 3420a574db85Sraf error = __cond_timedwait(cvp, mp, abstime); 34217c478bd9Sstevel@tonic-gate if (error == EINTR) 34227c478bd9Sstevel@tonic-gate _canceloff(); 34237c478bd9Sstevel@tonic-gate else 34247c478bd9Sstevel@tonic-gate _canceloff_nocancel(); 34257c478bd9Sstevel@tonic-gate return (error); 34267c478bd9Sstevel@tonic-gate } 34277c478bd9Sstevel@tonic-gate 3428a574db85Sraf /* 3429a574db85Sraf * pthread_cond_timedwait() is a cancellation point. 
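 * Note that the EINTR -> 0 mapping below means a zero return does
 * not imply that the predicate is true; callers must re-test it in
 * a loop (standard usage, for reference):
 *
 *	(void) pthread_mutex_lock(&m);
 *	while (!condition) {
 *		if (pthread_cond_timedwait(&cv, &m, &abstime) ==
 *		    ETIMEDOUT)
 *			break;		... give up at the deadline ...
 *	}
 *	(void) pthread_mutex_unlock(&m);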
3430a574db85Sraf */ 34317c478bd9Sstevel@tonic-gate #pragma weak pthread_cond_timedwait = _pthread_cond_timedwait 34327c478bd9Sstevel@tonic-gate int 34337c478bd9Sstevel@tonic-gate _pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime) 34347c478bd9Sstevel@tonic-gate { 34357c478bd9Sstevel@tonic-gate int error; 34367c478bd9Sstevel@tonic-gate 3437a574db85Sraf error = _cond_timedwait(cvp, mp, abstime); 34387c478bd9Sstevel@tonic-gate if (error == ETIME) 34397c478bd9Sstevel@tonic-gate error = ETIMEDOUT; 34407c478bd9Sstevel@tonic-gate else if (error == EINTR) 34417c478bd9Sstevel@tonic-gate error = 0; 34427c478bd9Sstevel@tonic-gate return (error); 34437c478bd9Sstevel@tonic-gate } 34447c478bd9Sstevel@tonic-gate 34457c478bd9Sstevel@tonic-gate /* 3446a574db85Sraf * cond_reltimedwait() and _cond_reltimedwait() are cancellation points 3447a574db85Sraf * but __cond_reltimedwait() is not. 34487c478bd9Sstevel@tonic-gate */ 34497c478bd9Sstevel@tonic-gate int 3450a574db85Sraf __cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 34517c478bd9Sstevel@tonic-gate { 34527c478bd9Sstevel@tonic-gate timespec_t tslocal = *reltime; 34537c478bd9Sstevel@tonic-gate 34547c478bd9Sstevel@tonic-gate return (cond_wait_common(cvp, mp, &tslocal)); 34557c478bd9Sstevel@tonic-gate } 34567c478bd9Sstevel@tonic-gate 3457a574db85Sraf #pragma weak cond_reltimedwait = _cond_reltimedwait 34587c478bd9Sstevel@tonic-gate int 3459a574db85Sraf _cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime) 34607c478bd9Sstevel@tonic-gate { 34617c478bd9Sstevel@tonic-gate int error; 34627c478bd9Sstevel@tonic-gate 34637c478bd9Sstevel@tonic-gate _cancelon(); 3464a574db85Sraf error = __cond_reltimedwait(cvp, mp, reltime); 34657c478bd9Sstevel@tonic-gate if (error == EINTR) 34667c478bd9Sstevel@tonic-gate _canceloff(); 34677c478bd9Sstevel@tonic-gate else 34687c478bd9Sstevel@tonic-gate _canceloff_nocancel(); 34697c478bd9Sstevel@tonic-gate return (error); 34707c478bd9Sstevel@tonic-gate } 34717c478bd9Sstevel@tonic-gate 34727c478bd9Sstevel@tonic-gate #pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np 34737c478bd9Sstevel@tonic-gate int 34747c478bd9Sstevel@tonic-gate _pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp, 34757c478bd9Sstevel@tonic-gate const timespec_t *reltime) 34767c478bd9Sstevel@tonic-gate { 34777c478bd9Sstevel@tonic-gate int error; 34787c478bd9Sstevel@tonic-gate 3479a574db85Sraf error = _cond_reltimedwait(cvp, mp, reltime); 34807c478bd9Sstevel@tonic-gate if (error == ETIME) 34817c478bd9Sstevel@tonic-gate error = ETIMEDOUT; 34827c478bd9Sstevel@tonic-gate else if (error == EINTR) 34837c478bd9Sstevel@tonic-gate error = 0; 34847c478bd9Sstevel@tonic-gate return (error); 34857c478bd9Sstevel@tonic-gate } 34867c478bd9Sstevel@tonic-gate 34877c478bd9Sstevel@tonic-gate #pragma weak pthread_cond_signal = cond_signal_internal 34887c478bd9Sstevel@tonic-gate #pragma weak _pthread_cond_signal = cond_signal_internal 34897c478bd9Sstevel@tonic-gate #pragma weak cond_signal = cond_signal_internal 34907c478bd9Sstevel@tonic-gate #pragma weak _cond_signal = cond_signal_internal 34917c478bd9Sstevel@tonic-gate int 34927c478bd9Sstevel@tonic-gate cond_signal_internal(cond_t *cvp) 34937c478bd9Sstevel@tonic-gate { 34947c478bd9Sstevel@tonic-gate ulwp_t *self = curthread; 34957c478bd9Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 34967c478bd9Sstevel@tonic-gate tdb_cond_stats_t *csp = COND_STATS(cvp, udp); 34977c478bd9Sstevel@tonic-gate int error = 0; 3498*d4204c85Sraf int more; 
3499*d4204c85Sraf lwpid_t lwpid; 35007c478bd9Sstevel@tonic-gate queue_head_t *qp; 35017c478bd9Sstevel@tonic-gate mutex_t *mp; 35027c478bd9Sstevel@tonic-gate queue_head_t *mqp; 35037c478bd9Sstevel@tonic-gate ulwp_t **ulwpp; 35047c478bd9Sstevel@tonic-gate ulwp_t *ulwp; 3505*d4204c85Sraf ulwp_t *prev; 35067c478bd9Sstevel@tonic-gate 35077c478bd9Sstevel@tonic-gate if (csp) 35087c478bd9Sstevel@tonic-gate tdb_incr(csp->cond_signal); 35097c478bd9Sstevel@tonic-gate 35107c478bd9Sstevel@tonic-gate if (cvp->cond_waiters_kernel) /* someone sleeping in the kernel? */ 35117c478bd9Sstevel@tonic-gate error = __lwp_cond_signal(cvp); 35127c478bd9Sstevel@tonic-gate 35137c478bd9Sstevel@tonic-gate if (!cvp->cond_waiters_user) /* no one sleeping at user-level */ 35147c478bd9Sstevel@tonic-gate return (error); 35157c478bd9Sstevel@tonic-gate 35167c478bd9Sstevel@tonic-gate /* 35177c478bd9Sstevel@tonic-gate * Move someone from the condvar sleep queue to the mutex sleep 35187c478bd9Sstevel@tonic-gate * queue for the mutex that he will acquire on being waked up. 35197c478bd9Sstevel@tonic-gate * We can do this only if we own the mutex he will acquire. 35207c478bd9Sstevel@tonic-gate * If we do not own the mutex, or if his ul_cv_wake flag 35217c478bd9Sstevel@tonic-gate * is set, just dequeue and unpark him. 35227c478bd9Sstevel@tonic-gate */ 35237c478bd9Sstevel@tonic-gate qp = queue_lock(cvp, CV); 3524*d4204c85Sraf ulwpp = queue_slot(qp, &prev, &more); 3525*d4204c85Sraf cvp->cond_waiters_user = more; 3526*d4204c85Sraf if (ulwpp == NULL) { /* no one on the sleep queue */ 35277c478bd9Sstevel@tonic-gate queue_unlock(qp); 35287c478bd9Sstevel@tonic-gate return (error); 35297c478bd9Sstevel@tonic-gate } 3530*d4204c85Sraf ulwp = *ulwpp; 35317c478bd9Sstevel@tonic-gate 35327c478bd9Sstevel@tonic-gate /* 35337c478bd9Sstevel@tonic-gate * Inform the thread that he was the recipient of a cond_signal(). 35347c478bd9Sstevel@tonic-gate * This lets him deal with cond_signal() and, concurrently, 35357c478bd9Sstevel@tonic-gate * one or more of a cancellation, a UNIX signal, or a timeout. 35367c478bd9Sstevel@tonic-gate * These latter conditions must not consume a cond_signal(). 35377c478bd9Sstevel@tonic-gate */ 35387c478bd9Sstevel@tonic-gate ulwp->ul_signalled = 1; 35397c478bd9Sstevel@tonic-gate 35407c478bd9Sstevel@tonic-gate /* 35417c478bd9Sstevel@tonic-gate * Dequeue the waiter but leave his ul_sleepq non-NULL 35427c478bd9Sstevel@tonic-gate * while we move him to the mutex queue so that he can 35437c478bd9Sstevel@tonic-gate * deal properly with spurious wakeups. 
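	 * Transferring him directly from the condvar queue to the
	 * mutex queue (sometimes called wait morphing) avoids waking
	 * him only to have him block again; he is unparked when the
	 * mutex itself is released.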
/*
 * Utility function called by mutex_wakeup_all(), cond_broadcast(),
 * and rw_queue_release() to (re)allocate a big buffer to hold the
 * lwpids of all the threads to be set running after they are removed
 * from their sleep queues.  Since we are holding a queue lock, we
 * cannot call any function that might acquire a lock.  mmap(),
 * munmap(), and lwp_unpark_all() are simple system calls and are
 * safe in this regard.
 */
lwpid_t *
alloc_lwpids(lwpid_t *lwpid, int *nlwpid_ptr, int *maxlwps_ptr)
{
	/*
	 * Allocate NEWLWPS ids on the first overflow.
	 * Double the allocation each time after that.
	 */
	int nlwpid = *nlwpid_ptr;
	int maxlwps = *maxlwps_ptr;
	int first_allocation;
	int newlwps;
	void *vaddr;

	ASSERT(nlwpid == maxlwps);

	first_allocation = (maxlwps == MAXLWPS);
	newlwps = first_allocation? NEWLWPS : 2 * maxlwps;
	vaddr = _private_mmap(NULL, newlwps * sizeof (lwpid_t),
	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, (off_t)0);

	if (vaddr == MAP_FAILED) {
		/*
		 * Let's hope this never happens.
		 * If it does, then we have a terrible
		 * thundering herd on our hands.
		 */
		(void) __lwp_unpark_all(lwpid, nlwpid);
		*nlwpid_ptr = 0;
	} else {
		(void) _memcpy(vaddr, lwpid, maxlwps * sizeof (lwpid_t));
		if (!first_allocation)
			(void) _private_munmap(lwpid,
			    maxlwps * sizeof (lwpid_t));
		lwpid = vaddr;
		*maxlwps_ptr = newlwps;
	}

	return (lwpid);
}
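/*
 * Illustrative sketch, not part of libc: the calling convention for
 * alloc_lwpids(), as used by cond_broadcast_internal() below.  Callers
 * collect lwpids in an on-stack buffer of MAXLWPS entries and grow it
 * only on overflow, so the common case performs no allocation at all:
 *
 *	lwpid_t buffer[MAXLWPS];
 *	lwpid_t *lwpid = buffer;
 *	int nlwpid = 0;
 *	int maxlwps = MAXLWPS;
 *
 *	(for each waiter to be unparked:)
 *	if (nlwpid == maxlwps)
 *		lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
 *	lwpid[nlwpid++] = ulwp->ul_lwpid;
 *
 *	(after dropping the queue lock and unparking:)
 *	if (lwpid != buffer)
 *		(void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
 *
 * On mmap() failure, alloc_lwpids() unparks the lwpids collected so
 * far and resets the count to zero, so the caller simply continues
 * filling the buffer it already has.
 */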
#pragma weak pthread_cond_broadcast = cond_broadcast_internal
#pragma weak _pthread_cond_broadcast = cond_broadcast_internal
#pragma weak cond_broadcast = cond_broadcast_internal
#pragma weak _cond_broadcast = cond_broadcast_internal
int
cond_broadcast_internal(cond_t *cvp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
	int error = 0;
	queue_head_t *qp;
	queue_root_t *qrp;
	mutex_t *mp;
	mutex_t *mp_cache = NULL;
	queue_head_t *mqp = NULL;
	ulwp_t *ulwp;
	int nlwpid = 0;
	int maxlwps = MAXLWPS;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	if (csp)
		tdb_incr(csp->cond_broadcast);

	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
		error = __lwp_cond_broadcast(cvp);

	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
		return (error);

	/*
	 * Move everyone from the condvar sleep queue to the mutex sleep
	 * queue for the mutex that they will acquire on being woken up.
	 * We can do this only if we own the mutex they will acquire.
	 * If we do not own the mutex, or if their ul_cv_wake flag
	 * is set, just dequeue and unpark them.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.  If we run out of space in our
	 * on-stack buffer, we need to allocate more but we can't call
	 * lmalloc() because we are holding a queue lock when the overflow
	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
	 * either because the application may have allocated a small
	 * stack and we don't want to overrun the stack.  So we call
	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
	 * system call directly since that path acquires no locks.
	 */
	qp = queue_lock(cvp, CV);
	cvp->cond_waiters_user = 0;
	for (;;) {
		if ((qrp = qp->qh_root) == NULL ||
		    (ulwp = qrp->qr_head) == NULL)
			break;
		ASSERT(ulwp->ul_wchan == cvp);
		queue_unlink(qp, &qrp->qr_head, NULL);
		mp = ulwp->ul_cvmutex;		/* his mutex */
		ulwp->ul_cvmutex = NULL;
		ASSERT(mp != NULL);
		if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
			/* just wake him up */
			ulwp->ul_sleepq = NULL;
			ulwp->ul_wchan = NULL;
			if (nlwpid == maxlwps)
				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
			lwpid[nlwpid++] = ulwp->ul_lwpid;
		} else {
			/* move him to the mutex queue */
			if (mp != mp_cache) {
				mp_cache = mp;
				if (mqp != NULL)
					queue_unlock(mqp);
				mqp = queue_lock(mp, MX);
			}
			enqueue(mqp, ulwp, 0);
			mp->mutex_waiters = 1;
		}
	}
	if (mqp != NULL)
		queue_unlock(mqp);
	if (nlwpid == 0) {
		queue_unlock(qp);
	} else {
		no_preempt(self);
		queue_unlock(qp);
		if (nlwpid == 1)
			(void) __lwp_unpark(lwpid[0]);
		else
			(void) __lwp_unpark_all(lwpid, nlwpid);
		preempt(self);
	}
	if (lwpid != buffer)
		(void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
	return (error);
}
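/*
 * Illustrative sketch, not part of libc: the waiter-side loop that
 * cond_broadcast() pairs with.  Every waiter re-checks its predicate
 * under the mutex after waking, so broadcasting to many waiters is
 * correct even when only some of them can proceed, and the
 * queue-morphing above keeps them from all charging at the mutex at
 * once.  The predicate 'resource_count' is hypothetical.
 *
 *	(void) pthread_mutex_lock(&mx);
 *	while (resource_count == 0)
 *		(void) pthread_cond_wait(&cv, &mx);
 *	resource_count--;
 *	(void) pthread_mutex_unlock(&mx);
 */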
#pragma weak pthread_cond_destroy = _cond_destroy
#pragma weak _pthread_cond_destroy = _cond_destroy
#pragma weak cond_destroy = _cond_destroy
int
_cond_destroy(cond_t *cvp)
{
	cvp->cond_magic = 0;
	tdb_sync_obj_deregister(cvp);
	return (0);
}

#if defined(THREAD_DEBUG)
void
assert_no_libc_locks_held(void)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
}
/* protected by link_lock */
uint64_t spin_lock_spin;
uint64_t spin_lock_spin2;
uint64_t spin_lock_sleep;
uint64_t spin_lock_wakeup;

/*
 * Record spin lock statistics.
 * Called by a thread exiting itself in thrp_exit().
 * Also called via atexit() from the thread calling
 * exit() to do all the other threads as well.
 */
void
record_spin_locks(ulwp_t *ulwp)
{
	spin_lock_spin += ulwp->ul_spin_lock_spin;
	spin_lock_spin2 += ulwp->ul_spin_lock_spin2;
	spin_lock_sleep += ulwp->ul_spin_lock_sleep;
	spin_lock_wakeup += ulwp->ul_spin_lock_wakeup;
	ulwp->ul_spin_lock_spin = 0;
	ulwp->ul_spin_lock_spin2 = 0;
	ulwp->ul_spin_lock_sleep = 0;
	ulwp->ul_spin_lock_wakeup = 0;
}

/*
 * atexit function: dump the queue statistics to stderr.
 */
#if !defined(__lint)
#define	fprintf	_fprintf
#endif
#include <stdio.h>
void
dump_queue_statistics(void)
{
	uberdata_t *udp = curthread->ul_uberdata;
	queue_head_t *qp;
	int qn;
	uint64_t spin_lock_total = 0;

	if (udp->queue_head == NULL || thread_queue_dump == 0)
		return;

	if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 ||
	    fprintf(stderr, "queue#   lockcount    max qlen    max hlen\n") < 0)
		return;
	for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) {
		if (qp->qh_lockcount == 0)
			continue;
		spin_lock_total += qp->qh_lockcount;
		if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn,
		    (u_longlong_t)qp->qh_lockcount,
		    qp->qh_qmax, qp->qh_hmax) < 0)
			return;
	}

	if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 ||
	    fprintf(stderr, "queue#   lockcount    max qlen    max hlen\n") < 0)
		return;
	for (qn = 0; qn < QHASHSIZE; qn++, qp++) {
		if (qp->qh_lockcount == 0)
			continue;
		spin_lock_total += qp->qh_lockcount;
		if (fprintf(stderr, "%5d %12llu%12u%12u\n", qn,
		    (u_longlong_t)qp->qh_lockcount,
		    qp->qh_qmax, qp->qh_hmax) < 0)
			return;
	}

	(void) fprintf(stderr, "\n  spin_lock_total  = %10llu\n",
	    (u_longlong_t)spin_lock_total);
	(void) fprintf(stderr, "  spin_lock_spin   = %10llu\n",
	    (u_longlong_t)spin_lock_spin);
	(void) fprintf(stderr, "  spin_lock_spin2  = %10llu\n",
	    (u_longlong_t)spin_lock_spin2);
	(void) fprintf(stderr, "  spin_lock_sleep  = %10llu\n",
	    (u_longlong_t)spin_lock_sleep);
	(void) fprintf(stderr, "  spin_lock_wakeup = %10llu\n",
	    (u_longlong_t)spin_lock_wakeup);
}
#endif
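/*
 * Illustrative sketch, not part of libc: dump_queue_statistics() runs
 * via atexit() and prints nothing unless thread_queue_dump is nonzero.
 * Assuming the _THREAD_* environment-variable convention this library
 * uses for its other tunables (the variable name below is an
 * assumption), a THREAD_DEBUG build would be exercised from a shell
 * like this:
 *
 *	$ _THREAD_QUEUE_DUMP=1 ./my_threaded_app
 *
 * On process exit, the per-queue lock counts and the maximum queue
 * and hash-bucket lengths are written to stderr.
 */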