/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

#include "lint.h"
#include "thr_uberdata.h"
#include <sys/sdt.h>

#define	TRY_FLAG		0x10
#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
#define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)

#define	NLOCKS	4	/* initial number of readlock_t structs allocated */

#define	ASSERT_CONSISTENT_STATE(readers)		\
	ASSERT(!((readers) & URW_WRITE_LOCKED) ||	\
	    ((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED)
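/*
 * Note on the state word (a sketch, not a definition): the rwlock_readers
 * word, accessed throughout this file as 'rwstate', is treated as a count
 * of current readers in the URW_READERS_MASK bits plus two flag bits:
 * URW_WRITE_LOCKED (a writer holds the lock) and URW_HAS_WAITERS (threads
 * are blocked on the lock).  The exact bit assignments are defined in the
 * system headers, not here.  ASSERT_CONSISTENT_STATE() checks that the
 * write-locked flag is never set at the same time as a nonzero reader
 * count.
 */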

/*
 * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
 * We must be deferring signals for this to be safe.
 * Else if we are returning an entry with ul_rdlockcnt == 0,
 * it could be reassigned behind our back in a signal handler.
 */
static readlock_t *
rwl_entry(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	readlock_t *remembered = NULL;
	readlock_t *readlockp;
	uint_t nlocks;

	/* we must be deferring signals */
	ASSERT((self->ul_critical + self->ul_sigdefer) != 0);

	if ((nlocks = self->ul_rdlockcnt) != 0)
		readlockp = self->ul_readlock.array;
	else {
		nlocks = 1;
		readlockp = &self->ul_readlock.single;
	}

	for (; nlocks; nlocks--, readlockp++) {
		if (readlockp->rd_rwlock == rwlp)
			return (readlockp);
		if (readlockp->rd_count == 0 && remembered == NULL)
			remembered = readlockp;
	}
	if (remembered != NULL) {
		remembered->rd_rwlock = rwlp;
		return (remembered);
	}

	/*
	 * No entry available.  Allocate more space, converting the single
	 * readlock_t entry into an array of readlock_t entries if necessary.
	 */
	if ((nlocks = self->ul_rdlockcnt) == 0) {
		/*
		 * Initial allocation of the readlock_t array.
		 * Convert the single entry into an array.
		 */
		self->ul_rdlockcnt = nlocks = NLOCKS;
		readlockp = lmalloc(nlocks * sizeof (readlock_t));
		/*
		 * The single readlock_t becomes the first entry in the array.
		 */
		*readlockp = self->ul_readlock.single;
		self->ul_readlock.single.rd_count = 0;
		self->ul_readlock.array = readlockp;
		/*
		 * Return the next available entry in the array.
		 */
		(++readlockp)->rd_rwlock = rwlp;
		return (readlockp);
	}
	/*
	 * Reallocate the array, double the size each time.
	 */
	readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t));
	(void) memcpy(readlockp, self->ul_readlock.array,
	    nlocks * sizeof (readlock_t));
	lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
	self->ul_readlock.array = readlockp;
	self->ul_rdlockcnt *= 2;
	/*
	 * Return the next available entry in the newly allocated array.
	 */
	(readlockp += nlocks)->rd_rwlock = rwlp;
	return (readlockp);
}

/*
 * Free the array of rwlocks held for reading.
 */
void
rwl_free(ulwp_t *ulwp)
{
	uint_t nlocks;

	if ((nlocks = ulwp->ul_rdlockcnt) != 0)
		lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t));
	ulwp->ul_rdlockcnt = 0;
	ulwp->ul_readlock.single.rd_rwlock = NULL;
	ulwp->ul_readlock.single.rd_count = 0;
}

/*
 * Check if a reader version of the lock is held by the current thread.
 */
#pragma weak _rw_read_held = rw_read_held
int
rw_read_held(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	readlock_t *readlockp;
	uint_t nlocks;
	int rval = 0;

	no_preempt(self);

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (!(readers & URW_WRITE_LOCKED) &&
	    (readers & URW_READERS_MASK) != 0) {
		/*
		 * The lock is held for reading by some thread.
		 * Search our array of rwlocks held for reading for a match.
		 */
		if ((nlocks = self->ul_rdlockcnt) != 0)
			readlockp = self->ul_readlock.array;
		else {
			nlocks = 1;
			readlockp = &self->ul_readlock.single;
		}
		for (; nlocks; nlocks--, readlockp++) {
			if (readlockp->rd_rwlock == rwlp) {
				if (readlockp->rd_count)
					rval = 1;
				break;
			}
		}
	}

	preempt(self);
	return (rval);
}

/*
 * Check if a writer version of the lock is held by the current thread.
 */
#pragma weak _rw_write_held = rw_write_held
int
rw_write_held(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	int rval;

	no_preempt(self);

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	rval = ((readers & URW_WRITE_LOCKED) &&
	    rwlp->rwlock_owner == (uintptr_t)self &&
	    (rwlp->rwlock_type == USYNC_THREAD ||
	    rwlp->rwlock_ownerpid == self->ul_uberdata->pid));

	preempt(self);
	return (rval);
}
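
/*
 * Illustrative use only (not part of this file): these predicates are
 * typically used in assertions by code that wants to verify its own
 * locking, e.g.
 *
 *	ASSERT(rw_read_held(&my_lock));		(before reading shared state)
 *	ASSERT(rw_write_held(&my_lock));	(before modifying it)
 *
 * where 'my_lock' is a hypothetical rwlock_t owned by the caller.
 */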

#pragma weak _rwlock_init = rwlock_init
int
rwlock_init(rwlock_t *rwlp, int type, void *arg __unused)
{
	ulwp_t *self = curthread;

	if (type != USYNC_THREAD && type != USYNC_PROCESS)
		return (EINVAL);
	/*
	 * Once reinitialized, we can no longer be holding a read or write lock.
	 * We can do nothing about other threads that are holding read locks.
	 */
	sigoff(self);
	rwl_entry(rwlp)->rd_count = 0;
	sigon(self);
	(void) memset(rwlp, 0, sizeof (*rwlp));
	rwlp->rwlock_type = (uint16_t)type;
	rwlp->rwlock_magic = RWL_MAGIC;
	rwlp->mutex.mutex_type = (uint8_t)type;
	rwlp->mutex.mutex_flag = LOCK_INITED;
	rwlp->mutex.mutex_magic = MUTEX_MAGIC;

	/*
	 * This should be at the beginning of the function,
	 * but for the sake of old broken applications that
	 * do not have proper alignment for their rwlocks
	 * (and don't check the return code from rwlock_init),
	 * we put it here, after initializing the rwlock regardless.
	 */
	if (((uintptr_t)rwlp & (_LONG_LONG_ALIGNMENT - 1)) &&
	    self->ul_misaligned == 0)
		return (EINVAL);

	return (0);
}
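
/*
 * Illustrative caller sketch (not part of this file), assuming a
 * process-private lock named 'lock':
 *
 *	rwlock_t lock;
 *
 *	(void) rwlock_init(&lock, USYNC_THREAD, NULL);
 *	(void) rw_rdlock(&lock);
 *	... read the data protected by 'lock' ...
 *	(void) rw_unlock(&lock);
 *	(void) rwlock_destroy(&lock);
 *
 * Pass USYNC_PROCESS instead to share the lock between processes.
 */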

#pragma weak pthread_rwlock_destroy = rwlock_destroy
#pragma weak _rwlock_destroy = rwlock_destroy
int
rwlock_destroy(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;

	/*
	 * Once destroyed, we can no longer be holding a read or write lock.
	 * We can do nothing about other threads that are holding read locks.
	 */
	sigoff(self);
	rwl_entry(rwlp)->rd_count = 0;
	sigon(self);
	rwlp->rwlock_magic = 0;
	tdb_sync_obj_deregister(rwlp);
	return (0);
}

/*
 * The following four functions:
 *	read_lock_try()
 *	read_unlock_try()
 *	write_lock_try()
 *	write_unlock_try()
 * lie at the heart of the fast-path code for rwlocks,
 * both process-private and process-shared.
 *
 * They are called once without recourse to any other locking primitives.
 * If they succeed, we are done and the fast-path code was successful.
 * If they fail, we have to deal with lock queues, either to enqueue
 * ourself and sleep or to dequeue and wake up someone else (slow paths).
 *
 * Unless 'ignore_waiters_flag' is true (a condition that applies only
 * when read_lock_try() or write_lock_try() is called from code that
 * is already in the slow path and has already acquired the queue lock),
 * these functions will always fail if the waiters flag, URW_HAS_WAITERS,
 * is set in the 'rwstate' word.  Thus, setting the waiters flag on the
 * rwlock and acquiring the queue lock guarantees exclusive access to
 * the rwlock (and is the only way to guarantee exclusive access).
 */
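
/*
 * A worked example of the fast path (a sketch; the URW_* names stand in
 * for whatever bit values the headers assign).  Starting from an idle
 * lock (rwstate == 0):
 *
 *	read_lock_try()		0 -> 1				(one reader)
 *	read_lock_try()		1 -> 2				(a second reader)
 *	read_unlock_try()	2 -> 1, then 1 -> 0
 *	write_lock_try()	0 -> URW_WRITE_LOCKED		(one writer)
 *	write_unlock_try()	URW_WRITE_LOCKED -> 0
 *
 * Once URW_HAS_WAITERS is set, each of these compare-and-swap attempts
 * fails (unless ignore_waiters_flag is set), which forces all callers
 * into the slow path under the queue lock.
 */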

/*
 * Attempt to acquire a readers lock.  Return true on success.
 */
static int
read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t mask = ignore_waiters_flag ?
	    URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED);
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & mask) == 0) {
		if (atomic_cas_32(rwstate, readers, readers + 1) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to release a reader lock.  Return true on success.
 */
static int
read_unlock_try(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
		if (atomic_cas_32(rwstate, readers, readers - 1) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to acquire a writer lock.  Return true on success.
 */
static int
write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t mask = ignore_waiters_flag ?
	    (URW_WRITE_LOCKED | URW_READERS_MASK) :
	    (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK);
	ulwp_t *self = curthread;
	uint32_t readers;

	no_preempt(self);
	while (((readers = *rwstate) & mask) == 0) {
		if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED)
		    == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to release a writer lock.  Return true on success.
 */
static int
write_unlock_try(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
		if (atomic_cas_32(rwstate, readers, 0) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Release a process-private rwlock and wake up any thread(s) sleeping on it.
 * This is called when a thread releases a lock that appears to have waiters.
 */
static void
rw_queue_release(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	queue_head_t *qp;
	uint32_t readers;
	uint32_t writer;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev;
	int nlwpid = 0;
	int more;
	int maxlwps = MAXLWPS;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	qp = queue_lock(rwlp, MX);

	/*
	 * Here is where we actually drop the lock,
	 * but we retain the URW_HAS_WAITERS flag, if it is already set.
	 */
	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (readers & URW_WRITE_LOCKED)	/* drop the writer lock */
		atomic_and_32(rwstate, ~URW_WRITE_LOCKED);
	else				/* drop the readers lock */
		atomic_dec_32(rwstate);
	if (!(readers & URW_HAS_WAITERS)) {	/* no waiters */
		queue_unlock(qp);
		return;
	}

	/*
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Therefore, the rest of this code is safe because we are
	 * holding the queue lock and the URW_HAS_WAITERS flag is set.
	 */

	readers = *rwstate;		/* must fetch the value again */
	ASSERT_CONSISTENT_STATE(readers);
	ASSERT(readers & URW_HAS_WAITERS);
	readers &= URW_READERS_MASK;	/* count of current readers */
	writer = 0;			/* no current writer */

	/*
	 * Examine the queue of waiters in priority order and prepare
	 * to wake up as many readers as we encounter before encountering
	 * a writer.  If the highest priority thread on the queue is a
	 * writer, stop there and wake it up.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.  If we run out of space in our
	 * on-stack buffer, we need to allocate more but we can't call
	 * lmalloc() because we are holding a queue lock when the overflow
	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
	 * either because the application may have allocated a small
	 * stack and we don't want to overrun the stack.  So we call
	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
	 * system call directly since that path acquires no locks.
	 */
	while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) {
		ulwp = *ulwpp;
		ASSERT(ulwp->ul_wchan == rwlp);
		if (ulwp->ul_writer) {
			if (writer != 0 || readers != 0)
				break;
			/* one writer to wake */
			writer++;
		} else {
			if (writer != 0)
				break;
			/* at least one reader to wake */
			readers++;
			if (nlwpid == maxlwps)
				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
		}
		queue_unlink(qp, ulwpp, prev);
		ulwp->ul_sleepq = NULL;
		ulwp->ul_wchan = NULL;
		if (writer) {
			/*
			 * Hand off the lock to the writer we will be waking.
			 */
			ASSERT((*rwstate & ~URW_HAS_WAITERS) == 0);
			atomic_or_32(rwstate, URW_WRITE_LOCKED);
			rwlp->rwlock_owner = (uintptr_t)ulwp;
		}
		lwpid[nlwpid++] = ulwp->ul_lwpid;
	}

	/*
	 * This modification of rwstate must be done last.
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Otherwise the read_lock_try() and write_lock_try() fast paths
	 * are effective.
	 */
	if (ulwpp == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	if (nlwpid == 0) {
		queue_unlock(qp);
	} else {
		ulwp_t *self = curthread;
		no_preempt(self);
		queue_unlock(qp);
		if (nlwpid == 1)
			(void) __lwp_unpark(lwpid[0]);
		else
			(void) __lwp_unpark_all(lwpid, nlwpid);
		preempt(self);
	}
	if (lwpid != buffer)
		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
 *
 * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
 * or __lwp_rwlock_wrlock() holding the mutex.  These return with the mutex
 * released, and if they need to sleep will release the mutex first.  In the
 * event of a spurious wakeup, these will return EAGAIN (because it is much
 * easier for us to re-acquire the mutex here).
 */
int
shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	mutex_t *mp = &rwlp->mutex;
	int try_flag;
	int error;

	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	do {
		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
			error = EBUSY;
			break;
		}
		if ((error = mutex_lock(mp)) != 0)
			break;
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		} else {
			if (write_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		}
		atomic_or_32(rwstate, URW_HAS_WAITERS);

#ifdef THREAD_DEBUG
		uint32_t readers;
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
#endif
		/*
		 * The calls to __lwp_rwlock_*() below will release the mutex,
		 * so we need a dtrace probe here.  The owner field of the
		 * mutex is cleared in the kernel when the mutex is released,
		 * so we should not clear it here.
		 */
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		/*
		 * The waiters bit may be inaccurate.
		 * Only the kernel knows for sure.
		 */
		if (rd_wr == READ_LOCK) {
			if (try_flag)
				error = __lwp_rwlock_tryrdlock(rwlp);
			else
				error = __lwp_rwlock_rdlock(rwlp, tsp);
		} else {
			if (try_flag)
				error = __lwp_rwlock_trywrlock(rwlp);
			else
				error = __lwp_rwlock_wrlock(rwlp, tsp);
		}
	} while (error == EAGAIN || error == EINTR);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-private (USYNC_THREAD) rwlocks.
 */
int
rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	queue_head_t *qp;
	ulwp_t *ulwp;
	int try_flag;
	int ignore_waiters_flag;
	int error = 0;

	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	qp = queue_lock(rwlp, MX);
	/* initial attempt to acquire the lock fails if there are waiters */
	ignore_waiters_flag = 0;
	while (error == 0) {
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, ignore_waiters_flag))
				break;
		} else {
			if (write_lock_try(rwlp, ignore_waiters_flag))
				break;
		}
		/* subsequent attempts do not fail due to waiters */
		ignore_waiters_flag = 1;
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		if ((readers & URW_WRITE_LOCKED) ||
		    (rd_wr == WRITE_LOCK &&
		    (readers & URW_READERS_MASK) != 0))
			/* EMPTY */;	/* somebody holds the lock */
		else if ((ulwp = queue_waiter(qp)) == NULL) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
			continue;	/* no queued waiters, start over */
		} else {
			/*
			 * Do a priority check on the queued waiter (the
			 * highest priority thread on the queue) to see
			 * if we should defer to it or just grab the lock.
			 */
			int our_pri = real_priority(self);
			int his_pri = real_priority(ulwp);

			if (rd_wr == WRITE_LOCK) {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours.
				 */
				if (his_pri <= our_pri) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			} else {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours or that
				 * is a writer whose priority equals ours.
				 */
				if (his_pri < our_pri ||
				    (his_pri == our_pri && !ulwp->ul_writer)) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			}
		}
		/*
		 * We are about to block.
		 * If we're doing a trylock, return EBUSY instead.
		 */
		if (try_flag) {
			error = EBUSY;
			break;
		}
		/*
		 * Enqueue writers ahead of readers.
		 */
		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
		enqueue(qp, self, 0);
		set_parking_flag(self, 1);
		queue_unlock(qp);
		if ((error = __lwp_park(tsp, 0)) == EINTR)
			error = 0;
		set_parking_flag(self, 0);
		qp = queue_lock(rwlp, MX);
		if (self->ul_sleepq && dequeue_self(qp) == 0) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
		}
		self->ul_writer = 0;
		if (rd_wr == WRITE_LOCK &&
		    (*rwstate & URW_WRITE_LOCKED) &&
		    rwlp->rwlock_owner == (uintptr_t)self) {
			/*
			 * We acquired the lock by hand-off
			 * from the previous owner.
			 */
			error = 0;	/* timedlock did not fail */
			break;
		}
	}

	/*
	 * Make one final check to see if there are any threads left
	 * on the rwlock queue.  Clear the URW_HAS_WAITERS flag if not.
	 */
	if (qp->qh_root == NULL || qp->qh_root->qr_head == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	queue_unlock(qp);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

int
rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	readlock_t *readlockp;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	sigoff(self);
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		if (readlockp->rd_count == READ_LOCK_MAX) {
			sigon(self);
			error = EAGAIN;
			goto out;
		}
		sigon(self);
		error = 0;
		goto out;
	}
	sigon(self);

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_rdlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	if (read_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, READ_LOCK);

out:
	if (error == 0) {
		sigoff(self);
		rwl_entry(rwlp)->rd_count++;
		sigon(self);
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock);
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
	}

	return (error);
}

#pragma weak pthread_rwlock_rdlock = rw_rdlock
#pragma weak _rw_rdlock = rw_rdlock
int
rw_rdlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_rdlock_impl(rwlp, NULL));
}

void
lrw_rdlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_rdlock_impl(rwlp, NULL);
}

int
pthread_rwlock_reltimedrdlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    const struct timespec *_RESTRICT_KYWD reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
pthread_rwlock_timedrdlock(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    const struct timespec *_RESTRICT_KYWD abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}
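
/*
 * Illustrative caller sketch (not part of this file): building an
 * absolute timeout for pthread_rwlock_timedrdlock().  The deadline is
 * measured against CLOCK_REALTIME, matching the conversion above; the
 * five-second bound and the name 'rwl' are arbitrary.
 *
 *	struct timespec abstime;
 *
 *	(void) clock_gettime(CLOCK_REALTIME, &abstime);
 *	abstime.tv_sec += 5;
 *	if (pthread_rwlock_timedrdlock(&rwl, &abstime) == ETIMEDOUT)
 *		... the lock could not be acquired within five seconds ...
 */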

int
rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we hold a readers lock on this rwlock, bail out.
	 */
	if (rw_read_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the readers lock");
		error = EDEADLK;
		goto out;
	}

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	if (write_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);

out:
	if (error == 0) {
		rwlp->rwlock_owner = (uintptr_t)self;
		if (rwlp->rwlock_type == USYNC_PROCESS)
			rwlp->rwlock_ownerpid = udp->pid;
		if (rwsp) {
			tdb_incr(rwsp->rw_wrlock);
			rwsp->rw_wrlock_begin_hold = gethrtime();
		}
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
	}
	return (error);
}

#pragma weak pthread_rwlock_wrlock = rw_wrlock
#pragma weak _rw_wrlock = rw_wrlock
int
rw_wrlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_wrlock_impl(rwlp, NULL));
}

void
lrw_wrlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_wrlock_impl(rwlp, NULL);
}

int
pthread_rwlock_reltimedwrlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    const struct timespec *_RESTRICT_KYWD reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlp, const timespec_t *abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}
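
/*
 * Illustrative caller sketch (not part of this file): the _np variants
 * above take a relative timeout rather than an absolute deadline, so no
 * clock reading is needed.  The half-second value and the name 'rwl'
 * are arbitrary.
 *
 *	timespec_t reltime = { 0, 500000000 };	(0.5 seconds)
 *
 *	if (pthread_rwlock_reltimedwrlock_np(&rwl, &reltime) == ETIMEDOUT)
 *		... give up and report contention ...
 */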

#pragma weak pthread_rwlock_tryrdlock = rw_tryrdlock
int
rw_tryrdlock(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	readlock_t *readlockp;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	if (rwsp)
		tdb_incr(rwsp->rw_rdlock_try);

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	sigoff(self);
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		if (readlockp->rd_count == READ_LOCK_MAX) {
			sigon(self);
			error = EAGAIN;
			goto out;
		}
		sigon(self);
		error = 0;
		goto out;
	}
	sigon(self);

	if (read_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
	else						/* user-level */
		error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY);

out:
	if (error == 0) {
		sigoff(self);
		rwl_entry(rwlp)->rd_count++;
		sigon(self);
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
	} else {
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock_try_fail);
		if (error != EBUSY) {
			DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK,
			    error);
		}
	}

	return (error);
}

#pragma weak pthread_rwlock_trywrlock = rw_trywrlock
int
rw_trywrlock(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	ASSERT(!self->ul_critical || self->ul_bindflags);

	if (rwsp)
		tdb_incr(rwsp->rw_wrlock_try);

	if (write_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
	else						/* user-level */
		error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);

	if (error == 0) {
		rwlp->rwlock_owner = (uintptr_t)self;
		if (rwlp->rwlock_type == USYNC_PROCESS)
			rwlp->rwlock_ownerpid = udp->pid;
		if (rwsp)
			rwsp->rw_wrlock_begin_hold = gethrtime();
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
	} else {
		if (rwsp)
			tdb_incr(rwsp->rw_wrlock_try_fail);
		if (error != EBUSY) {
			DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK,
			    error);
		}
	}
	return (error);
}
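
/*
 * Illustrative caller sketch (not part of this file): the try variants
 * return EBUSY instead of blocking when the lock cannot be acquired
 * immediately, so callers typically branch on that value.  The name
 * 'rwl' is arbitrary.
 *
 *	if ((error = rw_trywrlock(&rwl)) == 0) {
 *		... update the protected data, then rw_unlock(&rwl) ...
 *	} else if (error == EBUSY) {
 *		... contended; retry later or fall back to rw_wrlock() ...
 *	}
 */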

#pragma weak pthread_rwlock_unlock = rw_unlock
#pragma weak _rw_unlock = rw_unlock
int
rw_unlock(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp;
	int rd_wr;

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (readers & URW_WRITE_LOCKED) {
		rd_wr = WRITE_LOCK;
		readers = 0;
	} else {
		rd_wr = READ_LOCK;
		readers &= URW_READERS_MASK;
	}

	if (rd_wr == WRITE_LOCK) {
		/*
		 * Since the writer lock is held, we'd better be
		 * holding it, else we cannot legitimately be here.
		 */
		if (!rw_write_held(rwlp)) {
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "writer lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) {
			if (rwsp->rw_wrlock_begin_hold)
				rwsp->rw_wrlock_hold_time +=
				    gethrtime() - rwsp->rw_wrlock_begin_hold;
			rwsp->rw_wrlock_begin_hold = 0;
		}
		rwlp->rwlock_owner = 0;
		rwlp->rwlock_ownerpid = 0;
	} else if (readers > 0) {
		/*
		 * A readers lock is held; if we don't hold one, bail out.
		 */
		readlock_t *readlockp;

		sigoff(self);
		readlockp = rwl_entry(rwlp);
		if (readlockp->rd_count == 0) {
			sigon(self);
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "readers lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		/*
		 * If we hold more than one readers lock on this rwlock,
		 * just decrement our reference count and return.
		 */
		if (--readlockp->rd_count != 0) {
			sigon(self);
			goto out;
		}
		sigon(self);
	} else {
		/*
		 * This is a usage error.
		 * No thread should release an unowned lock.
		 */
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_unlock", "lock not owned");
		return (EPERM);
	}

	if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) {
		/* EMPTY */;
	} else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) {
		/* EMPTY */;
	} else if (rwlp->rwlock_type == USYNC_PROCESS) {
		(void) mutex_lock(&rwlp->mutex);
		(void) __lwp_rwlock_unlock(rwlp);
		(void) mutex_unlock(&rwlp->mutex);
	} else {
		rw_queue_release(rwlp);
	}

out:
	DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr);
	return (0);
}
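
/*
 * Note on read-side recursion (an observation about the code above, not
 * an interface guarantee stated here): a thread's read holds are counted
 * per-rwlock in rd_count, so read locks taken repeatedly by the same
 * thread nest, and each rw_rdlock() must be matched by an rw_unlock().
 * For example (illustrative only, 'rwl' is arbitrary):
 *
 *	(void) rw_rdlock(&rwl);
 *	(void) rw_rdlock(&rwl);		(rd_count is now 2)
 *	(void) rw_unlock(&rwl);		(still held for reading)
 *	(void) rw_unlock(&rwl);		(now fully released)
 */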

void
lrw_unlock(rwlock_t *rwlp)
{
	(void) rw_unlock(rwlp);
	exit_critical(curthread);
}