xref: /illumos-gate/usr/src/uts/common/os/waitq.c (revision b7e32555)
1c97ad5cdSakolb /*
2c97ad5cdSakolb  * CDDL HEADER START
3c97ad5cdSakolb  *
4c97ad5cdSakolb  * The contents of this file are subject to the terms of the
5c97ad5cdSakolb  * Common Development and Distribution License (the "License").
6c97ad5cdSakolb  * You may not use this file except in compliance with the License.
7c97ad5cdSakolb  *
8c97ad5cdSakolb  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9c97ad5cdSakolb  * or http://www.opensolaris.org/os/licensing.
10c97ad5cdSakolb  * See the License for the specific language governing permissions
11c97ad5cdSakolb  * and limitations under the License.
12c97ad5cdSakolb  *
13c97ad5cdSakolb  * When distributing Covered Code, include this CDDL HEADER in each
14c97ad5cdSakolb  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15c97ad5cdSakolb  * If applicable, add the following below this CDDL HEADER, with the
16c97ad5cdSakolb  * fields enclosed by brackets "[]" replaced with your own identifying
17c97ad5cdSakolb  * information: Portions Copyright [yyyy] [name of copyright owner]
18c97ad5cdSakolb  *
19c97ad5cdSakolb  * CDDL HEADER END
20c97ad5cdSakolb  */
21c97ad5cdSakolb /*
22c97ad5cdSakolb  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23c97ad5cdSakolb  * Use is subject to license terms.
24c97ad5cdSakolb  */
25c97ad5cdSakolb 
26c97ad5cdSakolb #pragma ident	"%Z%%M%	%I%	%E% SMI"
27c97ad5cdSakolb 
28c97ad5cdSakolb #include <sys/param.h>
29c97ad5cdSakolb #include <sys/systm.h>
30c97ad5cdSakolb #include <sys/thread.h>
31c97ad5cdSakolb #include <sys/class.h>
32c97ad5cdSakolb #include <sys/debug.h>
33c97ad5cdSakolb #include <sys/cpuvar.h>
34c97ad5cdSakolb #include <sys/waitq.h>
35c97ad5cdSakolb #include <sys/cmn_err.h>
36c97ad5cdSakolb #include <sys/time.h>
37c97ad5cdSakolb #include <sys/dtrace.h>
38c97ad5cdSakolb #include <sys/sdt.h>
39c97ad5cdSakolb #include <sys/zone.h>
40c97ad5cdSakolb 
41c97ad5cdSakolb /*
42c97ad5cdSakolb  * Wait queue implementation.
43c97ad5cdSakolb  */
44c97ad5cdSakolb 
45c97ad5cdSakolb void
46c97ad5cdSakolb waitq_init(waitq_t *wq)
47c97ad5cdSakolb {
48c97ad5cdSakolb 	DISP_LOCK_INIT(&wq->wq_lock);
49c97ad5cdSakolb 	wq->wq_first = NULL;
50c97ad5cdSakolb 	wq->wq_count = 0;
51c97ad5cdSakolb 	wq->wq_blocked = B_TRUE;
52c97ad5cdSakolb }
53c97ad5cdSakolb 
54c97ad5cdSakolb void
55c97ad5cdSakolb waitq_fini(waitq_t *wq)
56c97ad5cdSakolb {
57c97ad5cdSakolb 	ASSERT(wq->wq_count == 0);
58c97ad5cdSakolb 	ASSERT(wq->wq_first == NULL);
59c97ad5cdSakolb 	ASSERT(wq->wq_blocked == B_TRUE);
60c97ad5cdSakolb 	ASSERT(!DISP_LOCK_HELD(&wq->wq_lock));
61c97ad5cdSakolb 
62c97ad5cdSakolb 	DISP_LOCK_DESTROY(&wq->wq_lock);
63c97ad5cdSakolb }
64c97ad5cdSakolb 
65c97ad5cdSakolb /*
66c97ad5cdSakolb  * Operations on waitq_t structures.
67c97ad5cdSakolb  *
68c97ad5cdSakolb  * A wait queue is a singly linked NULL-terminated list with doubly
69c97ad5cdSakolb  * linked circular sublists.  The singly linked list is in descending
70c97ad5cdSakolb  * priority order and FIFO for threads of the same priority.  It links
71c97ad5cdSakolb  * through the t_link field of the thread structure.  The doubly linked
72c97ad5cdSakolb  * sublists link threads of the same priority.  They use the t_priforw
73c97ad5cdSakolb  * and t_priback fields of the thread structure.
74c97ad5cdSakolb  *
75c97ad5cdSakolb  * Graphically (with priorities in parens):
76c97ad5cdSakolb  *
77c97ad5cdSakolb  *         ________________           _______                   _______
78c97ad5cdSakolb  *        /                \         /       \                 /       \
79c97ad5cdSakolb  *        |                |         |       |                 |       |
80c97ad5cdSakolb  *        v                v         v       v                 v       v
81c97ad5cdSakolb  *     t1(60)-->t2(60)-->t3(60)-->t4(50)-->t5(50)-->t6(30)-->t7(0)-->t8(0)
82c97ad5cdSakolb  *        ^      ^  ^      ^         ^       ^       ^  ^      ^       ^
83c97ad5cdSakolb  *        |      |  |      |         |       |       |  |      |       |
84c97ad5cdSakolb  *        \______/  \______/         \_______/       \__/      \_______/
85c97ad5cdSakolb  *
86c97ad5cdSakolb  * There are three interesting operations on a waitq list: inserting
87c97ad5cdSakolb  * a thread into the proper position according to priority; removing a
88c97ad5cdSakolb  * thread given a pointer to it; and walking the list, possibly
89c97ad5cdSakolb  * removing threads along the way.  This design allows all three
90c97ad5cdSakolb  * operations to be performed efficiently and easily.
91c97ad5cdSakolb  *
92c97ad5cdSakolb  * To insert a thread, traverse the list looking for the sublist of
93c97ad5cdSakolb  * the same priority as the thread (or one of a lower priority,
94c97ad5cdSakolb  * meaning there are no other threads in the list of the same
95c97ad5cdSakolb  * priority).  This can be done without touching all threads in the
96c97ad5cdSakolb  * list by following the links between the first threads in each
97c97ad5cdSakolb  * sublist.  Given a thread t that is the head of a sublist (the first
98c97ad5cdSakolb  * thread of that priority found when following the t_link pointers),
99c97ad5cdSakolb  * t->t_priback->t_link points to the head of the next sublist.  It's
100c97ad5cdSakolb  * important to do this since a waitq may contain thousands of
101c97ad5cdSakolb  * threads.
102c97ad5cdSakolb  *
103c97ad5cdSakolb  * Removing a thread from the list is also efficient.  First, the
104c97ad5cdSakolb  * t_waitq field contains a pointer to the waitq on which a thread
105c97ad5cdSakolb  * is waiting (or NULL if it's not on a waitq).  This is used to
106c97ad5cdSakolb  * determine if the given thread is on the given waitq without
107c97ad5cdSakolb  * searching the list.  Assuming it is, if it's not the head of a
108c97ad5cdSakolb  * sublist, just remove it from the sublist and use the t_priback
109c97ad5cdSakolb  * pointer to find the thread that points to it with t_link.  If it is
110c97ad5cdSakolb  * the head of a sublist, search for it by walking the sublist heads,
111c97ad5cdSakolb  * similar to searching for a given priority level when inserting a
112c97ad5cdSakolb  * thread.
113c97ad5cdSakolb  *
114c97ad5cdSakolb  * To walk the list, simply follow the t_link pointers.  Removing
115c97ad5cdSakolb  * threads along the way can be done easily if the code maintains a
116c97ad5cdSakolb  * pointer to the t_link field that pointed to the thread being
117c97ad5cdSakolb  * removed.
118c97ad5cdSakolb  */
119c97ad5cdSakolb 
120c97ad5cdSakolb static void
121c97ad5cdSakolb waitq_link(waitq_t *wq, kthread_t *t)
122c97ad5cdSakolb {
123c97ad5cdSakolb 	kthread_t *next_tp;
124c97ad5cdSakolb 	kthread_t *last_tp;
125c97ad5cdSakolb 	kthread_t **tpp;
126c97ad5cdSakolb 	pri_t tpri, next_pri, last_pri = -1;
127c97ad5cdSakolb 
128c97ad5cdSakolb 	ASSERT(DISP_LOCK_HELD(&wq->wq_lock));
129c97ad5cdSakolb 
130c97ad5cdSakolb 	tpri = DISP_PRIO(t);
131c97ad5cdSakolb 	tpp = &wq->wq_first;
132c97ad5cdSakolb 	while ((next_tp = *tpp) != NULL) {
133c97ad5cdSakolb 		next_pri = DISP_PRIO(next_tp);
134c97ad5cdSakolb 		if (tpri > next_pri)
135c97ad5cdSakolb 			break;
136c97ad5cdSakolb 		last_tp = next_tp->t_priback;
137c97ad5cdSakolb 		last_pri = next_pri;
138c97ad5cdSakolb 		tpp = &last_tp->t_link;
139c97ad5cdSakolb 	}
140c97ad5cdSakolb 	*tpp = t;
141c97ad5cdSakolb 	t->t_link = next_tp;
142c97ad5cdSakolb 	if (last_pri == tpri) {
143c97ad5cdSakolb 		/* last_tp points to the last thread of this priority */
144c97ad5cdSakolb 		t->t_priback = last_tp;
145c97ad5cdSakolb 		t->t_priforw = last_tp->t_priforw;
146c97ad5cdSakolb 		last_tp->t_priforw->t_priback = t;
147c97ad5cdSakolb 		last_tp->t_priforw = t;
148c97ad5cdSakolb 	} else {
149c97ad5cdSakolb 		t->t_priback = t->t_priforw = t;
150c97ad5cdSakolb 	}
151c97ad5cdSakolb 	wq->wq_count++;
152c97ad5cdSakolb 	t->t_waitq = wq;
153c97ad5cdSakolb }
154c97ad5cdSakolb 
155c97ad5cdSakolb static void
156c97ad5cdSakolb waitq_unlink(waitq_t *wq, kthread_t *t)
157c97ad5cdSakolb {
158c97ad5cdSakolb 	kthread_t *nt;
159c97ad5cdSakolb 	kthread_t **ptl;
160c97ad5cdSakolb 
161c97ad5cdSakolb 	ASSERT(THREAD_LOCK_HELD(t));
162c97ad5cdSakolb 	ASSERT(DISP_LOCK_HELD(&wq->wq_lock));
163c97ad5cdSakolb 	ASSERT(t->t_waitq == wq);
164c97ad5cdSakolb 
165c97ad5cdSakolb 	ptl = &t->t_priback->t_link;
166c97ad5cdSakolb 	/*
167c97ad5cdSakolb 	 * Is it the head of a priority sublist?  If so, need to walk
168c97ad5cdSakolb 	 * the priorities to find the t_link pointer that points to it.
169c97ad5cdSakolb 	 */
170c97ad5cdSakolb 	if (*ptl != t) {
171c97ad5cdSakolb 		/*
172c97ad5cdSakolb 		 * Find the right priority level.
173c97ad5cdSakolb 		 */
174c97ad5cdSakolb 		ptl = &t->t_waitq->wq_first;
175c97ad5cdSakolb 		while ((nt = *ptl) != t)
176c97ad5cdSakolb 			ptl = &nt->t_priback->t_link;
177c97ad5cdSakolb 	}
178c97ad5cdSakolb 	/*
179c97ad5cdSakolb 	 * Remove thread from the t_link list.
180c97ad5cdSakolb 	 */
181c97ad5cdSakolb 	*ptl = t->t_link;
182c97ad5cdSakolb 
183c97ad5cdSakolb 	/*
184c97ad5cdSakolb 	 * Take it off the priority sublist if there's more than one
185c97ad5cdSakolb 	 * thread there.
186c97ad5cdSakolb 	 */
187c97ad5cdSakolb 	if (t->t_priforw != t) {
188c97ad5cdSakolb 		t->t_priback->t_priforw = t->t_priforw;
189c97ad5cdSakolb 		t->t_priforw->t_priback = t->t_priback;
190c97ad5cdSakolb 	}
191c97ad5cdSakolb 	t->t_link = NULL;
192c97ad5cdSakolb 
193c97ad5cdSakolb 	wq->wq_count--;
194c97ad5cdSakolb 	t->t_waitq = NULL;
195c97ad5cdSakolb 	t->t_priforw = NULL;
196c97ad5cdSakolb 	t->t_priback = NULL;
197c97ad5cdSakolb }
198c97ad5cdSakolb 
199c97ad5cdSakolb /*
200c97ad5cdSakolb  * Put specified thread to specified wait queue without dropping thread's lock.
201c97ad5cdSakolb  * Returns 1 if thread was successfully placed on project's wait queue, or
202c97ad5cdSakolb  * 0 if wait queue is blocked.
203c97ad5cdSakolb  */
204c97ad5cdSakolb int
205c97ad5cdSakolb waitq_enqueue(waitq_t *wq, kthread_t *t)
206c97ad5cdSakolb {
207c97ad5cdSakolb 	ASSERT(THREAD_LOCK_HELD(t));
208c97ad5cdSakolb 	ASSERT(t->t_sleepq == NULL);
209c97ad5cdSakolb 	ASSERT(t->t_waitq == NULL);
210c97ad5cdSakolb 	ASSERT(t->t_link == NULL);
211c97ad5cdSakolb 
212c97ad5cdSakolb 	disp_lock_enter_high(&wq->wq_lock);
213c97ad5cdSakolb 
214c97ad5cdSakolb 	/*
215c97ad5cdSakolb 	 * Can't enqueue anything on a blocked wait queue
216c97ad5cdSakolb 	 */
217c97ad5cdSakolb 	if (wq->wq_blocked) {
218c97ad5cdSakolb 		disp_lock_exit_high(&wq->wq_lock);
219c97ad5cdSakolb 		return (0);
220c97ad5cdSakolb 	}
221c97ad5cdSakolb 
222c97ad5cdSakolb 	/*
223c97ad5cdSakolb 	 * Mark the time when thread is placed on wait queue. The microstate
224c97ad5cdSakolb 	 * accounting code uses this timestamp to determine wait times.
225c97ad5cdSakolb 	 */
226c97ad5cdSakolb 	t->t_waitrq = gethrtime_unscaled();
227c97ad5cdSakolb 
228c97ad5cdSakolb 	/*
229c97ad5cdSakolb 	 * Mark thread as not swappable.  If necessary, it will get
230c97ad5cdSakolb 	 * swapped out when it returns to the userland.
231c97ad5cdSakolb 	 */
232c97ad5cdSakolb 	t->t_schedflag |= TS_DONT_SWAP;
233c97ad5cdSakolb 	DTRACE_SCHED1(cpucaps__sleep, kthread_t *, t);
234c97ad5cdSakolb 	waitq_link(wq, t);
235c97ad5cdSakolb 
236c97ad5cdSakolb 	THREAD_WAIT(t, &wq->wq_lock);
237c97ad5cdSakolb 	return (1);
238c97ad5cdSakolb }
239c97ad5cdSakolb 
240c97ad5cdSakolb /*
241c97ad5cdSakolb  * Change thread's priority while on the wait queue.
242c97ad5cdSakolb  * Dequeue and equeue it again so that it gets placed in the right place.
243c97ad5cdSakolb  */
244c97ad5cdSakolb void
245c97ad5cdSakolb waitq_change_pri(kthread_t *t, pri_t new_pri)
246c97ad5cdSakolb {
247c97ad5cdSakolb 	waitq_t *wq = t->t_waitq;
248c97ad5cdSakolb 
249c97ad5cdSakolb 	ASSERT(THREAD_LOCK_HELD(t));
250c97ad5cdSakolb 	ASSERT(ISWAITING(t));
251c97ad5cdSakolb 	ASSERT(wq != NULL);
252c97ad5cdSakolb 
253c97ad5cdSakolb 	waitq_unlink(wq, t);
254c97ad5cdSakolb 	t->t_pri = new_pri;
255c97ad5cdSakolb 	waitq_link(wq, t);
256c97ad5cdSakolb }
257c97ad5cdSakolb 
258c97ad5cdSakolb static void
259c97ad5cdSakolb waitq_dequeue(waitq_t *wq, kthread_t *t)
260c97ad5cdSakolb {
261c97ad5cdSakolb 	ASSERT(THREAD_LOCK_HELD(t));
262c97ad5cdSakolb 	ASSERT(t->t_waitq == wq);
263c97ad5cdSakolb 	ASSERT(ISWAITING(t));
264c97ad5cdSakolb 
265c97ad5cdSakolb 	waitq_unlink(wq, t);
266c97ad5cdSakolb 	DTRACE_SCHED1(cpucaps__wakeup, kthread_t *, t);
267c97ad5cdSakolb 
268c97ad5cdSakolb 	/*
269*b7e32555Sakolb 	 * Change thread to transition state and drop the wait queue lock. The
270*b7e32555Sakolb 	 * thread will remain locked since its t_lockp points to the
271*b7e32555Sakolb 	 * transition_lock.
272c97ad5cdSakolb 	 */
273*b7e32555Sakolb 	THREAD_TRANSITION(t);
274c97ad5cdSakolb }
275c97ad5cdSakolb 
276c97ad5cdSakolb /*
277c97ad5cdSakolb  * Return True iff there are any threads on the specified wait queue.
278c97ad5cdSakolb  * The check is done **without holding any locks**.
279c97ad5cdSakolb  */
280c97ad5cdSakolb boolean_t
281c97ad5cdSakolb waitq_isempty(waitq_t *wq)
282c97ad5cdSakolb {
283c97ad5cdSakolb 	return (wq->wq_count == 0);
284c97ad5cdSakolb }
285c97ad5cdSakolb 
286c97ad5cdSakolb /*
287c97ad5cdSakolb  * Take thread off its wait queue and make it runnable.
288c97ad5cdSakolb  * Returns with thread lock held.
289c97ad5cdSakolb  */
290c97ad5cdSakolb void
291c97ad5cdSakolb waitq_setrun(kthread_t *t)
292c97ad5cdSakolb {
293c97ad5cdSakolb 	waitq_t *wq = t->t_waitq;
294c97ad5cdSakolb 
295c97ad5cdSakolb 	ASSERT(THREAD_LOCK_HELD(t));
296c97ad5cdSakolb 
297c97ad5cdSakolb 	ASSERT(ISWAITING(t));
298c97ad5cdSakolb 	if (wq == NULL)
299c97ad5cdSakolb 		panic("waitq_setrun: thread %p is not on waitq", t);
300c97ad5cdSakolb 	waitq_dequeue(wq, t);
301c97ad5cdSakolb 	CL_SETRUN(t);
302c97ad5cdSakolb }
303c97ad5cdSakolb 
304c97ad5cdSakolb /*
305c97ad5cdSakolb  * Take the first thread off the wait queue and return pointer to it.
306c97ad5cdSakolb  */
307c97ad5cdSakolb static kthread_t *
308c97ad5cdSakolb waitq_takeone(waitq_t *wq)
309c97ad5cdSakolb {
310c97ad5cdSakolb 	kthread_t *t;
311c97ad5cdSakolb 
312c97ad5cdSakolb 	disp_lock_enter(&wq->wq_lock);
313*b7e32555Sakolb 	/*
314*b7e32555Sakolb 	 * waitq_dequeue drops wait queue lock but leaves the CPU at high PIL.
315*b7e32555Sakolb 	 */
316c97ad5cdSakolb 	if ((t = wq->wq_first) != NULL)
317c97ad5cdSakolb 		waitq_dequeue(wq, wq->wq_first);
318*b7e32555Sakolb 	else
319*b7e32555Sakolb 		disp_lock_exit(&wq->wq_lock);
320c97ad5cdSakolb 	return (t);
321c97ad5cdSakolb }
322c97ad5cdSakolb 
323c97ad5cdSakolb /*
324c97ad5cdSakolb  * Take the first thread off the wait queue and make it runnable.
325c97ad5cdSakolb  * Return the pointer to the thread or NULL if waitq is empty
326c97ad5cdSakolb  */
327c97ad5cdSakolb static kthread_t *
328c97ad5cdSakolb waitq_runfirst(waitq_t *wq)
329c97ad5cdSakolb {
330c97ad5cdSakolb 	kthread_t *t;
331c97ad5cdSakolb 
332c97ad5cdSakolb 	t = waitq_takeone(wq);
333c97ad5cdSakolb 	if (t != NULL) {
334*b7e32555Sakolb 		/*
335*b7e32555Sakolb 		 * t should have transition lock held.
336*b7e32555Sakolb 		 * CL_SETRUN() will replace it with dispq lock and keep it held.
337*b7e32555Sakolb 		 * thread_unlock() will drop dispq lock and restore PIL.
338*b7e32555Sakolb 		 */
339*b7e32555Sakolb 		ASSERT(THREAD_LOCK_HELD(t));
340c97ad5cdSakolb 		CL_SETRUN(t);
341*b7e32555Sakolb 		thread_unlock(t);
342c97ad5cdSakolb 	}
343c97ad5cdSakolb 	return (t);
344c97ad5cdSakolb }
345c97ad5cdSakolb 
346c97ad5cdSakolb /*
347c97ad5cdSakolb  * Take the first thread off the wait queue and make it runnable.
348c97ad5cdSakolb  */
349c97ad5cdSakolb void
350c97ad5cdSakolb waitq_runone(waitq_t *wq)
351c97ad5cdSakolb {
352c97ad5cdSakolb 	(void) waitq_runfirst(wq);
353c97ad5cdSakolb }
354c97ad5cdSakolb 
355c97ad5cdSakolb /*
356c97ad5cdSakolb  * Take all threads off the wait queue and make them runnable.
357c97ad5cdSakolb  */
358c97ad5cdSakolb static void
359c97ad5cdSakolb waitq_runall(waitq_t *wq)
360c97ad5cdSakolb {
361c97ad5cdSakolb 	while (waitq_runfirst(wq) != NULL)
362c97ad5cdSakolb 		;
363c97ad5cdSakolb }
364c97ad5cdSakolb 
365c97ad5cdSakolb /*
366c97ad5cdSakolb  * Prevent any new threads from entering wait queue and make all threads
367c97ad5cdSakolb  * currently on the wait queue runnable. After waitq_block() completion, no
368c97ad5cdSakolb  * threads should ever appear on the wait queue untill it is unblocked.
369c97ad5cdSakolb  */
370c97ad5cdSakolb void
371c97ad5cdSakolb waitq_block(waitq_t *wq)
372c97ad5cdSakolb {
373c97ad5cdSakolb 	ASSERT(!wq->wq_blocked);
374c97ad5cdSakolb 	disp_lock_enter(&wq->wq_lock);
375c97ad5cdSakolb 	wq->wq_blocked = B_TRUE;
376c97ad5cdSakolb 	disp_lock_exit(&wq->wq_lock);
377c97ad5cdSakolb 	waitq_runall(wq);
378c97ad5cdSakolb 	ASSERT(waitq_isempty(wq));
379c97ad5cdSakolb }
380c97ad5cdSakolb 
381c97ad5cdSakolb /*
382c97ad5cdSakolb  * Allow threads to be placed on the wait queue.
383c97ad5cdSakolb  */
384c97ad5cdSakolb void
385c97ad5cdSakolb waitq_unblock(waitq_t *wq)
386c97ad5cdSakolb {
387c97ad5cdSakolb 	disp_lock_enter(&wq->wq_lock);
388c97ad5cdSakolb 
389c97ad5cdSakolb 	ASSERT(waitq_isempty(wq));
390c97ad5cdSakolb 	ASSERT(wq->wq_blocked);
391c97ad5cdSakolb 
392c97ad5cdSakolb 	wq->wq_blocked = B_FALSE;
393c97ad5cdSakolb 
394c97ad5cdSakolb 	disp_lock_exit(&wq->wq_lock);
395c97ad5cdSakolb }
396