xref: /illumos-gate/usr/src/uts/common/os/aio_subr.c (revision 1b9bce10)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/proc.h>
29 #include <sys/file.h>
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/sysmacros.h>
33 #include <sys/cmn_err.h>
34 #include <sys/systm.h>
35 #include <vm/as.h>
36 #include <vm/page.h>
37 #include <sys/uio.h>
38 #include <sys/kmem.h>
39 #include <sys/debug.h>
40 #include <sys/aio_impl.h>
41 #include <sys/epm.h>
42 #include <sys/fs/snode.h>
43 #include <sys/siginfo.h>
44 #include <sys/cpuvar.h>
45 #include <sys/tnf_probe.h>
46 #include <sys/conf.h>
47 #include <sys/sdt.h>
48 
49 int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
50 int aio_done(struct buf *);
51 void aphysio_unlock(aio_req_t *);
52 void aio_cleanup(int);
53 void aio_cleanup_exit(void);
54 
55 /*
56  * private functions
57  */
58 static void aio_sigev_send(proc_t *, sigqueue_t *);
59 static void aio_hash_delete(aio_t *, aio_req_t *);
60 static void aio_lio_free(aio_t *, aio_lio_t *);
61 static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
62 static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
63 static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
64 static void aio_cleanup_portq(aio_t *, aio_req_t *, int);
65 
66 /*
67  * async version of physio() that doesn't wait synchronously
68  * for the driver's strategy routine to complete.
69  */
70 
71 int
aphysio(int (* strategy)(struct buf *),int (* cancel)(struct buf *),dev_t dev,int rw,void (* mincnt)(struct buf *),struct aio_req * aio)72 aphysio(
73 	int (*strategy)(struct buf *),
74 	int (*cancel)(struct buf *),
75 	dev_t dev,
76 	int rw,
77 	void (*mincnt)(struct buf *),
78 	struct aio_req *aio)
79 {
80 	struct uio *uio = aio->aio_uio;
81 	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
82 	struct buf *bp = &reqp->aio_req_buf;
83 	struct iovec *iov;
84 	struct as *as;
85 	char *a;
86 	int	error;
87 	size_t	c;
88 	struct page **pplist;
89 	struct dev_ops *ops = devopsp[getmajor(dev)];
90 
91 	if (uio->uio_loffset < 0)
92 		return (EINVAL);
93 #ifdef	_ILP32
94 	/*
95 	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
96 	 * the maximum size that can be supported by the IO subsystem.
97 	 * XXX this code assumes a D_64BIT driver.
98 	 */
99 	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
100 		return (EINVAL);
101 #endif	/* _ILP32 */
102 
103 	TNF_PROBE_5(aphysio_start, "kaio", /* CSTYLED */,
104 	    tnf_opaque, bp, bp,
105 	    tnf_device, device, dev,
106 	    tnf_offset, blkno, btodt(uio->uio_loffset),
107 	    tnf_size, size, uio->uio_iov->iov_len,
108 	    tnf_bioflags, rw, rw);
109 
110 	if (rw == B_READ) {
111 		CPU_STATS_ADD_K(sys, phread, 1);
112 	} else {
113 		CPU_STATS_ADD_K(sys, phwrite, 1);
114 	}
115 
116 	iov = uio->uio_iov;
117 	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
118 	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
119 
120 	bp->b_error = 0;
121 	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
122 	bp->b_edev = dev;
123 	bp->b_dev = cmpdev(dev);
124 	bp->b_lblkno = btodt(uio->uio_loffset);
125 	bp->b_offset = uio->uio_loffset;
126 	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
127 	    (void *)bp->b_edev, (void **)&bp->b_dip);
128 
129 	/*
130 	 * Clustering: Clustering can set the b_iodone, b_forw and
131 	 * b_proc fields to cluster-specifc values.
132 	 */
133 	if (bp->b_iodone == NULL) {
134 		bp->b_iodone = aio_done;
135 		/* b_forw points at an aio_req_t structure */
136 		bp->b_forw = (struct buf *)reqp;
137 		bp->b_proc = curproc;
138 	}
139 
140 	a = bp->b_un.b_addr = iov->iov_base;
141 	c = bp->b_bcount = iov->iov_len;
142 
143 	(*mincnt)(bp);
144 	if (bp->b_bcount != iov->iov_len)
145 		return (ENOTSUP);
146 
147 	as = bp->b_proc->p_as;
148 
149 	error = as_pagelock(as, &pplist, a,
150 	    c, rw == B_READ? S_WRITE : S_READ);
151 	if (error != 0) {
152 		bp->b_flags |= B_ERROR;
153 		bp->b_error = error;
154 		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
155 		return (error);
156 	}
157 	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
158 	bp->b_shadow = pplist;
159 	if (pplist != NULL) {
160 		bp->b_flags |= B_SHADOW;
161 	}
162 
163 	if (cancel != anocancel)
164 		cmn_err(CE_PANIC,
165 		    "aphysio: cancellation not supported, use anocancel");
166 
167 	reqp->aio_req_cancel = cancel;
168 
169 	DTRACE_IO1(start, struct buf *, bp);
170 
171 	return ((*strategy)(bp));
172 }
173 
/*
 * Cancel routine that aphysio() insists on: it ignores the buf and
 * always reports ENXIO.
 */
/*ARGSUSED*/
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}
180 
181 /*
182  * Called from biodone().
183  * Notify process that a pending AIO has finished.
184  */
185 
186 /*
187  * Clustering: This function is made non-static as it is used
188  * by clustering s/w as contract private interface.
189  */
190 
191 int
aio_done(struct buf * bp)192 aio_done(struct buf *bp)
193 {
194 	proc_t *p;
195 	struct as *as;
196 	aio_req_t *reqp;
197 	aio_lio_t *head = NULL;
198 	aio_t *aiop;
199 	sigqueue_t *sigev = NULL;
200 	sigqueue_t *lio_sigev = NULL;
201 	port_kevent_t *pkevp = NULL;
202 	port_kevent_t *lio_pkevp = NULL;
203 	int fd;
204 	int cleanupqflag;
205 	int pollqflag;
206 	int portevpend;
207 	void (*func)();
208 	int use_port = 0;
209 	int reqp_flags = 0;
210 	int send_signal = 0;
211 
212 	p = bp->b_proc;
213 	as = p->p_as;
214 	reqp = (aio_req_t *)bp->b_forw;
215 	fd = reqp->aio_req_fd;
216 
217 	TNF_PROBE_5(aphysio_end, "kaio", /* CSTYLED */,
218 	    tnf_opaque, bp, bp,
219 	    tnf_device, device, bp->b_edev,
220 	    tnf_offset, blkno, btodt(reqp->aio_req_uio.uio_loffset),
221 	    tnf_size, size, reqp->aio_req_uio.uio_iov->iov_len,
222 	    tnf_bioflags, rw, (bp->b_flags & (B_READ|B_WRITE)));
223 
224 	/*
225 	 * mapout earlier so that more kmem is available when aio is
226 	 * heavily used. bug #1262082
227 	 */
228 	if (bp->b_flags & B_REMAPPED)
229 		bp_mapout(bp);
230 
231 	/* decrement fd's ref count by one, now that aio request is done. */
232 	areleasef(fd, P_FINFO(p));
233 
234 	aiop = p->p_aio;
235 	ASSERT(aiop != NULL);
236 
237 	mutex_enter(&aiop->aio_portq_mutex);
238 	mutex_enter(&aiop->aio_mutex);
239 	ASSERT(aiop->aio_pending > 0);
240 	ASSERT(reqp->aio_req_flags & AIO_PENDING);
241 	aiop->aio_pending--;
242 	reqp->aio_req_flags &= ~AIO_PENDING;
243 	reqp_flags = reqp->aio_req_flags;
244 	if ((pkevp = reqp->aio_req_portkev) != NULL) {
245 		/* Event port notification is desired for this transaction */
246 		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
247 			/*
248 			 * The port is being closed and it is waiting for
249 			 * pending asynchronous I/O transactions to complete.
250 			 */
251 			portevpend = --aiop->aio_portpendcnt;
252 			aio_deq(&aiop->aio_portpending, reqp);
253 			aio_enq(&aiop->aio_portq, reqp, 0);
254 			mutex_exit(&aiop->aio_mutex);
255 			mutex_exit(&aiop->aio_portq_mutex);
256 			port_send_event(pkevp);
257 			if (portevpend == 0)
258 				cv_broadcast(&aiop->aio_portcv);
259 			return (0);
260 		}
261 
262 		if (aiop->aio_flags & AIO_CLEANUP) {
263 			/*
264 			 * aio_cleanup_thread() is waiting for completion of
265 			 * transactions.
266 			 */
267 			mutex_enter(&as->a_contents);
268 			aio_deq(&aiop->aio_portpending, reqp);
269 			aio_enq(&aiop->aio_portcleanupq, reqp, 0);
270 			cv_signal(&aiop->aio_cleanupcv);
271 			mutex_exit(&as->a_contents);
272 			mutex_exit(&aiop->aio_mutex);
273 			mutex_exit(&aiop->aio_portq_mutex);
274 			return (0);
275 		}
276 
277 		aio_deq(&aiop->aio_portpending, reqp);
278 		aio_enq(&aiop->aio_portq, reqp, 0);
279 
280 		use_port = 1;
281 	} else {
282 		/*
283 		 * when the AIO_CLEANUP flag is enabled for this
284 		 * process, or when the AIO_POLL bit is set for
285 		 * this request, special handling is required.
286 		 * otherwise the request is put onto the doneq.
287 		 */
288 		cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
289 		pollqflag = (reqp->aio_req_flags & AIO_POLL);
290 		if (cleanupqflag | pollqflag) {
291 
292 			if (cleanupqflag)
293 				mutex_enter(&as->a_contents);
294 
295 			/*
296 			 * requests with their AIO_POLL bit set are put
297 			 * on the pollq, requests with sigevent structures
298 			 * or with listio heads are put on the notifyq, and
299 			 * the remaining requests don't require any special
300 			 * cleanup handling, so they're put onto the default
301 			 * cleanupq.
302 			 */
303 			if (pollqflag)
304 				aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
305 			else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
306 				aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
307 			else
308 				aio_enq(&aiop->aio_cleanupq, reqp,
309 				    AIO_CLEANUPQ);
310 
311 			if (cleanupqflag) {
312 				cv_signal(&aiop->aio_cleanupcv);
313 				mutex_exit(&as->a_contents);
314 				mutex_exit(&aiop->aio_mutex);
315 				mutex_exit(&aiop->aio_portq_mutex);
316 			} else {
317 				ASSERT(pollqflag);
318 				/* block aio_cleanup_exit until we're done */
319 				aiop->aio_flags |= AIO_DONE_ACTIVE;
320 				mutex_exit(&aiop->aio_mutex);
321 				mutex_exit(&aiop->aio_portq_mutex);
322 				/*
323 				 * let the cleanup processing happen from an AST
324 				 * set an AST on all threads in this process
325 				 */
326 				mutex_enter(&p->p_lock);
327 				set_proc_ast(p);
328 				mutex_exit(&p->p_lock);
329 				mutex_enter(&aiop->aio_mutex);
330 				/* wakeup anybody waiting in aiowait() */
331 				cv_broadcast(&aiop->aio_waitcv);
332 
333 				/* wakeup aio_cleanup_exit if needed */
334 				if (aiop->aio_flags & AIO_CLEANUP)
335 					cv_signal(&aiop->aio_cleanupcv);
336 				aiop->aio_flags &= ~AIO_DONE_ACTIVE;
337 				mutex_exit(&aiop->aio_mutex);
338 			}
339 			return (0);
340 		}
341 
342 		/*
343 		 * save req's sigevent pointer, and check its
344 		 * value after releasing aio_mutex lock.
345 		 */
346 		sigev = reqp->aio_req_sigqp;
347 		reqp->aio_req_sigqp = NULL;
348 
349 		/* put request on done queue. */
350 		aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
351 	} /* portkevent */
352 
353 	/*
354 	 * when list IO notification is enabled, a notification or
355 	 * signal is sent only when all entries in the list are done.
356 	 */
357 	if ((head = reqp->aio_req_lio) != NULL) {
358 		ASSERT(head->lio_refcnt > 0);
359 		if (--head->lio_refcnt == 0) {
360 			/*
361 			 * save lio's sigevent pointer, and check
362 			 * its value after releasing aio_mutex lock.
363 			 */
364 			lio_sigev = head->lio_sigqp;
365 			head->lio_sigqp = NULL;
366 			cv_signal(&head->lio_notify);
367 			if (head->lio_port >= 0 &&
368 			    (lio_pkevp = head->lio_portkev) != NULL)
369 				head->lio_port = -1;
370 		}
371 	}
372 
373 	/*
374 	 * if AIO_WAITN set then
375 	 * send signal only when we reached the
376 	 * required amount of IO's finished
377 	 * or when all IO's are done
378 	 */
379 	if (aiop->aio_flags & AIO_WAITN) {
380 		if (aiop->aio_waitncnt > 0)
381 			aiop->aio_waitncnt--;
382 		if (aiop->aio_pending == 0 ||
383 		    aiop->aio_waitncnt == 0)
384 			cv_broadcast(&aiop->aio_waitcv);
385 	} else {
386 		cv_broadcast(&aiop->aio_waitcv);
387 	}
388 
389 	/*
390 	 * No need to set this flag for pollq, portq, lio requests.
391 	 * If this is an old Solaris aio request, and the process has
392 	 * a SIGIO signal handler enabled, then send a SIGIO signal.
393 	 */
394 	if (!sigev && !use_port && head == NULL &&
395 	    (reqp->aio_req_flags & AIO_SOLARIS) &&
396 	    (func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL &&
397 	    (func != SIG_IGN)) {
398 		send_signal = 1;
399 		reqp->aio_req_flags |= AIO_SIGNALLED;
400 	}
401 
402 	mutex_exit(&aiop->aio_mutex);
403 	mutex_exit(&aiop->aio_portq_mutex);
404 
405 	/*
406 	 * Could the cleanup thread be waiting for AIO with locked
407 	 * resources to finish?
408 	 * Ideally in that case cleanup thread should block on cleanupcv,
409 	 * but there is a window, where it could miss to see a new aio
410 	 * request that sneaked in.
411 	 */
412 	mutex_enter(&as->a_contents);
413 	if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as))
414 		cv_broadcast(&as->a_cv);
415 	mutex_exit(&as->a_contents);
416 
417 	if (sigev)
418 		aio_sigev_send(p, sigev);
419 	else if (send_signal)
420 		psignal(p, SIGIO);
421 
422 	if (pkevp)
423 		port_send_event(pkevp);
424 	if (lio_sigev)
425 		aio_sigev_send(p, lio_sigev);
426 	if (lio_pkevp)
427 		port_send_event(lio_pkevp);
428 
429 	return (0);
430 }
431 
432 /*
433  * send a queued signal to the specified process when
434  * the event signal is non-NULL. A return value of 1
435  * will indicate that a signal is queued, and 0 means that
436  * no signal was specified, nor sent.
437  */
438 static void
aio_sigev_send(proc_t * p,sigqueue_t * sigev)439 aio_sigev_send(proc_t *p, sigqueue_t *sigev)
440 {
441 	ASSERT(sigev != NULL);
442 
443 	mutex_enter(&p->p_lock);
444 	sigaddqa(p, NULL, sigev);
445 	mutex_exit(&p->p_lock);
446 }
447 
448 /*
449  * special case handling for zero length requests. the aio request
450  * short circuits the normal completion path since all that's required
451  * to complete this request is to copyout a zero to the aio request's
452  * return value.
453  */
454 void
aio_zerolen(aio_req_t * reqp)455 aio_zerolen(aio_req_t *reqp)
456 {
457 
458 	struct buf *bp = &reqp->aio_req_buf;
459 
460 	reqp->aio_req_flags |= AIO_ZEROLEN;
461 
462 	bp->b_forw = (struct buf *)reqp;
463 	bp->b_proc = curproc;
464 
465 	bp->b_resid = 0;
466 	bp->b_flags = 0;
467 
468 	aio_done(bp);
469 }
470 
471 /*
472  * unlock pages previously locked by as_pagelock
473  */
474 void
aphysio_unlock(aio_req_t * reqp)475 aphysio_unlock(aio_req_t *reqp)
476 {
477 	struct buf *bp;
478 	struct iovec *iov;
479 	int flags;
480 
481 	if (reqp->aio_req_flags & AIO_PHYSIODONE)
482 		return;
483 
484 	reqp->aio_req_flags |= AIO_PHYSIODONE;
485 
486 	if (reqp->aio_req_flags & AIO_ZEROLEN)
487 		return;
488 
489 	bp = &reqp->aio_req_buf;
490 	iov = reqp->aio_req_uio.uio_iov;
491 	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
492 	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
493 		as_pageunlock(bp->b_proc->p_as,
494 		    bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
495 		    iov->iov_base, iov->iov_len, flags);
496 		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
497 	}
498 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
499 	bp->b_flags |= B_DONE;
500 }
501 
502 /*
503  * deletes a requests id from the hash table of outstanding io.
504  */
505 static void
aio_hash_delete(aio_t * aiop,struct aio_req_t * reqp)506 aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp)
507 {
508 	long index;
509 	aio_result_t *resultp = reqp->aio_req_resultp;
510 	aio_req_t *current;
511 	aio_req_t **nextp;
512 
513 	index = AIO_HASH(resultp);
514 	nextp = (aiop->aio_hash + index);
515 	while ((current = *nextp) != NULL) {
516 		if (current->aio_req_resultp == resultp) {
517 			*nextp = current->aio_hash_next;
518 			return;
519 		}
520 		nextp = &current->aio_hash_next;
521 	}
522 }
523 
524 /*
525  * Put a list head struct onto its free list.
526  */
527 static void
aio_lio_free(aio_t * aiop,aio_lio_t * head)528 aio_lio_free(aio_t *aiop, aio_lio_t *head)
529 {
530 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
531 
532 	if (head->lio_sigqp != NULL)
533 		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
534 	head->lio_next = aiop->aio_lio_free;
535 	aiop->aio_lio_free = head;
536 }
537 
538 /*
539  * Put a reqp onto the freelist.
540  */
541 void
aio_req_free(aio_t * aiop,aio_req_t * reqp)542 aio_req_free(aio_t *aiop, aio_req_t *reqp)
543 {
544 	aio_lio_t *liop;
545 
546 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
547 
548 	if (reqp->aio_req_portkev) {
549 		port_free_event(reqp->aio_req_portkev);
550 		reqp->aio_req_portkev = NULL;
551 	}
552 
553 	if ((liop = reqp->aio_req_lio) != NULL) {
554 		if (--liop->lio_nent == 0)
555 			aio_lio_free(aiop, liop);
556 		reqp->aio_req_lio = NULL;
557 	}
558 	if (reqp->aio_req_sigqp != NULL) {
559 		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
560 		reqp->aio_req_sigqp = NULL;
561 	}
562 	reqp->aio_req_next = aiop->aio_free;
563 	reqp->aio_req_prev = NULL;
564 	aiop->aio_free = reqp;
565 	aiop->aio_outstanding--;
566 	if (aiop->aio_outstanding == 0)
567 		cv_broadcast(&aiop->aio_waitcv);
568 	aio_hash_delete(aiop, reqp);
569 }
570 
571 /*
572  * Put a reqp onto the freelist.
573  */
574 void
aio_req_free_port(aio_t * aiop,aio_req_t * reqp)575 aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
576 {
577 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
578 
579 	reqp->aio_req_next = aiop->aio_free;
580 	reqp->aio_req_prev = NULL;
581 	aiop->aio_free = reqp;
582 	aiop->aio_outstanding--;
583 	aio_hash_delete(aiop, reqp);
584 }
585 
586 
/*
 * Verify the integrity of a circular request queue (DEBUG kernels only;
 * otherwise the call expands to nothing).
 *
 * Every element must be linked consistently with both of its
 * neighbours.  When entry_present is non-NULL it has to be on the queue
 * exactly once; when entry_missing is non-NULL it must not be on it.
 */
#if defined(DEBUG)
static void
aio_verify_queue(aio_req_t *head,
	aio_req_t *entry_present, aio_req_t *entry_missing)
{
	int seen_present = 0;
	int seen_missing = 0;

	if (head != NULL) {
		aio_req_t *cur = head;

		/* one full trip around the ring */
		for (;;) {
			ASSERT(cur->aio_req_prev->aio_req_next == cur);
			ASSERT(cur->aio_req_next->aio_req_prev == cur);
			if (cur == entry_present)
				seen_present++;
			if (cur == entry_missing)
				seen_missing++;
			cur = cur->aio_req_next;
			if (cur == head)
				break;
		}
	}
	ASSERT(entry_present == NULL || seen_present == 1);
	ASSERT(entry_missing == NULL || seen_missing == 0);
}
#else
#define	aio_verify_queue(x, y, z)
#endif
615 
616 /*
617  * Put a request onto the tail of a queue.
618  */
619 void
aio_enq(aio_req_t ** qhead,aio_req_t * reqp,int qflg_new)620 aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
621 {
622 	aio_req_t *head;
623 	aio_req_t *prev;
624 
625 	aio_verify_queue(*qhead, NULL, reqp);
626 
627 	if ((head = *qhead) == NULL) {
628 		reqp->aio_req_next = reqp;
629 		reqp->aio_req_prev = reqp;
630 		*qhead = reqp;
631 	} else {
632 		reqp->aio_req_next = head;
633 		reqp->aio_req_prev = prev = head->aio_req_prev;
634 		prev->aio_req_next = reqp;
635 		head->aio_req_prev = reqp;
636 	}
637 	reqp->aio_req_flags |= qflg_new;
638 }
639 
640 /*
641  * Remove a request from its queue.
642  */
643 void
aio_deq(aio_req_t ** qhead,aio_req_t * reqp)644 aio_deq(aio_req_t **qhead, aio_req_t *reqp)
645 {
646 	aio_verify_queue(*qhead, reqp, NULL);
647 
648 	if (reqp->aio_req_next == reqp) {
649 		*qhead = NULL;
650 	} else {
651 		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
652 		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
653 		if (*qhead == reqp)
654 			*qhead = reqp->aio_req_next;
655 	}
656 	reqp->aio_req_next = NULL;
657 	reqp->aio_req_prev = NULL;
658 }
659 
660 /*
661  * concatenate a specified queue with the cleanupq. the specified
662  * queue is put onto the tail of the cleanupq. all elements on the
663  * specified queue should have their aio_req_flags field cleared.
664  */
665 /*ARGSUSED*/
666 void
aio_cleanupq_concat(aio_t * aiop,aio_req_t * q2,int qflg)667 aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
668 {
669 	aio_req_t *cleanupqhead, *q2tail;
670 	aio_req_t *reqp = q2;
671 
672 	do {
673 		ASSERT(reqp->aio_req_flags & qflg);
674 		reqp->aio_req_flags &= ~qflg;
675 		reqp->aio_req_flags |= AIO_CLEANUPQ;
676 	} while ((reqp = reqp->aio_req_next) != q2);
677 
678 	cleanupqhead = aiop->aio_cleanupq;
679 	if (cleanupqhead == NULL)
680 		aiop->aio_cleanupq = q2;
681 	else {
682 		cleanupqhead->aio_req_prev->aio_req_next = q2;
683 		q2tail = q2->aio_req_prev;
684 		q2tail->aio_req_next = cleanupqhead;
685 		q2->aio_req_prev = cleanupqhead->aio_req_prev;
686 		cleanupqhead->aio_req_prev = q2tail;
687 	}
688 }
689 
690 /*
691  * cleanup aio requests that are on the per-process poll queue.
692  */
693 void
aio_cleanup(int flag)694 aio_cleanup(int flag)
695 {
696 	aio_t *aiop = curproc->p_aio;
697 	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
698 	aio_req_t *cleanupport;
699 	aio_req_t *portq = NULL;
700 	void (*func)();
701 	int signalled = 0;
702 	int qflag = 0;
703 	int exitflg;
704 
705 	ASSERT(aiop != NULL);
706 
707 	if (flag == AIO_CLEANUP_EXIT)
708 		exitflg = AIO_CLEANUP_EXIT;
709 	else
710 		exitflg = 0;
711 
712 	/*
713 	 * We need to get the aio_cleanupq_mutex because we are calling
714 	 * aio_cleanup_cleanupq()
715 	 */
716 	mutex_enter(&aiop->aio_cleanupq_mutex);
717 	/*
718 	 * take all the requests off the cleanupq, the notifyq,
719 	 * and the pollq.
720 	 */
721 	mutex_enter(&aiop->aio_mutex);
722 	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
723 		aiop->aio_cleanupq = NULL;
724 		qflag++;
725 	}
726 	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
727 		aiop->aio_notifyq = NULL;
728 		qflag++;
729 	}
730 	if ((pollqhead = aiop->aio_pollq) != NULL) {
731 		aiop->aio_pollq = NULL;
732 		qflag++;
733 	}
734 	if (flag) {
735 		if ((portq = aiop->aio_portq) != NULL)
736 			qflag++;
737 
738 		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
739 			aiop->aio_portcleanupq = NULL;
740 			qflag++;
741 		}
742 	}
743 	mutex_exit(&aiop->aio_mutex);
744 
745 	/*
746 	 * return immediately if cleanupq, pollq, and
747 	 * notifyq are all empty. someone else must have
748 	 * emptied them.
749 	 */
750 	if (!qflag) {
751 		mutex_exit(&aiop->aio_cleanupq_mutex);
752 		return;
753 	}
754 
755 	/*
756 	 * do cleanup for the various queues.
757 	 */
758 	if (cleanupqhead)
759 		signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
760 	mutex_exit(&aiop->aio_cleanupq_mutex);
761 	if (notifyqhead)
762 		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
763 	if (pollqhead)
764 		aio_cleanup_pollq(aiop, pollqhead, exitflg);
765 	if (flag && (cleanupport || portq))
766 		aio_cleanup_portq(aiop, cleanupport, exitflg);
767 
768 	if (exitflg)
769 		return;
770 
771 	/*
772 	 * If we have an active aio_cleanup_thread it's possible for
773 	 * this routine to push something on to the done queue after
774 	 * an aiowait/aiosuspend thread has already decided to block.
775 	 * This being the case, we need a cv_broadcast here to wake
776 	 * these threads up. It is simpler and cleaner to do this
777 	 * broadcast here than in the individual cleanup routines.
778 	 */
779 
780 	mutex_enter(&aiop->aio_mutex);
781 	/*
782 	 * If there has never been an old solaris aio request
783 	 * issued by this process, then do not send a SIGIO signal.
784 	 */
785 	if (!(aiop->aio_flags & AIO_SOLARIS_REQ))
786 		signalled = 1;
787 	cv_broadcast(&aiop->aio_waitcv);
788 	mutex_exit(&aiop->aio_mutex);
789 
790 	/*
791 	 * Only if the process wasn't already signalled,
792 	 * determine if a SIGIO signal should be delievered.
793 	 */
794 	if (!signalled &&
795 	    (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
796 	    func != SIG_IGN)
797 		psignal(curproc, SIGIO);
798 }
799 
800 
801 /*
802  * Do cleanup for every element of the port cleanup queue.
803  */
804 static void
aio_cleanup_portq(aio_t * aiop,aio_req_t * cleanupq,int exitflag)805 aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
806 {
807 	aio_req_t	*reqp;
808 	aio_req_t	*next;
809 	aio_req_t	*headp;
810 	aio_lio_t	*liop;
811 
812 	/* first check the portq */
813 	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
814 		mutex_enter(&aiop->aio_mutex);
815 		if (aiop->aio_flags & AIO_CLEANUP)
816 			aiop->aio_flags |= AIO_CLEANUP_PORT;
817 		mutex_exit(&aiop->aio_mutex);
818 
819 		/*
820 		 * It is not allowed to hold locks during aphysio_unlock().
821 		 * The aio_done() interrupt function will try to acquire
822 		 * aio_mutex and aio_portq_mutex.  Therefore we disconnect
823 		 * the portq list from the aiop for the duration of the
824 		 * aphysio_unlock() loop below.
825 		 */
826 		mutex_enter(&aiop->aio_portq_mutex);
827 		headp = aiop->aio_portq;
828 		aiop->aio_portq = NULL;
829 		mutex_exit(&aiop->aio_portq_mutex);
830 		if ((reqp = headp) != NULL) {
831 			do {
832 				next = reqp->aio_req_next;
833 				aphysio_unlock(reqp);
834 				if (exitflag) {
835 					mutex_enter(&aiop->aio_mutex);
836 					aio_req_free(aiop, reqp);
837 					mutex_exit(&aiop->aio_mutex);
838 				}
839 			} while ((reqp = next) != headp);
840 		}
841 
842 		if (headp != NULL && exitflag == 0) {
843 			/* move unlocked requests back to the port queue */
844 			aio_req_t *newq;
845 
846 			mutex_enter(&aiop->aio_portq_mutex);
847 			if ((newq = aiop->aio_portq) != NULL) {
848 				aio_req_t *headprev = headp->aio_req_prev;
849 				aio_req_t *newqprev = newq->aio_req_prev;
850 
851 				headp->aio_req_prev = newqprev;
852 				newq->aio_req_prev = headprev;
853 				headprev->aio_req_next = newq;
854 				newqprev->aio_req_next = headp;
855 			}
856 			aiop->aio_portq = headp;
857 			cv_broadcast(&aiop->aio_portcv);
858 			mutex_exit(&aiop->aio_portq_mutex);
859 		}
860 	}
861 
862 	/* now check the port cleanup queue */
863 	if ((reqp = cleanupq) == NULL)
864 		return;
865 	do {
866 		next = reqp->aio_req_next;
867 		aphysio_unlock(reqp);
868 		if (exitflag) {
869 			mutex_enter(&aiop->aio_mutex);
870 			aio_req_free(aiop, reqp);
871 			mutex_exit(&aiop->aio_mutex);
872 		} else {
873 			mutex_enter(&aiop->aio_portq_mutex);
874 			aio_enq(&aiop->aio_portq, reqp, 0);
875 			mutex_exit(&aiop->aio_portq_mutex);
876 			port_send_event(reqp->aio_req_portkev);
877 			if ((liop = reqp->aio_req_lio) != NULL) {
878 				int send_event = 0;
879 
880 				mutex_enter(&aiop->aio_mutex);
881 				ASSERT(liop->lio_refcnt > 0);
882 				if (--liop->lio_refcnt == 0) {
883 					if (liop->lio_port >= 0 &&
884 					    liop->lio_portkev) {
885 						liop->lio_port = -1;
886 						send_event = 1;
887 					}
888 				}
889 				mutex_exit(&aiop->aio_mutex);
890 				if (send_event)
891 					port_send_event(liop->lio_portkev);
892 			}
893 		}
894 	} while ((reqp = next) != cleanupq);
895 }
896 
897 /*
898  * Do cleanup for every element of the cleanupq.
899  */
900 static int
aio_cleanup_cleanupq(aio_t * aiop,aio_req_t * qhead,int exitflg)901 aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
902 {
903 	aio_req_t *reqp, *next;
904 	int signalled = 0;
905 
906 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
907 
908 	/*
909 	 * Since aio_req_done() or aio_req_find() use the HASH list to find
910 	 * the required requests, they could potentially take away elements
911 	 * if they are already done (AIO_DONEQ is set).
912 	 * The aio_cleanupq_mutex protects the queue for the duration of the
913 	 * loop from aio_req_done() and aio_req_find().
914 	 */
915 	if ((reqp = qhead) == NULL)
916 		return (0);
917 	do {
918 		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
919 		ASSERT(reqp->aio_req_portkev == NULL);
920 		next = reqp->aio_req_next;
921 		aphysio_unlock(reqp);
922 		mutex_enter(&aiop->aio_mutex);
923 		if (exitflg)
924 			aio_req_free(aiop, reqp);
925 		else
926 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
927 		if (!exitflg) {
928 			if (reqp->aio_req_flags & AIO_SIGNALLED)
929 				signalled++;
930 			else
931 				reqp->aio_req_flags |= AIO_SIGNALLED;
932 		}
933 		mutex_exit(&aiop->aio_mutex);
934 	} while ((reqp = next) != qhead);
935 	return (signalled);
936 }
937 
938 /*
939  * do cleanup for every element of the notify queue.
940  */
941 static int
aio_cleanup_notifyq(aio_t * aiop,aio_req_t * qhead,int exitflg)942 aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
943 {
944 	aio_req_t *reqp, *next;
945 	aio_lio_t *liohead;
946 	sigqueue_t *sigev, *lio_sigev = NULL;
947 	int signalled = 0;
948 
949 	if ((reqp = qhead) == NULL)
950 		return (0);
951 	do {
952 		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
953 		next = reqp->aio_req_next;
954 		aphysio_unlock(reqp);
955 		if (exitflg) {
956 			mutex_enter(&aiop->aio_mutex);
957 			aio_req_free(aiop, reqp);
958 			mutex_exit(&aiop->aio_mutex);
959 		} else {
960 			mutex_enter(&aiop->aio_mutex);
961 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
962 			sigev = reqp->aio_req_sigqp;
963 			reqp->aio_req_sigqp = NULL;
964 			if ((liohead = reqp->aio_req_lio) != NULL) {
965 				ASSERT(liohead->lio_refcnt > 0);
966 				if (--liohead->lio_refcnt == 0) {
967 					cv_signal(&liohead->lio_notify);
968 					lio_sigev = liohead->lio_sigqp;
969 					liohead->lio_sigqp = NULL;
970 				}
971 			}
972 			mutex_exit(&aiop->aio_mutex);
973 			if (sigev) {
974 				signalled++;
975 				aio_sigev_send(reqp->aio_req_buf.b_proc,
976 				    sigev);
977 			}
978 			if (lio_sigev) {
979 				signalled++;
980 				aio_sigev_send(reqp->aio_req_buf.b_proc,
981 				    lio_sigev);
982 			}
983 		}
984 	} while ((reqp = next) != qhead);
985 
986 	return (signalled);
987 }
988 
989 /*
990  * Do cleanup for every element of the poll queue.
991  */
992 static void
aio_cleanup_pollq(aio_t * aiop,aio_req_t * qhead,int exitflg)993 aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
994 {
995 	aio_req_t *reqp, *next;
996 
997 	/*
998 	 * As no other threads should be accessing the queue at this point,
999 	 * it isn't necessary to hold aio_mutex while we traverse its elements.
1000 	 */
1001 	if ((reqp = qhead) == NULL)
1002 		return;
1003 	do {
1004 		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
1005 		next = reqp->aio_req_next;
1006 		aphysio_unlock(reqp);
1007 		if (exitflg) {
1008 			mutex_enter(&aiop->aio_mutex);
1009 			aio_req_free(aiop, reqp);
1010 			mutex_exit(&aiop->aio_mutex);
1011 		} else {
1012 			aio_copyout_result(reqp);
1013 			mutex_enter(&aiop->aio_mutex);
1014 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
1015 			mutex_exit(&aiop->aio_mutex);
1016 		}
1017 	} while ((reqp = next) != qhead);
1018 }
1019 
1020 /*
1021  * called by exit(). waits for all outstanding kaio to finish
1022  * before the kaio resources are freed.
1023  */
1024 void
aio_cleanup_exit(void)1025 aio_cleanup_exit(void)
1026 {
1027 	proc_t *p = curproc;
1028 	aio_t *aiop = p->p_aio;
1029 	aio_req_t *reqp, *next, *head;
1030 	aio_lio_t *nxtlio, *liop;
1031 
1032 	/*
1033 	 * wait for all outstanding kaio to complete. process
1034 	 * is now single-threaded; no other kaio requests can
1035 	 * happen once aio_pending is zero.
1036 	 */
1037 	mutex_enter(&aiop->aio_mutex);
1038 	aiop->aio_flags |= AIO_CLEANUP;
1039 	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
1040 		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
1041 	mutex_exit(&aiop->aio_mutex);
1042 
1043 	/* cleanup the cleanup-thread queues. */
1044 	aio_cleanup(AIO_CLEANUP_EXIT);
1045 
1046 	/*
1047 	 * Although this process is now single-threaded, we
1048 	 * still need to protect ourselves against a race with
1049 	 * aio_cleanup_dr_delete_memory().
1050 	 */
1051 	mutex_enter(&p->p_lock);
1052 
1053 	/*
1054 	 * free up the done queue's resources.
1055 	 */
1056 	if ((head = aiop->aio_doneq) != NULL) {
1057 		aiop->aio_doneq = NULL;
1058 		reqp = head;
1059 		do {
1060 			next = reqp->aio_req_next;
1061 			aphysio_unlock(reqp);
1062 			kmem_free(reqp, sizeof (struct aio_req_t));
1063 		} while ((reqp = next) != head);
1064 	}
1065 	/*
1066 	 * release aio request freelist.
1067 	 */
1068 	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
1069 		next = reqp->aio_req_next;
1070 		kmem_free(reqp, sizeof (struct aio_req_t));
1071 	}
1072 
1073 	/*
1074 	 * release io list head freelist.
1075 	 */
1076 	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
1077 		nxtlio = liop->lio_next;
1078 		kmem_free(liop, sizeof (aio_lio_t));
1079 	}
1080 
1081 	if (aiop->aio_iocb)
1082 		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
1083 
1084 	mutex_destroy(&aiop->aio_mutex);
1085 	mutex_destroy(&aiop->aio_portq_mutex);
1086 	mutex_destroy(&aiop->aio_cleanupq_mutex);
1087 	p->p_aio = NULL;
1088 	mutex_exit(&p->p_lock);
1089 	kmem_free(aiop, sizeof (struct aio));
1090 }
1091 
1092 /*
1093  * copy out aio request's result to a user-level result_t buffer.
1094  */
1095 void
aio_copyout_result(aio_req_t * reqp)1096 aio_copyout_result(aio_req_t *reqp)
1097 {
1098 	struct buf	*bp;
1099 	struct iovec	*iov;
1100 	void		*resultp;
1101 	int		error;
1102 	size_t		retval;
1103 
1104 	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
1105 		return;
1106 
1107 	reqp->aio_req_flags |= AIO_COPYOUTDONE;
1108 
1109 	iov = reqp->aio_req_uio.uio_iov;
1110 	bp = &reqp->aio_req_buf;
1111 	/* "resultp" points to user-level result_t buffer */
1112 	resultp = (void *)reqp->aio_req_resultp;
1113 	if (bp->b_flags & B_ERROR) {
1114 		if (bp->b_error)
1115 			error = bp->b_error;
1116 		else
1117 			error = EIO;
1118 		retval = (size_t)-1;
1119 	} else {
1120 		error = 0;
1121 		retval = iov->iov_len - bp->b_resid;
1122 	}
1123 #ifdef	_SYSCALL32_IMPL
1124 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1125 		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
1126 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1127 	} else {
1128 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
1129 		    (int)retval);
1130 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
1131 	}
1132 #else
1133 	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
1134 	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1135 #endif
1136 }
1137 
1138 
1139 void
aio_copyout_result_port(struct iovec * iov,struct buf * bp,void * resultp)1140 aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
1141 {
1142 	int errno;
1143 	size_t retval;
1144 
1145 	if (bp->b_flags & B_ERROR) {
1146 		if (bp->b_error)
1147 			errno = bp->b_error;
1148 		else
1149 			errno = EIO;
1150 		retval = (size_t)-1;
1151 	} else {
1152 		errno = 0;
1153 		retval = iov->iov_len - bp->b_resid;
1154 	}
1155 #ifdef	_SYSCALL32_IMPL
1156 	if (get_udatamodel() == DATAMODEL_NATIVE) {
1157 		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
1158 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
1159 	} else {
1160 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
1161 		    (int)retval);
1162 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
1163 	}
1164 #else
1165 	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
1166 	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
1167 #endif
1168 }
1169 
1170 /*
1171  * This function is used to remove a request from the done queue.
1172  */
1173 
1174 void
aio_req_remove_portq(aio_t * aiop,aio_req_t * reqp)1175 aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
1176 {
1177 	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
1178 	while (aiop->aio_portq == NULL) {
1179 		/*
1180 		 * aio_portq is set to NULL when aio_cleanup_portq()
1181 		 * is working with the event queue.
1182 		 * The aio_cleanup_thread() uses aio_cleanup_portq()
1183 		 * to unlock all AIO buffers with completed transactions.
1184 		 * Wait here until aio_cleanup_portq() restores the
1185 		 * list of completed transactions in aio_portq.
1186 		 */
1187 		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
1188 	}
1189 	aio_deq(&aiop->aio_portq, reqp);
1190 }
1191 
1192 /* ARGSUSED */
1193 void
aio_close_port(void * arg,int port,pid_t pid,int lastclose)1194 aio_close_port(void *arg, int port, pid_t pid, int lastclose)
1195 {
1196 	aio_t		*aiop;
1197 	aio_req_t 	*reqp;
1198 	aio_req_t 	*next;
1199 	aio_req_t	*headp;
1200 	int		counter;
1201 
1202 	if (arg == NULL)
1203 		aiop = curproc->p_aio;
1204 	else
1205 		aiop = (aio_t *)arg;
1206 
1207 	/*
1208 	 * The PORT_SOURCE_AIO source is always associated with every new
1209 	 * created port by default.
1210 	 * If no asynchronous I/O transactions were associated with the port
1211 	 * then the aiop pointer will still be set to NULL.
1212 	 */
1213 	if (aiop == NULL)
1214 		return;
1215 
1216 	/*
1217 	 * Within a process event ports can be used to collect events other
1218 	 * than PORT_SOURCE_AIO events. At the same time the process can submit
1219 	 * asynchronous I/Os transactions which are not associated with the
1220 	 * current port.
1221 	 * The current process oriented model of AIO uses a sigle queue for
1222 	 * pending events. On close the pending queue (queue of asynchronous
1223 	 * I/O transactions using event port notification) must be scanned
1224 	 * to detect and handle pending I/Os using the current port.
1225 	 */
1226 	mutex_enter(&aiop->aio_portq_mutex);
1227 	mutex_enter(&aiop->aio_mutex);
1228 	counter = 0;
1229 	if ((headp = aiop->aio_portpending) != NULL) {
1230 		reqp = headp;
1231 		do {
1232 			if (reqp->aio_req_portkev &&
1233 			    reqp->aio_req_port == port) {
1234 				reqp->aio_req_flags |= AIO_CLOSE_PORT;
1235 				counter++;
1236 			}
1237 		} while ((reqp = reqp->aio_req_next) != headp);
1238 	}
1239 	if (counter == 0) {
1240 		/* no AIOs pending */
1241 		mutex_exit(&aiop->aio_mutex);
1242 		mutex_exit(&aiop->aio_portq_mutex);
1243 		return;
1244 	}
1245 	aiop->aio_portpendcnt += counter;
1246 	mutex_exit(&aiop->aio_mutex);
1247 	while (aiop->aio_portpendcnt)
1248 		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
1249 
1250 	/*
1251 	 * all pending AIOs are completed.
1252 	 * check port doneq
1253 	 */
1254 	headp = NULL;
1255 	if ((reqp = aiop->aio_portq) != NULL) {
1256 		do {
1257 			next = reqp->aio_req_next;
1258 			if (reqp->aio_req_port == port) {
1259 				/* dequeue request and discard event */
1260 				aio_req_remove_portq(aiop, reqp);
1261 				port_free_event(reqp->aio_req_portkev);
1262 				/* put request in temporary queue */
1263 				reqp->aio_req_next = headp;
1264 				headp = reqp;
1265 			}
1266 		} while ((reqp = next) != aiop->aio_portq);
1267 	}
1268 	mutex_exit(&aiop->aio_portq_mutex);
1269 
1270 	/* headp points to the list of requests to be discarded */
1271 	for (reqp = headp; reqp != NULL; reqp = next) {
1272 		next = reqp->aio_req_next;
1273 		aphysio_unlock(reqp);
1274 		mutex_enter(&aiop->aio_mutex);
1275 		aio_req_free_port(aiop, reqp);
1276 		mutex_exit(&aiop->aio_mutex);
1277 	}
1278 
1279 	if (aiop->aio_flags & AIO_CLEANUP)
1280 		cv_broadcast(&aiop->aio_waitcv);
1281 }
1282 
1283 /*
1284  * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
1285  * to kick start the aio_cleanup_thread for the give process to do the
1286  * necessary cleanup.
1287  * This is needed so that delete_memory_thread can obtain writer locks
1288  * on pages that need to be relocated during a dr memory delete operation,
1289  * otherwise a deadly embrace may occur.
1290  */
1291 int
aio_cleanup_dr_delete_memory(proc_t * procp)1292 aio_cleanup_dr_delete_memory(proc_t *procp)
1293 {
1294 	struct aio *aiop = procp->p_aio;
1295 	struct as *as = procp->p_as;
1296 	int ret = 0;
1297 
1298 	ASSERT(MUTEX_HELD(&procp->p_lock));
1299 
1300 	mutex_enter(&as->a_contents);
1301 
1302 	if (aiop != NULL) {
1303 		aiop->aio_rqclnup = 1;
1304 		cv_broadcast(&as->a_cv);
1305 		ret = 1;
1306 	}
1307 	mutex_exit(&as->a_contents);
1308 	return (ret);
1309 }
1310