xref: /illumos-gate/usr/src/uts/common/os/aio_subr.c (revision 2570281c)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
2134709573Sraf 
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #include <sys/types.h>
287c478bd9Sstevel@tonic-gate #include <sys/proc.h>
297c478bd9Sstevel@tonic-gate #include <sys/file.h>
307c478bd9Sstevel@tonic-gate #include <sys/errno.h>
317c478bd9Sstevel@tonic-gate #include <sys/param.h>
327c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
337c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
347c478bd9Sstevel@tonic-gate #include <sys/systm.h>
357c478bd9Sstevel@tonic-gate #include <vm/as.h>
367c478bd9Sstevel@tonic-gate #include <vm/page.h>
377c478bd9Sstevel@tonic-gate #include <sys/uio.h>
387c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
397c478bd9Sstevel@tonic-gate #include <sys/debug.h>
407c478bd9Sstevel@tonic-gate #include <sys/aio_impl.h>
417c478bd9Sstevel@tonic-gate #include <sys/epm.h>
427c478bd9Sstevel@tonic-gate #include <sys/fs/snode.h>
437c478bd9Sstevel@tonic-gate #include <sys/siginfo.h>
447c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
457c478bd9Sstevel@tonic-gate #include <sys/conf.h>
467c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
477c478bd9Sstevel@tonic-gate 
487c478bd9Sstevel@tonic-gate int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
49*1b9bce10SToomas Soome int aio_done(struct buf *);
507c478bd9Sstevel@tonic-gate void aphysio_unlock(aio_req_t *);
517c478bd9Sstevel@tonic-gate void aio_cleanup(int);
527c478bd9Sstevel@tonic-gate void aio_cleanup_exit(void);
537c478bd9Sstevel@tonic-gate 
547c478bd9Sstevel@tonic-gate /*
557c478bd9Sstevel@tonic-gate  * private functions
567c478bd9Sstevel@tonic-gate  */
577c478bd9Sstevel@tonic-gate static void aio_sigev_send(proc_t *, sigqueue_t *);
587c478bd9Sstevel@tonic-gate static void aio_hash_delete(aio_t *, aio_req_t *);
597c478bd9Sstevel@tonic-gate static void aio_lio_free(aio_t *, aio_lio_t *);
6016660111SSurya Prakki static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
617c478bd9Sstevel@tonic-gate static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
627c478bd9Sstevel@tonic-gate static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
637c478bd9Sstevel@tonic-gate static void aio_cleanup_portq(aio_t *, aio_req_t *, int);
647c478bd9Sstevel@tonic-gate 
657c478bd9Sstevel@tonic-gate /*
667c478bd9Sstevel@tonic-gate  * async version of physio() that doesn't wait synchronously
677c478bd9Sstevel@tonic-gate  * for the driver's strategy routine to complete.
687c478bd9Sstevel@tonic-gate  */
697c478bd9Sstevel@tonic-gate 
707c478bd9Sstevel@tonic-gate int
aphysio(int (* strategy)(struct buf *),int (* cancel)(struct buf *),dev_t dev,int rw,void (* mincnt)(struct buf *),struct aio_req * aio)717c478bd9Sstevel@tonic-gate aphysio(
727c478bd9Sstevel@tonic-gate 	int (*strategy)(struct buf *),
737c478bd9Sstevel@tonic-gate 	int (*cancel)(struct buf *),
747c478bd9Sstevel@tonic-gate 	dev_t dev,
757c478bd9Sstevel@tonic-gate 	int rw,
767c478bd9Sstevel@tonic-gate 	void (*mincnt)(struct buf *),
777c478bd9Sstevel@tonic-gate 	struct aio_req *aio)
787c478bd9Sstevel@tonic-gate {
797c478bd9Sstevel@tonic-gate 	struct uio *uio = aio->aio_uio;
807c478bd9Sstevel@tonic-gate 	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
817c478bd9Sstevel@tonic-gate 	struct buf *bp = &reqp->aio_req_buf;
827c478bd9Sstevel@tonic-gate 	struct iovec *iov;
837c478bd9Sstevel@tonic-gate 	struct as *as;
847c478bd9Sstevel@tonic-gate 	char *a;
857c478bd9Sstevel@tonic-gate 	int	error;
867c478bd9Sstevel@tonic-gate 	size_t	c;
877c478bd9Sstevel@tonic-gate 	struct page **pplist;
887c478bd9Sstevel@tonic-gate 	struct dev_ops *ops = devopsp[getmajor(dev)];
897c478bd9Sstevel@tonic-gate 
907c478bd9Sstevel@tonic-gate 	if (uio->uio_loffset < 0)
917c478bd9Sstevel@tonic-gate 		return (EINVAL);
927c478bd9Sstevel@tonic-gate #ifdef	_ILP32
937c478bd9Sstevel@tonic-gate 	/*
947c478bd9Sstevel@tonic-gate 	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
957c478bd9Sstevel@tonic-gate 	 * the maximum size that can be supported by the IO subsystem.
967c478bd9Sstevel@tonic-gate 	 * XXX this code assumes a D_64BIT driver.
977c478bd9Sstevel@tonic-gate 	 */
987c478bd9Sstevel@tonic-gate 	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
997c478bd9Sstevel@tonic-gate 		return (EINVAL);
1007c478bd9Sstevel@tonic-gate #endif	/* _ILP32 */
1017c478bd9Sstevel@tonic-gate 
1027c478bd9Sstevel@tonic-gate 	if (rw == B_READ) {
1037c478bd9Sstevel@tonic-gate 		CPU_STATS_ADD_K(sys, phread, 1);
1047c478bd9Sstevel@tonic-gate 	} else {
1057c478bd9Sstevel@tonic-gate 		CPU_STATS_ADD_K(sys, phwrite, 1);
1067c478bd9Sstevel@tonic-gate 	}
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate 	iov = uio->uio_iov;
1097c478bd9Sstevel@tonic-gate 	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
1107c478bd9Sstevel@tonic-gate 	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
1117c478bd9Sstevel@tonic-gate 
1127c478bd9Sstevel@tonic-gate 	bp->b_error = 0;
1137c478bd9Sstevel@tonic-gate 	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
1147c478bd9Sstevel@tonic-gate 	bp->b_edev = dev;
1157c478bd9Sstevel@tonic-gate 	bp->b_dev = cmpdev(dev);
1167c478bd9Sstevel@tonic-gate 	bp->b_lblkno = btodt(uio->uio_loffset);
1177c478bd9Sstevel@tonic-gate 	bp->b_offset = uio->uio_loffset;
1187c478bd9Sstevel@tonic-gate 	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
1197c478bd9Sstevel@tonic-gate 	    (void *)bp->b_edev, (void **)&bp->b_dip);
1207c478bd9Sstevel@tonic-gate 
1217c478bd9Sstevel@tonic-gate 	/*
1227c478bd9Sstevel@tonic-gate 	 * Clustering: Clustering can set the b_iodone, b_forw and
1237c478bd9Sstevel@tonic-gate 	 * b_proc fields to cluster-specifc values.
1247c478bd9Sstevel@tonic-gate 	 */
1257c478bd9Sstevel@tonic-gate 	if (bp->b_iodone == NULL) {
126*1b9bce10SToomas Soome 		bp->b_iodone = aio_done;
1277c478bd9Sstevel@tonic-gate 		/* b_forw points at an aio_req_t structure */
1287c478bd9Sstevel@tonic-gate 		bp->b_forw = (struct buf *)reqp;
1297c478bd9Sstevel@tonic-gate 		bp->b_proc = curproc;
1307c478bd9Sstevel@tonic-gate 	}
1317c478bd9Sstevel@tonic-gate 
1327c478bd9Sstevel@tonic-gate 	a = bp->b_un.b_addr = iov->iov_base;
1337c478bd9Sstevel@tonic-gate 	c = bp->b_bcount = iov->iov_len;
1347c478bd9Sstevel@tonic-gate 
1357c478bd9Sstevel@tonic-gate 	(*mincnt)(bp);
1367c478bd9Sstevel@tonic-gate 	if (bp->b_bcount != iov->iov_len)
1377c478bd9Sstevel@tonic-gate 		return (ENOTSUP);
1387c478bd9Sstevel@tonic-gate 
1397c478bd9Sstevel@tonic-gate 	as = bp->b_proc->p_as;
1407c478bd9Sstevel@tonic-gate 
1417c478bd9Sstevel@tonic-gate 	error = as_pagelock(as, &pplist, a,
1427c478bd9Sstevel@tonic-gate 	    c, rw == B_READ? S_WRITE : S_READ);
1437c478bd9Sstevel@tonic-gate 	if (error != 0) {
1447c478bd9Sstevel@tonic-gate 		bp->b_flags |= B_ERROR;
1457c478bd9Sstevel@tonic-gate 		bp->b_error = error;
1467c478bd9Sstevel@tonic-gate 		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
1477c478bd9Sstevel@tonic-gate 		return (error);
1487c478bd9Sstevel@tonic-gate 	}
1497c478bd9Sstevel@tonic-gate 	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
1507c478bd9Sstevel@tonic-gate 	bp->b_shadow = pplist;
1517c478bd9Sstevel@tonic-gate 	if (pplist != NULL) {
1527c478bd9Sstevel@tonic-gate 		bp->b_flags |= B_SHADOW;
1537c478bd9Sstevel@tonic-gate 	}
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate 	if (cancel != anocancel)
1567c478bd9Sstevel@tonic-gate 		cmn_err(CE_PANIC,
1577c478bd9Sstevel@tonic-gate 		    "aphysio: cancellation not supported, use anocancel");
1587c478bd9Sstevel@tonic-gate 
1597c478bd9Sstevel@tonic-gate 	reqp->aio_req_cancel = cancel;
1607c478bd9Sstevel@tonic-gate 
1617c478bd9Sstevel@tonic-gate 	DTRACE_IO1(start, struct buf *, bp);
1627c478bd9Sstevel@tonic-gate 
1637c478bd9Sstevel@tonic-gate 	return ((*strategy)(bp));
1647c478bd9Sstevel@tonic-gate }
1657c478bd9Sstevel@tonic-gate 
/*
 * Cancel routine for requests that cannot be cancelled: the buf is
 * ignored and ENXIO is always returned.  This is the only cancel
 * routine aphysio() accepts.
 */
/*ARGSUSED*/
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}
1727c478bd9Sstevel@tonic-gate 
1737c478bd9Sstevel@tonic-gate /*
1747c478bd9Sstevel@tonic-gate  * Called from biodone().
1757c478bd9Sstevel@tonic-gate  * Notify process that a pending AIO has finished.
1767c478bd9Sstevel@tonic-gate  */
1777c478bd9Sstevel@tonic-gate 
1787c478bd9Sstevel@tonic-gate /*
1797c478bd9Sstevel@tonic-gate  * Clustering: This function is made non-static as it is used
1807c478bd9Sstevel@tonic-gate  * by clustering s/w as contract private interface.
1817c478bd9Sstevel@tonic-gate  */
1827c478bd9Sstevel@tonic-gate 
183*1b9bce10SToomas Soome int
aio_done(struct buf * bp)1847c478bd9Sstevel@tonic-gate aio_done(struct buf *bp)
1857c478bd9Sstevel@tonic-gate {
1867c478bd9Sstevel@tonic-gate 	proc_t *p;
1877c478bd9Sstevel@tonic-gate 	struct as *as;
1887c478bd9Sstevel@tonic-gate 	aio_req_t *reqp;
18934709573Sraf 	aio_lio_t *head = NULL;
1907c478bd9Sstevel@tonic-gate 	aio_t *aiop;
19134709573Sraf 	sigqueue_t *sigev = NULL;
1927c478bd9Sstevel@tonic-gate 	sigqueue_t *lio_sigev = NULL;
19334709573Sraf 	port_kevent_t *pkevp = NULL;
19434709573Sraf 	port_kevent_t *lio_pkevp = NULL;
1957c478bd9Sstevel@tonic-gate 	int fd;
1967c478bd9Sstevel@tonic-gate 	int cleanupqflag;
1977c478bd9Sstevel@tonic-gate 	int pollqflag;
1987c478bd9Sstevel@tonic-gate 	int portevpend;
1997c478bd9Sstevel@tonic-gate 	void (*func)();
20034709573Sraf 	int use_port = 0;
201fa7f62f0Ssp 	int reqp_flags = 0;
202b7555c90SSurya Prakki 	int send_signal = 0;
2037c478bd9Sstevel@tonic-gate 
2047c478bd9Sstevel@tonic-gate 	p = bp->b_proc;
205fa7f62f0Ssp 	as = p->p_as;
2067c478bd9Sstevel@tonic-gate 	reqp = (aio_req_t *)bp->b_forw;
2077c478bd9Sstevel@tonic-gate 	fd = reqp->aio_req_fd;
2087c478bd9Sstevel@tonic-gate 
2097c478bd9Sstevel@tonic-gate 	/*
2107c478bd9Sstevel@tonic-gate 	 * mapout earlier so that more kmem is available when aio is
2117c478bd9Sstevel@tonic-gate 	 * heavily used. bug #1262082
2127c478bd9Sstevel@tonic-gate 	 */
2137c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_REMAPPED)
2147c478bd9Sstevel@tonic-gate 		bp_mapout(bp);
2157c478bd9Sstevel@tonic-gate 
2167c478bd9Sstevel@tonic-gate 	/* decrement fd's ref count by one, now that aio request is done. */
2177c478bd9Sstevel@tonic-gate 	areleasef(fd, P_FINFO(p));
2187c478bd9Sstevel@tonic-gate 
2197c478bd9Sstevel@tonic-gate 	aiop = p->p_aio;
2207c478bd9Sstevel@tonic-gate 	ASSERT(aiop != NULL);
2217c478bd9Sstevel@tonic-gate 
22234709573Sraf 	mutex_enter(&aiop->aio_portq_mutex);
22334709573Sraf 	mutex_enter(&aiop->aio_mutex);
22434709573Sraf 	ASSERT(aiop->aio_pending > 0);
22534709573Sraf 	ASSERT(reqp->aio_req_flags & AIO_PENDING);
22634709573Sraf 	aiop->aio_pending--;
22734709573Sraf 	reqp->aio_req_flags &= ~AIO_PENDING;
228fa7f62f0Ssp 	reqp_flags = reqp->aio_req_flags;
22934709573Sraf 	if ((pkevp = reqp->aio_req_portkev) != NULL) {
2307c478bd9Sstevel@tonic-gate 		/* Event port notification is desired for this transaction */
2317c478bd9Sstevel@tonic-gate 		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
2327c478bd9Sstevel@tonic-gate 			/*
2337c478bd9Sstevel@tonic-gate 			 * The port is being closed and it is waiting for
2347c478bd9Sstevel@tonic-gate 			 * pending asynchronous I/O transactions to complete.
2357c478bd9Sstevel@tonic-gate 			 */
2367c478bd9Sstevel@tonic-gate 			portevpend = --aiop->aio_portpendcnt;
23734709573Sraf 			aio_deq(&aiop->aio_portpending, reqp);
23834709573Sraf 			aio_enq(&aiop->aio_portq, reqp, 0);
2397c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
2407c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_portq_mutex);
24134709573Sraf 			port_send_event(pkevp);
2427c478bd9Sstevel@tonic-gate 			if (portevpend == 0)
2437c478bd9Sstevel@tonic-gate 				cv_broadcast(&aiop->aio_portcv);
244*1b9bce10SToomas Soome 			return (0);
2457c478bd9Sstevel@tonic-gate 		}
2467c478bd9Sstevel@tonic-gate 
2477c478bd9Sstevel@tonic-gate 		if (aiop->aio_flags & AIO_CLEANUP) {
2487c478bd9Sstevel@tonic-gate 			/*
2497c478bd9Sstevel@tonic-gate 			 * aio_cleanup_thread() is waiting for completion of
2507c478bd9Sstevel@tonic-gate 			 * transactions.
2517c478bd9Sstevel@tonic-gate 			 */
2527c478bd9Sstevel@tonic-gate 			mutex_enter(&as->a_contents);
25334709573Sraf 			aio_deq(&aiop->aio_portpending, reqp);
25434709573Sraf 			aio_enq(&aiop->aio_portcleanupq, reqp, 0);
2557c478bd9Sstevel@tonic-gate 			cv_signal(&aiop->aio_cleanupcv);
2567c478bd9Sstevel@tonic-gate 			mutex_exit(&as->a_contents);
2577c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
2587c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_portq_mutex);
259*1b9bce10SToomas Soome 			return (0);
2607c478bd9Sstevel@tonic-gate 		}
2617c478bd9Sstevel@tonic-gate 
26234709573Sraf 		aio_deq(&aiop->aio_portpending, reqp);
26334709573Sraf 		aio_enq(&aiop->aio_portq, reqp, 0);
2647c478bd9Sstevel@tonic-gate 
26534709573Sraf 		use_port = 1;
26634709573Sraf 	} else {
2677c478bd9Sstevel@tonic-gate 		/*
26834709573Sraf 		 * when the AIO_CLEANUP flag is enabled for this
26934709573Sraf 		 * process, or when the AIO_POLL bit is set for
27034709573Sraf 		 * this request, special handling is required.
27134709573Sraf 		 * otherwise the request is put onto the doneq.
2727c478bd9Sstevel@tonic-gate 		 */
27334709573Sraf 		cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
27434709573Sraf 		pollqflag = (reqp->aio_req_flags & AIO_POLL);
27534709573Sraf 		if (cleanupqflag | pollqflag) {
27634709573Sraf 
277fa7f62f0Ssp 			if (cleanupqflag)
27834709573Sraf 				mutex_enter(&as->a_contents);
2797c478bd9Sstevel@tonic-gate 
2807c478bd9Sstevel@tonic-gate 			/*
28134709573Sraf 			 * requests with their AIO_POLL bit set are put
28234709573Sraf 			 * on the pollq, requests with sigevent structures
28334709573Sraf 			 * or with listio heads are put on the notifyq, and
28434709573Sraf 			 * the remaining requests don't require any special
28534709573Sraf 			 * cleanup handling, so they're put onto the default
28634709573Sraf 			 * cleanupq.
2877c478bd9Sstevel@tonic-gate 			 */
28834709573Sraf 			if (pollqflag)
28934709573Sraf 				aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
29034709573Sraf 			else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
29134709573Sraf 				aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
29234709573Sraf 			else
29334709573Sraf 				aio_enq(&aiop->aio_cleanupq, reqp,
29434709573Sraf 				    AIO_CLEANUPQ);
29534709573Sraf 
29634709573Sraf 			if (cleanupqflag) {
2977c478bd9Sstevel@tonic-gate 				cv_signal(&aiop->aio_cleanupcv);
29834709573Sraf 				mutex_exit(&as->a_contents);
29934709573Sraf 				mutex_exit(&aiop->aio_mutex);
30034709573Sraf 				mutex_exit(&aiop->aio_portq_mutex);
30134709573Sraf 			} else {
30234709573Sraf 				ASSERT(pollqflag);
30334709573Sraf 				/* block aio_cleanup_exit until we're done */
30434709573Sraf 				aiop->aio_flags |= AIO_DONE_ACTIVE;
30534709573Sraf 				mutex_exit(&aiop->aio_mutex);
30634709573Sraf 				mutex_exit(&aiop->aio_portq_mutex);
30734709573Sraf 				/*
30834709573Sraf 				 * let the cleanup processing happen from an AST
30934709573Sraf 				 * set an AST on all threads in this process
31034709573Sraf 				 */
31134709573Sraf 				mutex_enter(&p->p_lock);
31234709573Sraf 				set_proc_ast(p);
31334709573Sraf 				mutex_exit(&p->p_lock);
31434709573Sraf 				mutex_enter(&aiop->aio_mutex);
31534709573Sraf 				/* wakeup anybody waiting in aiowait() */
31634709573Sraf 				cv_broadcast(&aiop->aio_waitcv);
31734709573Sraf 
31834709573Sraf 				/* wakeup aio_cleanup_exit if needed */
31934709573Sraf 				if (aiop->aio_flags & AIO_CLEANUP)
32034709573Sraf 					cv_signal(&aiop->aio_cleanupcv);
32134709573Sraf 				aiop->aio_flags &= ~AIO_DONE_ACTIVE;
32234709573Sraf 				mutex_exit(&aiop->aio_mutex);
32334709573Sraf 			}
324*1b9bce10SToomas Soome 			return (0);
3257c478bd9Sstevel@tonic-gate 		}
3267c478bd9Sstevel@tonic-gate 
32734709573Sraf 		/*
32834709573Sraf 		 * save req's sigevent pointer, and check its
32934709573Sraf 		 * value after releasing aio_mutex lock.
33034709573Sraf 		 */
33134709573Sraf 		sigev = reqp->aio_req_sigqp;
33234709573Sraf 		reqp->aio_req_sigqp = NULL;
3337c478bd9Sstevel@tonic-gate 
33434709573Sraf 		/* put request on done queue. */
33534709573Sraf 		aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
33634709573Sraf 	} /* portkevent */
3377c478bd9Sstevel@tonic-gate 
3387c478bd9Sstevel@tonic-gate 	/*
33934709573Sraf 	 * when list IO notification is enabled, a notification or
34034709573Sraf 	 * signal is sent only when all entries in the list are done.
3417c478bd9Sstevel@tonic-gate 	 */
3427c478bd9Sstevel@tonic-gate 	if ((head = reqp->aio_req_lio) != NULL) {
3437c478bd9Sstevel@tonic-gate 		ASSERT(head->lio_refcnt > 0);
3447c478bd9Sstevel@tonic-gate 		if (--head->lio_refcnt == 0) {
3457c478bd9Sstevel@tonic-gate 			/*
3467c478bd9Sstevel@tonic-gate 			 * save lio's sigevent pointer, and check
34734709573Sraf 			 * its value after releasing aio_mutex lock.
3487c478bd9Sstevel@tonic-gate 			 */
3497c478bd9Sstevel@tonic-gate 			lio_sigev = head->lio_sigqp;
3507c478bd9Sstevel@tonic-gate 			head->lio_sigqp = NULL;
35134709573Sraf 			cv_signal(&head->lio_notify);
35234709573Sraf 			if (head->lio_port >= 0 &&
35334709573Sraf 			    (lio_pkevp = head->lio_portkev) != NULL)
35434709573Sraf 				head->lio_port = -1;
3557c478bd9Sstevel@tonic-gate 		}
3567c478bd9Sstevel@tonic-gate 	}
3577c478bd9Sstevel@tonic-gate 
3587c478bd9Sstevel@tonic-gate 	/*
3597c478bd9Sstevel@tonic-gate 	 * if AIO_WAITN set then
3607c478bd9Sstevel@tonic-gate 	 * send signal only when we reached the
3617c478bd9Sstevel@tonic-gate 	 * required amount of IO's finished
3627c478bd9Sstevel@tonic-gate 	 * or when all IO's are done
3637c478bd9Sstevel@tonic-gate 	 */
3647c478bd9Sstevel@tonic-gate 	if (aiop->aio_flags & AIO_WAITN) {
3657c478bd9Sstevel@tonic-gate 		if (aiop->aio_waitncnt > 0)
3667c478bd9Sstevel@tonic-gate 			aiop->aio_waitncnt--;
3677c478bd9Sstevel@tonic-gate 		if (aiop->aio_pending == 0 ||
3687c478bd9Sstevel@tonic-gate 		    aiop->aio_waitncnt == 0)
3697c478bd9Sstevel@tonic-gate 			cv_broadcast(&aiop->aio_waitcv);
3707c478bd9Sstevel@tonic-gate 	} else {
3717c478bd9Sstevel@tonic-gate 		cv_broadcast(&aiop->aio_waitcv);
3727c478bd9Sstevel@tonic-gate 	}
3737c478bd9Sstevel@tonic-gate 
374b7555c90SSurya Prakki 	/*
375b7555c90SSurya Prakki 	 * No need to set this flag for pollq, portq, lio requests.
376d2749ac6SRoger A. Faulkner 	 * If this is an old Solaris aio request, and the process has
377d2749ac6SRoger A. Faulkner 	 * a SIGIO signal handler enabled, then send a SIGIO signal.
378b7555c90SSurya Prakki 	 */
379b7555c90SSurya Prakki 	if (!sigev && !use_port && head == NULL &&
380d2749ac6SRoger A. Faulkner 	    (reqp->aio_req_flags & AIO_SOLARIS) &&
381b7555c90SSurya Prakki 	    (func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL &&
382b7555c90SSurya Prakki 	    (func != SIG_IGN)) {
383b7555c90SSurya Prakki 		send_signal = 1;
384b7555c90SSurya Prakki 		reqp->aio_req_flags |= AIO_SIGNALLED;
385b7555c90SSurya Prakki 	}
386b7555c90SSurya Prakki 
3877c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
38834709573Sraf 	mutex_exit(&aiop->aio_portq_mutex);
38934709573Sraf 
390fa7f62f0Ssp 	/*
391fa7f62f0Ssp 	 * Could the cleanup thread be waiting for AIO with locked
392fa7f62f0Ssp 	 * resources to finish?
393fa7f62f0Ssp 	 * Ideally in that case cleanup thread should block on cleanupcv,
394fa7f62f0Ssp 	 * but there is a window, where it could miss to see a new aio
395fa7f62f0Ssp 	 * request that sneaked in.
396fa7f62f0Ssp 	 */
397fa7f62f0Ssp 	mutex_enter(&as->a_contents);
398fa7f62f0Ssp 	if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as))
399fa7f62f0Ssp 		cv_broadcast(&as->a_cv);
400fa7f62f0Ssp 	mutex_exit(&as->a_contents);
401fa7f62f0Ssp 
4027c478bd9Sstevel@tonic-gate 	if (sigev)
4037c478bd9Sstevel@tonic-gate 		aio_sigev_send(p, sigev);
404b7555c90SSurya Prakki 	else if (send_signal)
405b7555c90SSurya Prakki 		psignal(p, SIGIO);
406b7555c90SSurya Prakki 
40734709573Sraf 	if (pkevp)
40834709573Sraf 		port_send_event(pkevp);
40934709573Sraf 	if (lio_sigev)
41034709573Sraf 		aio_sigev_send(p, lio_sigev);
41134709573Sraf 	if (lio_pkevp)
41234709573Sraf 		port_send_event(lio_pkevp);
413*1b9bce10SToomas Soome 
414*1b9bce10SToomas Soome 	return (0);
4157c478bd9Sstevel@tonic-gate }
4167c478bd9Sstevel@tonic-gate 
4177c478bd9Sstevel@tonic-gate /*
4187c478bd9Sstevel@tonic-gate  * send a queued signal to the specified process when
4197c478bd9Sstevel@tonic-gate  * the event signal is non-NULL. A return value of 1
4207c478bd9Sstevel@tonic-gate  * will indicate that a signal is queued, and 0 means that
4217c478bd9Sstevel@tonic-gate  * no signal was specified, nor sent.
4227c478bd9Sstevel@tonic-gate  */
4237c478bd9Sstevel@tonic-gate static void
aio_sigev_send(proc_t * p,sigqueue_t * sigev)4247c478bd9Sstevel@tonic-gate aio_sigev_send(proc_t *p, sigqueue_t *sigev)
4257c478bd9Sstevel@tonic-gate {
4267c478bd9Sstevel@tonic-gate 	ASSERT(sigev != NULL);
4277c478bd9Sstevel@tonic-gate 
4287c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
4297c478bd9Sstevel@tonic-gate 	sigaddqa(p, NULL, sigev);
4307c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
4317c478bd9Sstevel@tonic-gate }
4327c478bd9Sstevel@tonic-gate 
4337c478bd9Sstevel@tonic-gate /*
4347c478bd9Sstevel@tonic-gate  * special case handling for zero length requests. the aio request
4357c478bd9Sstevel@tonic-gate  * short circuits the normal completion path since all that's required
4367c478bd9Sstevel@tonic-gate  * to complete this request is to copyout a zero to the aio request's
4377c478bd9Sstevel@tonic-gate  * return value.
4387c478bd9Sstevel@tonic-gate  */
4397c478bd9Sstevel@tonic-gate void
aio_zerolen(aio_req_t * reqp)4407c478bd9Sstevel@tonic-gate aio_zerolen(aio_req_t *reqp)
4417c478bd9Sstevel@tonic-gate {
4427c478bd9Sstevel@tonic-gate 
4437c478bd9Sstevel@tonic-gate 	struct buf *bp = &reqp->aio_req_buf;
4447c478bd9Sstevel@tonic-gate 
4457c478bd9Sstevel@tonic-gate 	reqp->aio_req_flags |= AIO_ZEROLEN;
4467c478bd9Sstevel@tonic-gate 
4477c478bd9Sstevel@tonic-gate 	bp->b_forw = (struct buf *)reqp;
4487c478bd9Sstevel@tonic-gate 	bp->b_proc = curproc;
4497c478bd9Sstevel@tonic-gate 
4507c478bd9Sstevel@tonic-gate 	bp->b_resid = 0;
4517c478bd9Sstevel@tonic-gate 	bp->b_flags = 0;
4527c478bd9Sstevel@tonic-gate 
4537c478bd9Sstevel@tonic-gate 	aio_done(bp);
4547c478bd9Sstevel@tonic-gate }
4557c478bd9Sstevel@tonic-gate 
4567c478bd9Sstevel@tonic-gate /*
4577c478bd9Sstevel@tonic-gate  * unlock pages previously locked by as_pagelock
4587c478bd9Sstevel@tonic-gate  */
4597c478bd9Sstevel@tonic-gate void
aphysio_unlock(aio_req_t * reqp)4607c478bd9Sstevel@tonic-gate aphysio_unlock(aio_req_t *reqp)
4617c478bd9Sstevel@tonic-gate {
4627c478bd9Sstevel@tonic-gate 	struct buf *bp;
4637c478bd9Sstevel@tonic-gate 	struct iovec *iov;
4647c478bd9Sstevel@tonic-gate 	int flags;
4657c478bd9Sstevel@tonic-gate 
4667c478bd9Sstevel@tonic-gate 	if (reqp->aio_req_flags & AIO_PHYSIODONE)
4677c478bd9Sstevel@tonic-gate 		return;
4687c478bd9Sstevel@tonic-gate 
4697c478bd9Sstevel@tonic-gate 	reqp->aio_req_flags |= AIO_PHYSIODONE;
4707c478bd9Sstevel@tonic-gate 
4717c478bd9Sstevel@tonic-gate 	if (reqp->aio_req_flags & AIO_ZEROLEN)
4727c478bd9Sstevel@tonic-gate 		return;
4737c478bd9Sstevel@tonic-gate 
4747c478bd9Sstevel@tonic-gate 	bp = &reqp->aio_req_buf;
4757c478bd9Sstevel@tonic-gate 	iov = reqp->aio_req_uio.uio_iov;
4767c478bd9Sstevel@tonic-gate 	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
4777c478bd9Sstevel@tonic-gate 	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
4787c478bd9Sstevel@tonic-gate 		as_pageunlock(bp->b_proc->p_as,
47916660111SSurya Prakki 		    bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
48016660111SSurya Prakki 		    iov->iov_base, iov->iov_len, flags);
4817c478bd9Sstevel@tonic-gate 		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
4827c478bd9Sstevel@tonic-gate 	}
4837c478bd9Sstevel@tonic-gate 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
4847c478bd9Sstevel@tonic-gate 	bp->b_flags |= B_DONE;
4857c478bd9Sstevel@tonic-gate }
4867c478bd9Sstevel@tonic-gate 
4877c478bd9Sstevel@tonic-gate /*
48834709573Sraf  * deletes a requests id from the hash table of outstanding io.
4897c478bd9Sstevel@tonic-gate  */
4907c478bd9Sstevel@tonic-gate static void
aio_hash_delete(aio_t * aiop,struct aio_req_t * reqp)49134709573Sraf aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp)
4927c478bd9Sstevel@tonic-gate {
4937c478bd9Sstevel@tonic-gate 	long index;
4947c478bd9Sstevel@tonic-gate 	aio_result_t *resultp = reqp->aio_req_resultp;
4957c478bd9Sstevel@tonic-gate 	aio_req_t *current;
4967c478bd9Sstevel@tonic-gate 	aio_req_t **nextp;
4977c478bd9Sstevel@tonic-gate 
4987c478bd9Sstevel@tonic-gate 	index = AIO_HASH(resultp);
4997c478bd9Sstevel@tonic-gate 	nextp = (aiop->aio_hash + index);
5007c478bd9Sstevel@tonic-gate 	while ((current = *nextp) != NULL) {
5017c478bd9Sstevel@tonic-gate 		if (current->aio_req_resultp == resultp) {
5027c478bd9Sstevel@tonic-gate 			*nextp = current->aio_hash_next;
5037c478bd9Sstevel@tonic-gate 			return;
5047c478bd9Sstevel@tonic-gate 		}
5057c478bd9Sstevel@tonic-gate 		nextp = &current->aio_hash_next;
5067c478bd9Sstevel@tonic-gate 	}
5077c478bd9Sstevel@tonic-gate }
5087c478bd9Sstevel@tonic-gate 
5097c478bd9Sstevel@tonic-gate /*
5107c478bd9Sstevel@tonic-gate  * Put a list head struct onto its free list.
5117c478bd9Sstevel@tonic-gate  */
5127c478bd9Sstevel@tonic-gate static void
aio_lio_free(aio_t * aiop,aio_lio_t * head)5137c478bd9Sstevel@tonic-gate aio_lio_free(aio_t *aiop, aio_lio_t *head)
5147c478bd9Sstevel@tonic-gate {
5157c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
5167c478bd9Sstevel@tonic-gate 
5177c478bd9Sstevel@tonic-gate 	if (head->lio_sigqp != NULL)
5187c478bd9Sstevel@tonic-gate 		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
5197c478bd9Sstevel@tonic-gate 	head->lio_next = aiop->aio_lio_free;
5207c478bd9Sstevel@tonic-gate 	aiop->aio_lio_free = head;
5217c478bd9Sstevel@tonic-gate }
5227c478bd9Sstevel@tonic-gate 
5237c478bd9Sstevel@tonic-gate /*
5247c478bd9Sstevel@tonic-gate  * Put a reqp onto the freelist.
5257c478bd9Sstevel@tonic-gate  */
5267c478bd9Sstevel@tonic-gate void
aio_req_free(aio_t * aiop,aio_req_t * reqp)5277c478bd9Sstevel@tonic-gate aio_req_free(aio_t *aiop, aio_req_t *reqp)
5287c478bd9Sstevel@tonic-gate {
5297c478bd9Sstevel@tonic-gate 	aio_lio_t *liop;
5307c478bd9Sstevel@tonic-gate 
5317c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
5327c478bd9Sstevel@tonic-gate 
5337c478bd9Sstevel@tonic-gate 	if (reqp->aio_req_portkev) {
5347c478bd9Sstevel@tonic-gate 		port_free_event(reqp->aio_req_portkev);
5357c478bd9Sstevel@tonic-gate 		reqp->aio_req_portkev = NULL;
5367c478bd9Sstevel@tonic-gate 	}
5377c478bd9Sstevel@tonic-gate 
5387c478bd9Sstevel@tonic-gate 	if ((liop = reqp->aio_req_lio) != NULL) {
5397c478bd9Sstevel@tonic-gate 		if (--liop->lio_nent == 0)
5407c478bd9Sstevel@tonic-gate 			aio_lio_free(aiop, liop);
5417c478bd9Sstevel@tonic-gate 		reqp->aio_req_lio = NULL;
5427c478bd9Sstevel@tonic-gate 	}
54334709573Sraf 	if (reqp->aio_req_sigqp != NULL) {
5447c478bd9Sstevel@tonic-gate 		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
54534709573Sraf 		reqp->aio_req_sigqp = NULL;
54634709573Sraf 	}
5477c478bd9Sstevel@tonic-gate 	reqp->aio_req_next = aiop->aio_free;
54834709573Sraf 	reqp->aio_req_prev = NULL;
5497c478bd9Sstevel@tonic-gate 	aiop->aio_free = reqp;
5507c478bd9Sstevel@tonic-gate 	aiop->aio_outstanding--;
5517c478bd9Sstevel@tonic-gate 	if (aiop->aio_outstanding == 0)
5527c478bd9Sstevel@tonic-gate 		cv_broadcast(&aiop->aio_waitcv);
5537c478bd9Sstevel@tonic-gate 	aio_hash_delete(aiop, reqp);
5547c478bd9Sstevel@tonic-gate }
5557c478bd9Sstevel@tonic-gate 
5567c478bd9Sstevel@tonic-gate /*
5577c478bd9Sstevel@tonic-gate  * Put a reqp onto the freelist.
5587c478bd9Sstevel@tonic-gate  */
5597c478bd9Sstevel@tonic-gate void
aio_req_free_port(aio_t * aiop,aio_req_t * reqp)5607c478bd9Sstevel@tonic-gate aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
5617c478bd9Sstevel@tonic-gate {
5627c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
5637c478bd9Sstevel@tonic-gate 
5647c478bd9Sstevel@tonic-gate 	reqp->aio_req_next = aiop->aio_free;
56534709573Sraf 	reqp->aio_req_prev = NULL;
5667c478bd9Sstevel@tonic-gate 	aiop->aio_free = reqp;
5677c478bd9Sstevel@tonic-gate 	aiop->aio_outstanding--;
5687c478bd9Sstevel@tonic-gate 	aio_hash_delete(aiop, reqp);
5697c478bd9Sstevel@tonic-gate }
5707c478bd9Sstevel@tonic-gate 
5717c478bd9Sstevel@tonic-gate 
/*
 * Verify the integrity of a queue.
 *
 * DEBUG kernels walk the circular, doubly linked queue starting at
 * 'head' and assert that every neighbour link is consistent.  If
 * 'entry_present' is non-NULL it must be found exactly once; if
 * 'entry_missing' is non-NULL it must not be found at all.  In
 * non-DEBUG kernels the check compiles away to nothing.
 */
#if defined(DEBUG)
static void
aio_verify_queue(aio_req_t *head,
	aio_req_t *entry_present, aio_req_t *entry_missing)
{
	aio_req_t *reqp;
	int found = 0;		/* occurrences of entry_present */
	int present = 0;	/* occurrences of entry_missing */

	if ((reqp = head) != NULL) {
		do {
			ASSERT(reqp->aio_req_prev->aio_req_next == reqp);
			ASSERT(reqp->aio_req_next->aio_req_prev == reqp);
			if (entry_present == reqp)
				found++;
			if (entry_missing == reqp)
				present++;
		} while ((reqp = reqp->aio_req_next) != head);
	}
	ASSERT(entry_present == NULL || found == 1);
	ASSERT(entry_missing == NULL || present == 0);
}
#else
#define	aio_verify_queue(x, y, z)
#endif
6007c478bd9Sstevel@tonic-gate 
6017c478bd9Sstevel@tonic-gate /*
60234709573Sraf  * Put a request onto the tail of a queue.
6037c478bd9Sstevel@tonic-gate  */
60434709573Sraf void
aio_enq(aio_req_t ** qhead,aio_req_t * reqp,int qflg_new)60534709573Sraf aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
6067c478bd9Sstevel@tonic-gate {
60734709573Sraf 	aio_req_t *head;
60834709573Sraf 	aio_req_t *prev;
60934709573Sraf 
61034709573Sraf 	aio_verify_queue(*qhead, NULL, reqp);
6117c478bd9Sstevel@tonic-gate 
61234709573Sraf 	if ((head = *qhead) == NULL) {
6137c478bd9Sstevel@tonic-gate 		reqp->aio_req_next = reqp;
6147c478bd9Sstevel@tonic-gate 		reqp->aio_req_prev = reqp;
61534709573Sraf 		*qhead = reqp;
6167c478bd9Sstevel@tonic-gate 	} else {
61734709573Sraf 		reqp->aio_req_next = head;
61834709573Sraf 		reqp->aio_req_prev = prev = head->aio_req_prev;
61934709573Sraf 		prev->aio_req_next = reqp;
62034709573Sraf 		head->aio_req_prev = reqp;
6217c478bd9Sstevel@tonic-gate 	}
62234709573Sraf 	reqp->aio_req_flags |= qflg_new;
6237c478bd9Sstevel@tonic-gate }
6247c478bd9Sstevel@tonic-gate 
6257c478bd9Sstevel@tonic-gate /*
62634709573Sraf  * Remove a request from its queue.
6277c478bd9Sstevel@tonic-gate  */
6287c478bd9Sstevel@tonic-gate void
aio_deq(aio_req_t ** qhead,aio_req_t * reqp)62934709573Sraf aio_deq(aio_req_t **qhead, aio_req_t *reqp)
6307c478bd9Sstevel@tonic-gate {
63134709573Sraf 	aio_verify_queue(*qhead, reqp, NULL);
6327c478bd9Sstevel@tonic-gate 
63334709573Sraf 	if (reqp->aio_req_next == reqp) {
63434709573Sraf 		*qhead = NULL;
6357c478bd9Sstevel@tonic-gate 	} else {
6367c478bd9Sstevel@tonic-gate 		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
6377c478bd9Sstevel@tonic-gate 		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
63834709573Sraf 		if (*qhead == reqp)
63934709573Sraf 			*qhead = reqp->aio_req_next;
64034709573Sraf 	}
64134709573Sraf 	reqp->aio_req_next = NULL;
6427c478bd9Sstevel@tonic-gate 	reqp->aio_req_prev = NULL;
6437c478bd9Sstevel@tonic-gate }
6447c478bd9Sstevel@tonic-gate 
6457c478bd9Sstevel@tonic-gate /*
6467c478bd9Sstevel@tonic-gate  * concatenate a specified queue with the cleanupq. the specified
6477c478bd9Sstevel@tonic-gate  * queue is put onto the tail of the cleanupq. all elements on the
6487c478bd9Sstevel@tonic-gate  * specified queue should have their aio_req_flags field cleared.
6497c478bd9Sstevel@tonic-gate  */
6507c478bd9Sstevel@tonic-gate /*ARGSUSED*/
6517c478bd9Sstevel@tonic-gate void
aio_cleanupq_concat(aio_t * aiop,aio_req_t * q2,int qflg)6527c478bd9Sstevel@tonic-gate aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
6537c478bd9Sstevel@tonic-gate {
6547c478bd9Sstevel@tonic-gate 	aio_req_t *cleanupqhead, *q2tail;
6557c478bd9Sstevel@tonic-gate 	aio_req_t *reqp = q2;
6567c478bd9Sstevel@tonic-gate 
6577c478bd9Sstevel@tonic-gate 	do {
6587c478bd9Sstevel@tonic-gate 		ASSERT(reqp->aio_req_flags & qflg);
6597c478bd9Sstevel@tonic-gate 		reqp->aio_req_flags &= ~qflg;
6607c478bd9Sstevel@tonic-gate 		reqp->aio_req_flags |= AIO_CLEANUPQ;
6617c478bd9Sstevel@tonic-gate 	} while ((reqp = reqp->aio_req_next) != q2);
6627c478bd9Sstevel@tonic-gate 
6637c478bd9Sstevel@tonic-gate 	cleanupqhead = aiop->aio_cleanupq;
6647c478bd9Sstevel@tonic-gate 	if (cleanupqhead == NULL)
6657c478bd9Sstevel@tonic-gate 		aiop->aio_cleanupq = q2;
6667c478bd9Sstevel@tonic-gate 	else {
6677c478bd9Sstevel@tonic-gate 		cleanupqhead->aio_req_prev->aio_req_next = q2;
6687c478bd9Sstevel@tonic-gate 		q2tail = q2->aio_req_prev;
6697c478bd9Sstevel@tonic-gate 		q2tail->aio_req_next = cleanupqhead;
6707c478bd9Sstevel@tonic-gate 		q2->aio_req_prev = cleanupqhead->aio_req_prev;
6717c478bd9Sstevel@tonic-gate 		cleanupqhead->aio_req_prev = q2tail;
6727c478bd9Sstevel@tonic-gate 	}
6737c478bd9Sstevel@tonic-gate }
6747c478bd9Sstevel@tonic-gate 
6757c478bd9Sstevel@tonic-gate /*
6767c478bd9Sstevel@tonic-gate  * cleanup aio requests that are on the per-process poll queue.
6777c478bd9Sstevel@tonic-gate  */
6787c478bd9Sstevel@tonic-gate void
aio_cleanup(int flag)6797c478bd9Sstevel@tonic-gate aio_cleanup(int flag)
6807c478bd9Sstevel@tonic-gate {
6817c478bd9Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
6827c478bd9Sstevel@tonic-gate 	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
6837c478bd9Sstevel@tonic-gate 	aio_req_t *cleanupport;
6847c478bd9Sstevel@tonic-gate 	aio_req_t *portq = NULL;
6857c478bd9Sstevel@tonic-gate 	void (*func)();
6867c478bd9Sstevel@tonic-gate 	int signalled = 0;
6877c478bd9Sstevel@tonic-gate 	int qflag = 0;
6887c478bd9Sstevel@tonic-gate 	int exitflg;
6897c478bd9Sstevel@tonic-gate 
6907c478bd9Sstevel@tonic-gate 	ASSERT(aiop != NULL);
6917c478bd9Sstevel@tonic-gate 
6927c478bd9Sstevel@tonic-gate 	if (flag == AIO_CLEANUP_EXIT)
6937c478bd9Sstevel@tonic-gate 		exitflg = AIO_CLEANUP_EXIT;
6947c478bd9Sstevel@tonic-gate 	else
6957c478bd9Sstevel@tonic-gate 		exitflg = 0;
6967c478bd9Sstevel@tonic-gate 
6977c478bd9Sstevel@tonic-gate 	/*
6987c478bd9Sstevel@tonic-gate 	 * We need to get the aio_cleanupq_mutex because we are calling
6997c478bd9Sstevel@tonic-gate 	 * aio_cleanup_cleanupq()
7007c478bd9Sstevel@tonic-gate 	 */
7017c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_cleanupq_mutex);
7027c478bd9Sstevel@tonic-gate 	/*
7037c478bd9Sstevel@tonic-gate 	 * take all the requests off the cleanupq, the notifyq,
7047c478bd9Sstevel@tonic-gate 	 * and the pollq.
7057c478bd9Sstevel@tonic-gate 	 */
7067c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
7077c478bd9Sstevel@tonic-gate 	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
7087c478bd9Sstevel@tonic-gate 		aiop->aio_cleanupq = NULL;
7097c478bd9Sstevel@tonic-gate 		qflag++;
7107c478bd9Sstevel@tonic-gate 	}
7117c478bd9Sstevel@tonic-gate 	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
7127c478bd9Sstevel@tonic-gate 		aiop->aio_notifyq = NULL;
7137c478bd9Sstevel@tonic-gate 		qflag++;
7147c478bd9Sstevel@tonic-gate 	}
7157c478bd9Sstevel@tonic-gate 	if ((pollqhead = aiop->aio_pollq) != NULL) {
7167c478bd9Sstevel@tonic-gate 		aiop->aio_pollq = NULL;
7177c478bd9Sstevel@tonic-gate 		qflag++;
7187c478bd9Sstevel@tonic-gate 	}
7197c478bd9Sstevel@tonic-gate 	if (flag) {
7207c478bd9Sstevel@tonic-gate 		if ((portq = aiop->aio_portq) != NULL)
7217c478bd9Sstevel@tonic-gate 			qflag++;
7227c478bd9Sstevel@tonic-gate 
7237c478bd9Sstevel@tonic-gate 		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
7247c478bd9Sstevel@tonic-gate 			aiop->aio_portcleanupq = NULL;
7257c478bd9Sstevel@tonic-gate 			qflag++;
7267c478bd9Sstevel@tonic-gate 		}
7277c478bd9Sstevel@tonic-gate 	}
7287c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
7297c478bd9Sstevel@tonic-gate 
7307c478bd9Sstevel@tonic-gate 	/*
7317c478bd9Sstevel@tonic-gate 	 * return immediately if cleanupq, pollq, and
7327c478bd9Sstevel@tonic-gate 	 * notifyq are all empty. someone else must have
7337c478bd9Sstevel@tonic-gate 	 * emptied them.
7347c478bd9Sstevel@tonic-gate 	 */
7357c478bd9Sstevel@tonic-gate 	if (!qflag) {
7367c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_cleanupq_mutex);
7377c478bd9Sstevel@tonic-gate 		return;
7387c478bd9Sstevel@tonic-gate 	}
7397c478bd9Sstevel@tonic-gate 
7407c478bd9Sstevel@tonic-gate 	/*
7417c478bd9Sstevel@tonic-gate 	 * do cleanup for the various queues.
7427c478bd9Sstevel@tonic-gate 	 */
7437c478bd9Sstevel@tonic-gate 	if (cleanupqhead)
74416660111SSurya Prakki 		signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
7457c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_cleanupq_mutex);
7467c478bd9Sstevel@tonic-gate 	if (notifyqhead)
7477c478bd9Sstevel@tonic-gate 		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
7487c478bd9Sstevel@tonic-gate 	if (pollqhead)
7497c478bd9Sstevel@tonic-gate 		aio_cleanup_pollq(aiop, pollqhead, exitflg);
7507c478bd9Sstevel@tonic-gate 	if (flag && (cleanupport || portq))
7517c478bd9Sstevel@tonic-gate 		aio_cleanup_portq(aiop, cleanupport, exitflg);
7527c478bd9Sstevel@tonic-gate 
7537c478bd9Sstevel@tonic-gate 	if (exitflg)
7547c478bd9Sstevel@tonic-gate 		return;
7557c478bd9Sstevel@tonic-gate 
7567c478bd9Sstevel@tonic-gate 	/*
7577c478bd9Sstevel@tonic-gate 	 * If we have an active aio_cleanup_thread it's possible for
7587c478bd9Sstevel@tonic-gate 	 * this routine to push something on to the done queue after
7597c478bd9Sstevel@tonic-gate 	 * an aiowait/aiosuspend thread has already decided to block.
7607c478bd9Sstevel@tonic-gate 	 * This being the case, we need a cv_broadcast here to wake
7617c478bd9Sstevel@tonic-gate 	 * these threads up. It is simpler and cleaner to do this
7627c478bd9Sstevel@tonic-gate 	 * broadcast here than in the individual cleanup routines.
7637c478bd9Sstevel@tonic-gate 	 */
7647c478bd9Sstevel@tonic-gate 
7657c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
766d2749ac6SRoger A. Faulkner 	/*
767d2749ac6SRoger A. Faulkner 	 * If there has never been an old solaris aio request
768d2749ac6SRoger A. Faulkner 	 * issued by this process, then do not send a SIGIO signal.
769d2749ac6SRoger A. Faulkner 	 */
770d2749ac6SRoger A. Faulkner 	if (!(aiop->aio_flags & AIO_SOLARIS_REQ))
771d2749ac6SRoger A. Faulkner 		signalled = 1;
7727c478bd9Sstevel@tonic-gate 	cv_broadcast(&aiop->aio_waitcv);
7737c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
7747c478bd9Sstevel@tonic-gate 
7757c478bd9Sstevel@tonic-gate 	/*
7767c478bd9Sstevel@tonic-gate 	 * Only if the process wasn't already signalled,
7777c478bd9Sstevel@tonic-gate 	 * determine if a SIGIO signal should be delievered.
7787c478bd9Sstevel@tonic-gate 	 */
7797c478bd9Sstevel@tonic-gate 	if (!signalled &&
78034709573Sraf 	    (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
7817c478bd9Sstevel@tonic-gate 	    func != SIG_IGN)
7827c478bd9Sstevel@tonic-gate 		psignal(curproc, SIGIO);
7837c478bd9Sstevel@tonic-gate }
7847c478bd9Sstevel@tonic-gate 
7857c478bd9Sstevel@tonic-gate 
7867c478bd9Sstevel@tonic-gate /*
7877c478bd9Sstevel@tonic-gate  * Do cleanup for every element of the port cleanup queue.
7887c478bd9Sstevel@tonic-gate  */
7897c478bd9Sstevel@tonic-gate static void
aio_cleanup_portq(aio_t * aiop,aio_req_t * cleanupq,int exitflag)7907c478bd9Sstevel@tonic-gate aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
7917c478bd9Sstevel@tonic-gate {
7927c478bd9Sstevel@tonic-gate 	aio_req_t	*reqp;
7937c478bd9Sstevel@tonic-gate 	aio_req_t	*next;
7947c478bd9Sstevel@tonic-gate 	aio_req_t	*headp;
79534709573Sraf 	aio_lio_t	*liop;
7967c478bd9Sstevel@tonic-gate 
7977c478bd9Sstevel@tonic-gate 	/* first check the portq */
7987c478bd9Sstevel@tonic-gate 	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
7997c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
8007c478bd9Sstevel@tonic-gate 		if (aiop->aio_flags & AIO_CLEANUP)
8017c478bd9Sstevel@tonic-gate 			aiop->aio_flags |= AIO_CLEANUP_PORT;
8027c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
8037c478bd9Sstevel@tonic-gate 
80434709573Sraf 		/*
80534709573Sraf 		 * It is not allowed to hold locks during aphysio_unlock().
80634709573Sraf 		 * The aio_done() interrupt function will try to acquire
80734709573Sraf 		 * aio_mutex and aio_portq_mutex.  Therefore we disconnect
80834709573Sraf 		 * the portq list from the aiop for the duration of the
80934709573Sraf 		 * aphysio_unlock() loop below.
81034709573Sraf 		 */
8117c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_portq_mutex);
8127c478bd9Sstevel@tonic-gate 		headp = aiop->aio_portq;
8137c478bd9Sstevel@tonic-gate 		aiop->aio_portq = NULL;
8147c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_portq_mutex);
81534709573Sraf 		if ((reqp = headp) != NULL) {
81634709573Sraf 			do {
81734709573Sraf 				next = reqp->aio_req_next;
81834709573Sraf 				aphysio_unlock(reqp);
81934709573Sraf 				if (exitflag) {
82034709573Sraf 					mutex_enter(&aiop->aio_mutex);
82134709573Sraf 					aio_req_free(aiop, reqp);
82234709573Sraf 					mutex_exit(&aiop->aio_mutex);
82334709573Sraf 				}
82434709573Sraf 			} while ((reqp = next) != headp);
8257c478bd9Sstevel@tonic-gate 		}
8267c478bd9Sstevel@tonic-gate 
8277c478bd9Sstevel@tonic-gate 		if (headp != NULL && exitflag == 0) {
82834709573Sraf 			/* move unlocked requests back to the port queue */
82934709573Sraf 			aio_req_t *newq;
83034709573Sraf 
8317c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_portq_mutex);
83234709573Sraf 			if ((newq = aiop->aio_portq) != NULL) {
83334709573Sraf 				aio_req_t *headprev = headp->aio_req_prev;
83434709573Sraf 				aio_req_t *newqprev = newq->aio_req_prev;
83534709573Sraf 
83634709573Sraf 				headp->aio_req_prev = newqprev;
83734709573Sraf 				newq->aio_req_prev = headprev;
83834709573Sraf 				headprev->aio_req_next = newq;
83934709573Sraf 				newqprev->aio_req_next = headp;
8407c478bd9Sstevel@tonic-gate 			}
8417c478bd9Sstevel@tonic-gate 			aiop->aio_portq = headp;
8427c478bd9Sstevel@tonic-gate 			cv_broadcast(&aiop->aio_portcv);
8437c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_portq_mutex);
8447c478bd9Sstevel@tonic-gate 		}
8457c478bd9Sstevel@tonic-gate 	}
8467c478bd9Sstevel@tonic-gate 
8477c478bd9Sstevel@tonic-gate 	/* now check the port cleanup queue */
84834709573Sraf 	if ((reqp = cleanupq) == NULL)
84934709573Sraf 		return;
85034709573Sraf 	do {
8517c478bd9Sstevel@tonic-gate 		next = reqp->aio_req_next;
8527c478bd9Sstevel@tonic-gate 		aphysio_unlock(reqp);
8537c478bd9Sstevel@tonic-gate 		if (exitflag) {
8547c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8557c478bd9Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8567c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8577c478bd9Sstevel@tonic-gate 		} else {
8587c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_portq_mutex);
85934709573Sraf 			aio_enq(&aiop->aio_portq, reqp, 0);
8607c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_portq_mutex);
86134709573Sraf 			port_send_event(reqp->aio_req_portkev);
86234709573Sraf 			if ((liop = reqp->aio_req_lio) != NULL) {
86334709573Sraf 				int send_event = 0;
86434709573Sraf 
86534709573Sraf 				mutex_enter(&aiop->aio_mutex);
86634709573Sraf 				ASSERT(liop->lio_refcnt > 0);
86734709573Sraf 				if (--liop->lio_refcnt == 0) {
86834709573Sraf 					if (liop->lio_port >= 0 &&
86934709573Sraf 					    liop->lio_portkev) {
87034709573Sraf 						liop->lio_port = -1;
87134709573Sraf 						send_event = 1;
87234709573Sraf 					}
87334709573Sraf 				}
87434709573Sraf 				mutex_exit(&aiop->aio_mutex);
87534709573Sraf 				if (send_event)
87634709573Sraf 					port_send_event(liop->lio_portkev);
87734709573Sraf 			}
8787c478bd9Sstevel@tonic-gate 		}
87934709573Sraf 	} while ((reqp = next) != cleanupq);
8807c478bd9Sstevel@tonic-gate }
8817c478bd9Sstevel@tonic-gate 
8827c478bd9Sstevel@tonic-gate /*
8837c478bd9Sstevel@tonic-gate  * Do cleanup for every element of the cleanupq.
8847c478bd9Sstevel@tonic-gate  */
88516660111SSurya Prakki static int
aio_cleanup_cleanupq(aio_t * aiop,aio_req_t * qhead,int exitflg)8867c478bd9Sstevel@tonic-gate aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
8877c478bd9Sstevel@tonic-gate {
8887c478bd9Sstevel@tonic-gate 	aio_req_t *reqp, *next;
88916660111SSurya Prakki 	int signalled = 0;
89034709573Sraf 
8917c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
8927c478bd9Sstevel@tonic-gate 
8937c478bd9Sstevel@tonic-gate 	/*
8947c478bd9Sstevel@tonic-gate 	 * Since aio_req_done() or aio_req_find() use the HASH list to find
8957c478bd9Sstevel@tonic-gate 	 * the required requests, they could potentially take away elements
8967c478bd9Sstevel@tonic-gate 	 * if they are already done (AIO_DONEQ is set).
8977c478bd9Sstevel@tonic-gate 	 * The aio_cleanupq_mutex protects the queue for the duration of the
8987c478bd9Sstevel@tonic-gate 	 * loop from aio_req_done() and aio_req_find().
8997c478bd9Sstevel@tonic-gate 	 */
90034709573Sraf 	if ((reqp = qhead) == NULL)
90116660111SSurya Prakki 		return (0);
90234709573Sraf 	do {
9037c478bd9Sstevel@tonic-gate 		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
90434709573Sraf 		ASSERT(reqp->aio_req_portkev == NULL);
9057c478bd9Sstevel@tonic-gate 		next = reqp->aio_req_next;
9067c478bd9Sstevel@tonic-gate 		aphysio_unlock(reqp);
9077c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
90834709573Sraf 		if (exitflg)
9097c478bd9Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
91034709573Sraf 		else
91134709573Sraf 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
912b7555c90SSurya Prakki 		if (!exitflg) {
913b7555c90SSurya Prakki 			if (reqp->aio_req_flags & AIO_SIGNALLED)
914b7555c90SSurya Prakki 				signalled++;
915b7555c90SSurya Prakki 			else
916b7555c90SSurya Prakki 				reqp->aio_req_flags |= AIO_SIGNALLED;
917b7555c90SSurya Prakki 		}
9187c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
91934709573Sraf 	} while ((reqp = next) != qhead);
92016660111SSurya Prakki 	return (signalled);
9217c478bd9Sstevel@tonic-gate }
9227c478bd9Sstevel@tonic-gate 
9237c478bd9Sstevel@tonic-gate /*
9247c478bd9Sstevel@tonic-gate  * do cleanup for every element of the notify queue.
9257c478bd9Sstevel@tonic-gate  */
9267c478bd9Sstevel@tonic-gate static int
aio_cleanup_notifyq(aio_t * aiop,aio_req_t * qhead,int exitflg)9277c478bd9Sstevel@tonic-gate aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
9287c478bd9Sstevel@tonic-gate {
9297c478bd9Sstevel@tonic-gate 	aio_req_t *reqp, *next;
9307c478bd9Sstevel@tonic-gate 	aio_lio_t *liohead;
9317c478bd9Sstevel@tonic-gate 	sigqueue_t *sigev, *lio_sigev = NULL;
9327c478bd9Sstevel@tonic-gate 	int signalled = 0;
9337c478bd9Sstevel@tonic-gate 
93434709573Sraf 	if ((reqp = qhead) == NULL)
93534709573Sraf 		return (0);
93634709573Sraf 	do {
9377c478bd9Sstevel@tonic-gate 		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
9387c478bd9Sstevel@tonic-gate 		next = reqp->aio_req_next;
9397c478bd9Sstevel@tonic-gate 		aphysio_unlock(reqp);
9407c478bd9Sstevel@tonic-gate 		if (exitflg) {
9417c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
9427c478bd9Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
9437c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
94434709573Sraf 		} else {
94534709573Sraf 			mutex_enter(&aiop->aio_mutex);
94634709573Sraf 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
94734709573Sraf 			sigev = reqp->aio_req_sigqp;
94834709573Sraf 			reqp->aio_req_sigqp = NULL;
94934709573Sraf 			if ((liohead = reqp->aio_req_lio) != NULL) {
95034709573Sraf 				ASSERT(liohead->lio_refcnt > 0);
95134709573Sraf 				if (--liohead->lio_refcnt == 0) {
95234709573Sraf 					cv_signal(&liohead->lio_notify);
95334709573Sraf 					lio_sigev = liohead->lio_sigqp;
95434709573Sraf 					liohead->lio_sigqp = NULL;
95534709573Sraf 				}
95634709573Sraf 			}
95734709573Sraf 			mutex_exit(&aiop->aio_mutex);
95834709573Sraf 			if (sigev) {
95934709573Sraf 				signalled++;
96034709573Sraf 				aio_sigev_send(reqp->aio_req_buf.b_proc,
96134709573Sraf 				    sigev);
96234709573Sraf 			}
96334709573Sraf 			if (lio_sigev) {
96434709573Sraf 				signalled++;
96534709573Sraf 				aio_sigev_send(reqp->aio_req_buf.b_proc,
96634709573Sraf 				    lio_sigev);
9677c478bd9Sstevel@tonic-gate 			}
9687c478bd9Sstevel@tonic-gate 		}
96934709573Sraf 	} while ((reqp = next) != qhead);
97034709573Sraf 
9717c478bd9Sstevel@tonic-gate 	return (signalled);
9727c478bd9Sstevel@tonic-gate }
9737c478bd9Sstevel@tonic-gate 
9747c478bd9Sstevel@tonic-gate /*
9757c478bd9Sstevel@tonic-gate  * Do cleanup for every element of the poll queue.
9767c478bd9Sstevel@tonic-gate  */
9777c478bd9Sstevel@tonic-gate static void
aio_cleanup_pollq(aio_t * aiop,aio_req_t * qhead,int exitflg)9787c478bd9Sstevel@tonic-gate aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
9797c478bd9Sstevel@tonic-gate {
9807c478bd9Sstevel@tonic-gate 	aio_req_t *reqp, *next;
9817c478bd9Sstevel@tonic-gate 
9827c478bd9Sstevel@tonic-gate 	/*
9837c478bd9Sstevel@tonic-gate 	 * As no other threads should be accessing the queue at this point,
9847c478bd9Sstevel@tonic-gate 	 * it isn't necessary to hold aio_mutex while we traverse its elements.
9857c478bd9Sstevel@tonic-gate 	 */
98634709573Sraf 	if ((reqp = qhead) == NULL)
98734709573Sraf 		return;
98834709573Sraf 	do {
9897c478bd9Sstevel@tonic-gate 		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
9907c478bd9Sstevel@tonic-gate 		next = reqp->aio_req_next;
9917c478bd9Sstevel@tonic-gate 		aphysio_unlock(reqp);
9927c478bd9Sstevel@tonic-gate 		if (exitflg) {
9937c478bd9Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
9947c478bd9Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
9957c478bd9Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
99634709573Sraf 		} else {
99734709573Sraf 			aio_copyout_result(reqp);
99834709573Sraf 			mutex_enter(&aiop->aio_mutex);
99934709573Sraf 			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
100034709573Sraf 			mutex_exit(&aiop->aio_mutex);
10017c478bd9Sstevel@tonic-gate 		}
100234709573Sraf 	} while ((reqp = next) != qhead);
10037c478bd9Sstevel@tonic-gate }
10047c478bd9Sstevel@tonic-gate 
10057c478bd9Sstevel@tonic-gate /*
10067c478bd9Sstevel@tonic-gate  * called by exit(). waits for all outstanding kaio to finish
10077c478bd9Sstevel@tonic-gate  * before the kaio resources are freed.
10087c478bd9Sstevel@tonic-gate  */
10097c478bd9Sstevel@tonic-gate void
aio_cleanup_exit(void)10107c478bd9Sstevel@tonic-gate aio_cleanup_exit(void)
10117c478bd9Sstevel@tonic-gate {
10127c478bd9Sstevel@tonic-gate 	proc_t *p = curproc;
10137c478bd9Sstevel@tonic-gate 	aio_t *aiop = p->p_aio;
10147c478bd9Sstevel@tonic-gate 	aio_req_t *reqp, *next, *head;
10157c478bd9Sstevel@tonic-gate 	aio_lio_t *nxtlio, *liop;
10167c478bd9Sstevel@tonic-gate 
10177c478bd9Sstevel@tonic-gate 	/*
10187c478bd9Sstevel@tonic-gate 	 * wait for all outstanding kaio to complete. process
10197c478bd9Sstevel@tonic-gate 	 * is now single-threaded; no other kaio requests can
10207c478bd9Sstevel@tonic-gate 	 * happen once aio_pending is zero.
10217c478bd9Sstevel@tonic-gate 	 */
10227c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
10237c478bd9Sstevel@tonic-gate 	aiop->aio_flags |= AIO_CLEANUP;
10247c478bd9Sstevel@tonic-gate 	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
10257c478bd9Sstevel@tonic-gate 		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
10267c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
10277c478bd9Sstevel@tonic-gate 
10287c478bd9Sstevel@tonic-gate 	/* cleanup the cleanup-thread queues. */
10297c478bd9Sstevel@tonic-gate 	aio_cleanup(AIO_CLEANUP_EXIT);
10307c478bd9Sstevel@tonic-gate 
10317c478bd9Sstevel@tonic-gate 	/*
10327c478bd9Sstevel@tonic-gate 	 * Although this process is now single-threaded, we
10337c478bd9Sstevel@tonic-gate 	 * still need to protect ourselves against a race with
10347c478bd9Sstevel@tonic-gate 	 * aio_cleanup_dr_delete_memory().
10357c478bd9Sstevel@tonic-gate 	 */
10367c478bd9Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
10377c478bd9Sstevel@tonic-gate 
10387c478bd9Sstevel@tonic-gate 	/*
10397c478bd9Sstevel@tonic-gate 	 * free up the done queue's resources.
10407c478bd9Sstevel@tonic-gate 	 */
10417c478bd9Sstevel@tonic-gate 	if ((head = aiop->aio_doneq) != NULL) {
104234709573Sraf 		aiop->aio_doneq = NULL;
104334709573Sraf 		reqp = head;
104434709573Sraf 		do {
10457c478bd9Sstevel@tonic-gate 			next = reqp->aio_req_next;
10467c478bd9Sstevel@tonic-gate 			aphysio_unlock(reqp);
10477c478bd9Sstevel@tonic-gate 			kmem_free(reqp, sizeof (struct aio_req_t));
104834709573Sraf 		} while ((reqp = next) != head);
10497c478bd9Sstevel@tonic-gate 	}
10507c478bd9Sstevel@tonic-gate 	/*
10517c478bd9Sstevel@tonic-gate 	 * release aio request freelist.
10527c478bd9Sstevel@tonic-gate 	 */
10537c478bd9Sstevel@tonic-gate 	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
10547c478bd9Sstevel@tonic-gate 		next = reqp->aio_req_next;
10557c478bd9Sstevel@tonic-gate 		kmem_free(reqp, sizeof (struct aio_req_t));
10567c478bd9Sstevel@tonic-gate 	}
10577c478bd9Sstevel@tonic-gate 
10587c478bd9Sstevel@tonic-gate 	/*
10597c478bd9Sstevel@tonic-gate 	 * release io list head freelist.
10607c478bd9Sstevel@tonic-gate 	 */
10617c478bd9Sstevel@tonic-gate 	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
10627c478bd9Sstevel@tonic-gate 		nxtlio = liop->lio_next;
10637c478bd9Sstevel@tonic-gate 		kmem_free(liop, sizeof (aio_lio_t));
10647c478bd9Sstevel@tonic-gate 	}
10657c478bd9Sstevel@tonic-gate 
10667c478bd9Sstevel@tonic-gate 	if (aiop->aio_iocb)
10677c478bd9Sstevel@tonic-gate 		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
10687c478bd9Sstevel@tonic-gate 
10697c478bd9Sstevel@tonic-gate 	mutex_destroy(&aiop->aio_mutex);
10707c478bd9Sstevel@tonic-gate 	mutex_destroy(&aiop->aio_portq_mutex);
10717c478bd9Sstevel@tonic-gate 	mutex_destroy(&aiop->aio_cleanupq_mutex);
10727c478bd9Sstevel@tonic-gate 	p->p_aio = NULL;
10737c478bd9Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
10747c478bd9Sstevel@tonic-gate 	kmem_free(aiop, sizeof (struct aio));
10757c478bd9Sstevel@tonic-gate }
10767c478bd9Sstevel@tonic-gate 
10777c478bd9Sstevel@tonic-gate /*
10787c478bd9Sstevel@tonic-gate  * copy out aio request's result to a user-level result_t buffer.
10797c478bd9Sstevel@tonic-gate  */
10807c478bd9Sstevel@tonic-gate void
aio_copyout_result(aio_req_t * reqp)10817c478bd9Sstevel@tonic-gate aio_copyout_result(aio_req_t *reqp)
10827c478bd9Sstevel@tonic-gate {
10837c478bd9Sstevel@tonic-gate 	struct buf	*bp;
10847c478bd9Sstevel@tonic-gate 	struct iovec	*iov;
10857c478bd9Sstevel@tonic-gate 	void		*resultp;
10867c478bd9Sstevel@tonic-gate 	int		error;
10877c478bd9Sstevel@tonic-gate 	size_t		retval;
10887c478bd9Sstevel@tonic-gate 
10897c478bd9Sstevel@tonic-gate 	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
10907c478bd9Sstevel@tonic-gate 		return;
10917c478bd9Sstevel@tonic-gate 
10927c478bd9Sstevel@tonic-gate 	reqp->aio_req_flags |= AIO_COPYOUTDONE;
10937c478bd9Sstevel@tonic-gate 
10947c478bd9Sstevel@tonic-gate 	iov = reqp->aio_req_uio.uio_iov;
10957c478bd9Sstevel@tonic-gate 	bp = &reqp->aio_req_buf;
10967c478bd9Sstevel@tonic-gate 	/* "resultp" points to user-level result_t buffer */
10977c478bd9Sstevel@tonic-gate 	resultp = (void *)reqp->aio_req_resultp;
10987c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
10997c478bd9Sstevel@tonic-gate 		if (bp->b_error)
11007c478bd9Sstevel@tonic-gate 			error = bp->b_error;
11017c478bd9Sstevel@tonic-gate 		else
11027c478bd9Sstevel@tonic-gate 			error = EIO;
11037c478bd9Sstevel@tonic-gate 		retval = (size_t)-1;
11047c478bd9Sstevel@tonic-gate 	} else {
11057c478bd9Sstevel@tonic-gate 		error = 0;
11067c478bd9Sstevel@tonic-gate 		retval = iov->iov_len - bp->b_resid;
11077c478bd9Sstevel@tonic-gate 	}
11087c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
11097c478bd9Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
11107c478bd9Sstevel@tonic-gate 		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
11117c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
11127c478bd9Sstevel@tonic-gate 	} else {
11137c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
11147c478bd9Sstevel@tonic-gate 		    (int)retval);
11157c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
11167c478bd9Sstevel@tonic-gate 	}
11177c478bd9Sstevel@tonic-gate #else
11187c478bd9Sstevel@tonic-gate 	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
11197c478bd9Sstevel@tonic-gate 	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
11207c478bd9Sstevel@tonic-gate #endif
11217c478bd9Sstevel@tonic-gate }
11227c478bd9Sstevel@tonic-gate 
11237c478bd9Sstevel@tonic-gate 
11247c478bd9Sstevel@tonic-gate void
aio_copyout_result_port(struct iovec * iov,struct buf * bp,void * resultp)11257c478bd9Sstevel@tonic-gate aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
11267c478bd9Sstevel@tonic-gate {
11277c478bd9Sstevel@tonic-gate 	int errno;
11287c478bd9Sstevel@tonic-gate 	size_t retval;
11297c478bd9Sstevel@tonic-gate 
11307c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
11317c478bd9Sstevel@tonic-gate 		if (bp->b_error)
11327c478bd9Sstevel@tonic-gate 			errno = bp->b_error;
11337c478bd9Sstevel@tonic-gate 		else
11347c478bd9Sstevel@tonic-gate 			errno = EIO;
11357c478bd9Sstevel@tonic-gate 		retval = (size_t)-1;
11367c478bd9Sstevel@tonic-gate 	} else {
11377c478bd9Sstevel@tonic-gate 		errno = 0;
11387c478bd9Sstevel@tonic-gate 		retval = iov->iov_len - bp->b_resid;
11397c478bd9Sstevel@tonic-gate 	}
11407c478bd9Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
11417c478bd9Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
11427c478bd9Sstevel@tonic-gate 		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
11437c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
11447c478bd9Sstevel@tonic-gate 	} else {
11457c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
11467c478bd9Sstevel@tonic-gate 		    (int)retval);
11477c478bd9Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
11487c478bd9Sstevel@tonic-gate 	}
11497c478bd9Sstevel@tonic-gate #else
11507c478bd9Sstevel@tonic-gate 	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
11517c478bd9Sstevel@tonic-gate 	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
11527c478bd9Sstevel@tonic-gate #endif
11537c478bd9Sstevel@tonic-gate }
11547c478bd9Sstevel@tonic-gate 
11557c478bd9Sstevel@tonic-gate /*
11567c478bd9Sstevel@tonic-gate  * This function is used to remove a request from the done queue.
11577c478bd9Sstevel@tonic-gate  */
11587c478bd9Sstevel@tonic-gate 
11597c478bd9Sstevel@tonic-gate void
aio_req_remove_portq(aio_t * aiop,aio_req_t * reqp)11607c478bd9Sstevel@tonic-gate aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
11617c478bd9Sstevel@tonic-gate {
11627c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
11637c478bd9Sstevel@tonic-gate 	while (aiop->aio_portq == NULL) {
11647c478bd9Sstevel@tonic-gate 		/*
11657c478bd9Sstevel@tonic-gate 		 * aio_portq is set to NULL when aio_cleanup_portq()
11667c478bd9Sstevel@tonic-gate 		 * is working with the event queue.
11677c478bd9Sstevel@tonic-gate 		 * The aio_cleanup_thread() uses aio_cleanup_portq()
11687c478bd9Sstevel@tonic-gate 		 * to unlock all AIO buffers with completed transactions.
11697c478bd9Sstevel@tonic-gate 		 * Wait here until aio_cleanup_portq() restores the
11707c478bd9Sstevel@tonic-gate 		 * list of completed transactions in aio_portq.
11717c478bd9Sstevel@tonic-gate 		 */
11727c478bd9Sstevel@tonic-gate 		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
11737c478bd9Sstevel@tonic-gate 	}
117434709573Sraf 	aio_deq(&aiop->aio_portq, reqp);
11757c478bd9Sstevel@tonic-gate }
11767c478bd9Sstevel@tonic-gate 
11777c478bd9Sstevel@tonic-gate /* ARGSUSED */
11787c478bd9Sstevel@tonic-gate void
aio_close_port(void * arg,int port,pid_t pid,int lastclose)11797c478bd9Sstevel@tonic-gate aio_close_port(void *arg, int port, pid_t pid, int lastclose)
11807c478bd9Sstevel@tonic-gate {
11817c478bd9Sstevel@tonic-gate 	aio_t		*aiop;
11827c478bd9Sstevel@tonic-gate 	aio_req_t 	*reqp;
11837c478bd9Sstevel@tonic-gate 	aio_req_t 	*next;
11847c478bd9Sstevel@tonic-gate 	aio_req_t	*headp;
11857c478bd9Sstevel@tonic-gate 	int		counter;
11867c478bd9Sstevel@tonic-gate 
11877c478bd9Sstevel@tonic-gate 	if (arg == NULL)
11887c478bd9Sstevel@tonic-gate 		aiop = curproc->p_aio;
11897c478bd9Sstevel@tonic-gate 	else
11907c478bd9Sstevel@tonic-gate 		aiop = (aio_t *)arg;
11917c478bd9Sstevel@tonic-gate 
11927c478bd9Sstevel@tonic-gate 	/*
11937c478bd9Sstevel@tonic-gate 	 * The PORT_SOURCE_AIO source is always associated with every new
11947c478bd9Sstevel@tonic-gate 	 * created port by default.
11957c478bd9Sstevel@tonic-gate 	 * If no asynchronous I/O transactions were associated with the port
11967c478bd9Sstevel@tonic-gate 	 * then the aiop pointer will still be set to NULL.
11977c478bd9Sstevel@tonic-gate 	 */
11987c478bd9Sstevel@tonic-gate 	if (aiop == NULL)
11997c478bd9Sstevel@tonic-gate 		return;
12007c478bd9Sstevel@tonic-gate 
12017c478bd9Sstevel@tonic-gate 	/*
12027c478bd9Sstevel@tonic-gate 	 * Within a process event ports can be used to collect events other
12037c478bd9Sstevel@tonic-gate 	 * than PORT_SOURCE_AIO events. At the same time the process can submit
12047c478bd9Sstevel@tonic-gate 	 * asynchronous I/Os transactions which are not associated with the
12057c478bd9Sstevel@tonic-gate 	 * current port.
12067c478bd9Sstevel@tonic-gate 	 * The current process oriented model of AIO uses a single queue for
12077c478bd9Sstevel@tonic-gate 	 * pending events. On close the pending queue (queue of asynchronous
12087c478bd9Sstevel@tonic-gate 	 * I/O transactions using event port notification) must be scanned
12097c478bd9Sstevel@tonic-gate 	 * to detect and handle pending I/Os using the current port.
12107c478bd9Sstevel@tonic-gate 	 */
12117c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_portq_mutex);
12127c478bd9Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
121334709573Sraf 	counter = 0;
121434709573Sraf 	if ((headp = aiop->aio_portpending) != NULL) {
121534709573Sraf 		reqp = headp;
121634709573Sraf 		do {
121734709573Sraf 			if (reqp->aio_req_portkev &&
121834709573Sraf 			    reqp->aio_req_port == port) {
121934709573Sraf 				reqp->aio_req_flags |= AIO_CLOSE_PORT;
122034709573Sraf 				counter++;
122134709573Sraf 			}
122234709573Sraf 		} while ((reqp = reqp->aio_req_next) != headp);
12237c478bd9Sstevel@tonic-gate 	}
12247c478bd9Sstevel@tonic-gate 	if (counter == 0) {
12257c478bd9Sstevel@tonic-gate 		/* no AIOs pending */
12267c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
12277c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_portq_mutex);
12287c478bd9Sstevel@tonic-gate 		return;
12297c478bd9Sstevel@tonic-gate 	}
12307c478bd9Sstevel@tonic-gate 	aiop->aio_portpendcnt += counter;
1231f7ccf9b3Spraks 	mutex_exit(&aiop->aio_mutex);
12327c478bd9Sstevel@tonic-gate 	while (aiop->aio_portpendcnt)
1233f7ccf9b3Spraks 		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
12347c478bd9Sstevel@tonic-gate 
12357c478bd9Sstevel@tonic-gate 	/*
12367c478bd9Sstevel@tonic-gate 	 * all pending AIOs are completed.
12377c478bd9Sstevel@tonic-gate 	 * check port doneq
12387c478bd9Sstevel@tonic-gate 	 */
12397c478bd9Sstevel@tonic-gate 	headp = NULL;
124034709573Sraf 	if ((reqp = aiop->aio_portq) != NULL) {
124134709573Sraf 		do {
124234709573Sraf 			next = reqp->aio_req_next;
124334709573Sraf 			if (reqp->aio_req_port == port) {
124434709573Sraf 				/* dequeue request and discard event */
124534709573Sraf 				aio_req_remove_portq(aiop, reqp);
124634709573Sraf 				port_free_event(reqp->aio_req_portkev);
124734709573Sraf 				/* put request in temporary queue */
124834709573Sraf 				reqp->aio_req_next = headp;
124934709573Sraf 				headp = reqp;
125034709573Sraf 			}
125134709573Sraf 		} while ((reqp = next) != aiop->aio_portq);
12527c478bd9Sstevel@tonic-gate 	}
12537c478bd9Sstevel@tonic-gate 	mutex_exit(&aiop->aio_portq_mutex);
12547c478bd9Sstevel@tonic-gate 
12557c478bd9Sstevel@tonic-gate 	/* headp points to the list of requests to be discarded */
12567c478bd9Sstevel@tonic-gate 	for (reqp = headp; reqp != NULL; reqp = next) {
12577c478bd9Sstevel@tonic-gate 		next = reqp->aio_req_next;
12587c478bd9Sstevel@tonic-gate 		aphysio_unlock(reqp);
12597c478bd9Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
12607c478bd9Sstevel@tonic-gate 		aio_req_free_port(aiop, reqp);
12617c478bd9Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
12627c478bd9Sstevel@tonic-gate 	}
12637c478bd9Sstevel@tonic-gate 
12647c478bd9Sstevel@tonic-gate 	if (aiop->aio_flags & AIO_CLEANUP)
12657c478bd9Sstevel@tonic-gate 		cv_broadcast(&aiop->aio_waitcv);
12667c478bd9Sstevel@tonic-gate }
12677c478bd9Sstevel@tonic-gate 
12687c478bd9Sstevel@tonic-gate /*
12697c478bd9Sstevel@tonic-gate  * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
1270b0b27ce6Spraks  * to kick start the aio_cleanup_thread for the give process to do the
1271b0b27ce6Spraks  * necessary cleanup.
1272b0b27ce6Spraks  * This is needed so that delete_memory_thread can obtain writer locks
1273b0b27ce6Spraks  * on pages that need to be relocated during a dr memory delete operation,
1274b0b27ce6Spraks  * otherwise a deadly embrace may occur.
12757c478bd9Sstevel@tonic-gate  */
12767c478bd9Sstevel@tonic-gate int
aio_cleanup_dr_delete_memory(proc_t * procp)12777c478bd9Sstevel@tonic-gate aio_cleanup_dr_delete_memory(proc_t *procp)
12787c478bd9Sstevel@tonic-gate {
12797c478bd9Sstevel@tonic-gate 	struct aio *aiop = procp->p_aio;
1280b0b27ce6Spraks 	struct as *as = procp->p_as;
1281b0b27ce6Spraks 	int ret = 0;
12827c478bd9Sstevel@tonic-gate 
12837c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&procp->p_lock));
12847c478bd9Sstevel@tonic-gate 
1285b0b27ce6Spraks 	mutex_enter(&as->a_contents);
1286b0b27ce6Spraks 
1287b0b27ce6Spraks 	if (aiop != NULL) {
1288b0b27ce6Spraks 		aiop->aio_rqclnup = 1;
1289b0b27ce6Spraks 		cv_broadcast(&as->a_cv);
1290b0b27ce6Spraks 		ret = 1;
1291b0b27ce6Spraks 	}
1292b0b27ce6Spraks 	mutex_exit(&as->a_contents);
1293b0b27ce6Spraks 	return (ret);
12947c478bd9Sstevel@tonic-gate }
1295