/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/vnode.h>
#include <sys/vfs_opreg.h>
#include <sys/cmn_err.h>
#include <sys/swap.h>
#include <sys/mman.h>
#include <sys/vmsystm.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>

#include <sys/fs/swapnode.h>

#include <vm/seg.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <fs/fs_subr.h>

#include <vm/seg_kp.h>

/*
 * Define the routines within this file.
 */
static int	swap_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
    caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
static int	swap_putpage(struct vnode *vp, offset_t off, size_t len,
    int flags, struct cred *cr, caller_context_t *ct);
static void	swap_inactive(struct vnode *vp, struct cred *cr,
    caller_context_t *ct);
static void	swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
    cred_t *cr, caller_context_t *ct);

static int	swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz,
    struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);

int	swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
    uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr);

static int	swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
    size_t *lenp, int flags, struct cred *cr);

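/*
 * swapfs supplies real implementations only for the paging-related
 * operations; the remaining entries listed below explicitly fail
 * with fs_error.
 */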
const fs_operation_def_t swap_vnodeops_template[] = {
	VOPNAME_INACTIVE,	{ .vop_inactive = swap_inactive },
	VOPNAME_GETPAGE,	{ .vop_getpage = swap_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = swap_putpage },
	VOPNAME_DISPOSE,	{ .vop_dispose = swap_dispose },
	VOPNAME_SETFL,		{ .error = fs_error },
	VOPNAME_POLL,		{ .error = fs_error },
	VOPNAME_PATHCONF,	{ .error = fs_error },
	VOPNAME_GETSECATTR,	{ .error = fs_error },
	VOPNAME_SHRLOCK,	{ .error = fs_error },
	NULL,			NULL
};

vnodeops_t *swap_vnodeops;

/* ARGSUSED */
static void
swap_inactive(
	struct vnode *vp,
	struct cred *cr,
	caller_context_t *ct)
{
	SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
}

/*
 * Return all the pages from [off..off+len] in the given file.
 */
/*ARGSUSED*/
static int
swap_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr,
	caller_context_t *ct)
{
	SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
	    (void *)vp, off, len, 0, 0);

	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
	    "swapfs getpage:vp %p off %llx len %ld",
	    (void *)vp, off, len);

	return (pvn_getpages(swap_getapage, vp, (u_offset_t)off, len, protp,
	    pl, plsz, seg, addr, rw, cr));
}

/*
 * Called from pvn_getpages to get a particular page.
 */
/*ARGSUSED*/
static int
swap_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct page *pp, *rpp;
	int flags;
	int err = 0;
	struct vnode *pvp = NULL;
	u_offset_t poff;
	int flag_noreloc;
	se_t lock;
	extern int kcage_on;
	int upgrade = 0;

	SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
	    vp, off, len, 0, 0);

	/*
	 * Until there is a call-back mechanism to cause SEGKP
	 * pages to be unlocked, make them non-relocatable.
	 */
	if (SEG_IS_SEGKP(seg))
		flag_noreloc = PG_NORELOC;
	else
		flag_noreloc = 0;

	if (protp != NULL)
		*protp = PROT_ALL;

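	/*
	 * S_CREATE means the caller will initialize the page, so take it
	 * exclusively; otherwise a shared lock is enough for the lookup.
	 */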
	lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);

again:
	if (pp = page_lookup(vp, off, lock)) {
		/*
		 * In very rare instances, a segkp page may have been
		 * relocated outside of the kernel by the kernel cage
		 * due to the window between page_unlock() and
		 * VOP_PUTPAGE() in segkp_unlock().  Due to the
		 * rareness of these occurrences, the solution is to
		 * relocate the page to a P_NORELOC page.
		 */
		if (flag_noreloc != 0) {
			if (!PP_ISNORELOC(pp) && kcage_on) {
				if (lock != SE_EXCL) {
					upgrade = 1;
					if (!page_tryupgrade(pp)) {
						page_unlock(pp);
						lock = SE_EXCL;
						goto again;
					}
				}

				if (page_relocate_cage(&pp, &rpp) != 0)
					panic("swap_getapage: "
					    "page_relocate_cage failed");

				pp = rpp;
			}
		}

		if (pl) {
			if (upgrade)
				page_downgrade(pp);

			pl[0] = pp;
			pl[1] = NULL;
		} else {
			page_unlock(pp);
		}
	} else {
		pp = page_create_va(vp, off, PAGESIZE,
		    PG_WAIT | PG_EXCL | flag_noreloc,
		    seg, addr);
		/*
		 * Someone raced in and created the page after we did the
		 * lookup but before we did the create, so go back and
		 * try to look it up again.
		 */
		if (pp == NULL)
			goto again;
		if (rw != S_CREATE) {
			err = swap_getphysname(vp, off, &pvp, &poff);
			if (pvp) {
				struct anon *ap;
				kmutex_t *ahm;

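				/*
				 * With no page list the fault is a read-ahead,
				 * so issue the backing-store read
				 * asynchronously.
				 */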
				flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
				err = VOP_PAGEIO(pvp, pp, poff,
				    PAGESIZE, flags, cr, NULL);

				if (!err) {
					ahm = AH_MUTEX(vp, off);
					mutex_enter(ahm);

					ap = swap_anon(vp, off);
					if (ap == NULL) {
						panic("swap_getapage:"
						    " null anon");
					}

					if (ap->an_pvp == pvp &&
					    ap->an_poff == poff) {
						swap_phys_free(pvp, poff,
						    PAGESIZE);
						ap->an_pvp = NULL;
						ap->an_poff = 0;
						hat_setmod(pp);
					}

					mutex_exit(ahm);
				}
			} else {
				if (!err)
					pagezero(pp, 0, PAGESIZE);

				/*
				 * If it's a fault ahead, release page_io_lock
				 * and SE_EXCL we grabbed in page_create_va.
				 *
				 * If we are here, we haven't called VOP_PAGEIO
				 * and thus calling pvn_read_done(pp, B_READ)
				 * below could mislead callers into thinking we
				 * tried i/o. Besides, in the async case,
				 * pvn_read_done() should not be called by
				 * *getpage().
				 */
				if (pl == NULL) {
					/*
					 * swap_getphysname can return an error
					 * only when we are getting called from
					 * swapslot_free, which passes a
					 * non-NULL pl to VOP_GETPAGE.
					 */
					ASSERT(err == 0);
					page_io_unlock(pp);
					page_unlock(pp);
				}
			}
		}

		ASSERT(pp != NULL);

		if (err && pl)
			pvn_read_done(pp, B_ERROR);

		if (!err && pl)
			pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
	}
	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
	    "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
	return (err);
}

/*
 * Called from the large page anon routines only! This is an ugly hack where
 * the anon layer directly calls into swapfs with a preallocated large page.
 * Another method would have been to change the VOP and add an extra arg for
 * the preallocated large page. This all could be cleaned up later when we
 * solve the anonymous naming problem and no longer need to loop across the
 * VOP in PAGESIZE increments to fill in or initialize a large page as is
 * done today. I think the latter is better since it avoids a change to the
 * VOP interface that could later be avoided.
 */
int
swap_getconpage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	page_t	*conpp,
	uint_t	*pszc,
	spgcnt_t *nreloc,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct page	*pp;
	int		err = 0;
	struct vnode	*pvp = NULL;
	u_offset_t	poff;

	ASSERT(len == PAGESIZE);
	ASSERT(pl != NULL);
	ASSERT(plsz == PAGESIZE);
	ASSERT(protp == NULL);
	ASSERT(nreloc != NULL);
	ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
	SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
	    vp, off, len, 0, 0);

	/*
	 * If we are not using a preallocated page then we know one already
	 * exists. So just let the old code handle it.
	 */
	if (conpp == NULL) {
		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
		    seg, addr, rw, cr);
		return (err);
	}
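	/*
	 * The preallocated page must be an exclusively locked, unlinked
	 * (p_next == p_prev == conpp), non-free page with a nonzero
	 * size code.
	 */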
	ASSERT(conpp->p_szc != 0);
	ASSERT(PAGE_EXCL(conpp));

	ASSERT(conpp->p_next == conpp);
	ASSERT(conpp->p_prev == conpp);
	ASSERT(!PP_ISAGED(conpp));
	ASSERT(!PP_ISFREE(conpp));

	*nreloc = 0;
	pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);

	/*
	 * If an existing page is found we may need to relocate.
	 */
	if (pp != conpp) {
		ASSERT(rw != S_CREATE);
		ASSERT(pszc != NULL);
		ASSERT(PAGE_SHARED(pp));
		if (pp->p_szc < conpp->p_szc) {
			*pszc = pp->p_szc;
			page_unlock(pp);
			err = -1;
		} else if (pp->p_szc > conpp->p_szc &&
		    seg->s_szc > conpp->p_szc) {
			*pszc = MIN(pp->p_szc, seg->s_szc);
			page_unlock(pp);
			err = -2;
		} else {
			pl[0] = pp;
			pl[1] = NULL;
			if (page_pptonum(pp) &
			    (page_get_pagecnt(conpp->p_szc) - 1))
				cmn_err(CE_PANIC, "swap_getconpage: no root");
		}
		return (err);
	}

	ASSERT(PAGE_EXCL(pp));

	if (*nreloc != 0) {
		ASSERT(rw != S_CREATE);
		pl[0] = pp;
		pl[1] = NULL;
		return (0);
	}

	*nreloc = 1;

	/*
	 * If necessary do the page io.
	 */
	if (rw != S_CREATE) {
		/*
		 * Since we are only called now on behalf of an
		 * address space operation it's impossible for
		 * us to fail unlike swap_getapage() which
		 * also gets called from swapslot_free().
		 */
		if (swap_getphysname(vp, off, &pvp, &poff)) {
			cmn_err(CE_PANIC,
			    "swap_getconpage: swap_getphysname failed!");
		}

		if (pvp != NULL) {
			err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
			    cr, NULL);
			if (err == 0) {
				struct anon *ap;
				kmutex_t *ahm;

				ahm = AH_MUTEX(vp, off);
				mutex_enter(ahm);
				ap = swap_anon(vp, off);
				if (ap == NULL)
					panic("swap_getconpage: null anon");
				if (ap->an_pvp != pvp || ap->an_poff != poff)
					panic("swap_getconpage: bad anon");

				swap_phys_free(pvp, poff, PAGESIZE);
				ap->an_pvp = NULL;
				ap->an_poff = 0;
				hat_setmod(pp);
				mutex_exit(ahm);
			}
		} else {
			pagezero(pp, 0, PAGESIZE);
		}
	}

	/*
	 * Normally we would let pvn_read_done() destroy
	 * the page on IO error. But since this is a preallocated
	 * page we'll let the anon layer handle it.
	 */
	page_io_unlock(pp);
	if (err != 0)
		page_hashout(pp, NULL);
	ASSERT(pp->p_next == pp);
	ASSERT(pp->p_prev == pp);

	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
	    "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);

	pl[0] = pp;
	pl[1] = NULL;
	return (err);
}

/* Async putpage klustering stuff */
int sw_pending_size;
extern int klustsize;
extern struct async_reqs *sw_getreq();
extern void sw_putreq(struct async_reqs *);
extern void sw_putbackreq(struct async_reqs *);
extern struct async_reqs *sw_getfree();
extern void sw_putfree(struct async_reqs *);

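/* Counters recording putpage and klustering outcomes, for observability. */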
static size_t swap_putpagecnt, swap_pagespushed;
static size_t swap_otherfail, swap_otherpages;
static size_t swap_klustfail, swap_klustpages;
static size_t swap_getiofail, swap_getiopages;

/*
 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
 * If len == 0, do from off to EOF.
 */
static int swap_nopage = 0;	/* Don't do swap_putpage's if set */

/* ARGSUSED */
static int
swap_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr,
	caller_context_t *ct)
{
	page_t *pp;
	u_offset_t io_off;
	size_t io_len = 0;
	int err = 0;
	int nowait;
	struct async_reqs *arg;

	if (swap_nopage)
		return (0);

	ASSERT(vp->v_count != 0);

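	/*
	 * B_PAGE_NOWAIT callers must not block on busy pages; remember the
	 * request and strip the flag before it reaches the pvn layer.
	 */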
	nowait = flags & B_PAGE_NOWAIT;

	/*
	 * Clear force flag so that p_lckcnt pages are not invalidated.
	 */
	flags &= ~(B_FORCE | B_PAGE_NOWAIT);

	SWAPFS_PRINT(SWAP_VOPS,
	    "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
	    (void *)vp, off, len, flags, 0);
	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
	    "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	if (!vn_has_cached_data(vp))
		return (0);

	if (len == 0) {
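		/*
		 * A len of 0 walks the entire vnode page list and may block;
		 * the pageout daemon must never be allowed to do that.
		 */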
		if (curproc == proc_pageout)
			cmn_err(CE_PANIC, "swapfs: pageout can't block");

		/* Search the entire vp list for pages >= off. */
		err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
		    flags, cr);
	} else {
		u_offset_t eoff;

		/*
		 * Loop over all offsets in the range [off...off + len]
		 * looking for pages to deal with.
		 */
		eoff = off + len;
		for (io_off = (u_offset_t)off; io_off < eoff;
		    io_off += io_len) {
			/*
			 * If we run out of async req slots, put the page
			 * now instead of queuing.
			 */
			if (flags == (B_ASYNC | B_FREE) &&
			    sw_pending_size < klustsize &&
			    (arg = sw_getfree())) {
				/*
				 * If we are clustering, we should allow
				 * pageout to feed us more pages because # of
				 * pushes is limited by # of I/Os, and one
				 * cluster is considered to be one I/O.
				 */
				if (pushes)
					pushes--;

				arg->a_vp = vp;
				arg->a_off = io_off;
				arg->a_len = PAGESIZE;
				arg->a_flags = B_ASYNC | B_FREE;
				arg->a_cred = kcred;
				sw_putreq(arg);
				io_len = PAGESIZE;
				continue;
			}
			/*
			 * If we are not invalidating pages, use the
			 * routine page_lookup_nowait() to prevent
			 * reclaiming them from the free list.
			 */
			if (!nowait && ((flags & B_INVAL) ||
			    (flags & (B_ASYNC | B_FREE)) == B_FREE))
				pp = page_lookup(vp, io_off, SE_EXCL);
			else
				pp = page_lookup_nowait(vp, io_off,
				    (flags & (B_FREE | B_INVAL)) ?
				    SE_EXCL : SE_SHARED);

			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = swap_putapage(vp, pp, &io_off, &io_len,
				    flags, cr);
				if (err != 0)
					break;
			}
		}
	}
	/* If invalidating, verify all pages on vnode list are gone. */
	if (err == 0 && off == 0 && len == 0 &&
	    (flags & B_INVAL) && vn_has_cached_data(vp)) {
		cmn_err(CE_WARN,
		    "swap_putpage: B_INVAL, pages not gone");
	}
	return (err);
}

/*
 * Write out a single page.
 * For swapfs this means choose a physical swap slot and write the page
 * out using VOP_PAGEIO.
 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
 * swapfs pages, a bunch of contiguous swap slots and then write them
 * all out in one clustered i/o.
 */
/*ARGSUSED*/
static int
swap_putapage(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,
	size_t *lenp,
	int flags,
	struct cred *cr)
{
	int err;
	struct vnode *pvp;
	u_offset_t poff, off;
	u_offset_t doff;
	size_t dlen;
	size_t klsz = 0;
	u_offset_t klstart = 0;
	struct vnode *klvp = NULL;
	page_t *pplist;
	se_t se;
	struct async_reqs *arg;
	size_t swap_klustsize;

	/*
	 * This check is for callers that invoke swap_putpage() with len == 0.
	 * swap_putpage() calls swap_putapage() page-by-page via
	 * pvn_vplist_dirty, and the same queuing is necessary when such
	 * callers pass the B_ASYNC|B_FREE flags.
	 */
	if (flags == (B_ASYNC | B_FREE) &&
	    sw_pending_size < klustsize && (arg = sw_getfree())) {

		hat_setmod(pp);
		page_io_unlock(pp);
		page_unlock(pp);

		arg->a_vp = vp;
		arg->a_off = pp->p_offset;
		arg->a_len = PAGESIZE;
		arg->a_flags = B_ASYNC | B_FREE;
		arg->a_cred = kcred;
		sw_putreq(arg);

		return (0);
	}

	SWAPFS_PRINT(SWAP_PUTP,
	    "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
	    pp, vp, pp->p_offset, flags, 0);

	ASSERT(PAGE_LOCKED(pp));

	off = pp->p_offset;

	doff = off;
	dlen = PAGESIZE;

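	/* Allocate a physical swap slot to back this page. */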
	if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
		err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
		hat_setmod(pp);
		page_io_unlock(pp);
		page_unlock(pp);
		goto out;
	}

	klvp = pvp;
	klstart = poff;
	pplist = pp;
	/*
	 * If this is ASYNC | FREE and we've accumulated a bunch of such
	 * pending requests, kluster.
	 */
	if (flags == (B_ASYNC | B_FREE))
		swap_klustsize = klustsize;
	else
		swap_klustsize = PAGESIZE;
	se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
	klsz = PAGESIZE;
	while (klsz < swap_klustsize) {
		if ((arg = sw_getreq()) == NULL) {
			swap_getiofail++;
			swap_getiopages += btop(klsz);
			break;
		}
		ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
		vp = arg->a_vp;
		off = arg->a_off;

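		/*
		 * If the page is gone or busy, stop klustering and push
		 * what we have accumulated so far.
		 */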
		if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
			swap_otherfail++;
			swap_otherpages += btop(klsz);
			sw_putfree(arg);
			break;
		}
		if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
			sw_putfree(arg);
			continue;
		}
		/* Get new physical backing store for the page */
		doff = off;
		dlen = PAGESIZE;
		if (err = swap_newphysname(vp, off, &doff, &dlen,
		    &pvp, &poff)) {
			swap_otherfail++;
			swap_otherpages += btop(klsz);
			hat_setmod(pp);
			page_io_unlock(pp);
			page_unlock(pp);
			sw_putbackreq(arg);
			break;
		}
		/* Try to cluster the new physical name with previous ones */
		if (klvp == pvp && poff == klstart + klsz) {
			klsz += PAGESIZE;
			page_add(&pplist, pp);
			pplist = pplist->p_next;
			sw_putfree(arg);
		} else if (klvp == pvp && poff == klstart - PAGESIZE) {
			klsz += PAGESIZE;
			klstart -= PAGESIZE;
			page_add(&pplist, pp);
			sw_putfree(arg);
		} else {
			swap_klustfail++;
			swap_klustpages += btop(klsz);
			hat_setmod(pp);
			page_io_unlock(pp);
			page_unlock(pp);
			sw_putbackreq(arg);
			break;
		}
	}

	err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
	    B_WRITE | flags, cr, NULL);

	if ((flags & B_ASYNC) == 0)
		pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);

	/* Statistics */
	if (!err) {
		swap_putpagecnt++;
		swap_pagespushed += btop(klsz);
	}
out:
	TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
	    "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
	    vp, klvp, klstart, klsz);
	if (err && err != ENOMEM)
		cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
	if (lenp)
		*lenp = PAGESIZE;
	return (err);
}

static void
swap_dispose(
	vnode_t *vp,
	page_t *pp,
	int fl,
	int dn,
	cred_t *cr,
	caller_context_t *ct)
{
	int err;
	u_offset_t off = pp->p_offset;
	vnode_t *pvp;
	u_offset_t poff;

	ASSERT(PAGE_EXCL(pp));

	/*
	 * The caller will free/invalidate a large page in one shot instead
	 * of one small page at a time.
	 */
	if (pp->p_szc != 0) {
		page_unlock(pp);
		return;
	}

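	/*
	 * If the page has physical swap backing, let the underlying swap
	 * device dispose of it; otherwise fall back to the generic code.
	 */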
	err = swap_getphysname(vp, off, &pvp, &poff);
	if (!err && pvp != NULL)
		VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
	else
		fs_dispose(vp, pp, fl, dn, cr, ct);
}