/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/vnode.h>
#include <sys/vfs_opreg.h>
#include <sys/cmn_err.h>
#include <sys/swap.h>
#include <sys/mman.h>
#include <sys/vmsystm.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>

#include <sys/fs/swapnode.h>

#include <vm/seg.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <fs/fs_subr.h>

#include <vm/seg_kp.h>

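/*
 * swapfs vnode operations.  These back anonymous memory: getpage reads a
 * page in from its physical swap slot (or zero-fills a newly created one),
 * and putpage pushes dirty anonymous pages out via VOP_PAGEIO() on the
 * underlying physical swap vnode.
 */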
/*
 * Define the routines within this file.
 */
static int	swap_getpage(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
    caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
static int	swap_putpage(struct vnode *vp, offset_t off, size_t len,
    int flags, struct cred *cr, caller_context_t *ct);
static void	swap_inactive(struct vnode *vp, struct cred *cr,
    caller_context_t *ct);
static void	swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
    cred_t *cr, caller_context_t *ct);

static int	swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz,
    struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);

int	swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
    uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr);

static int	swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
    size_t *lenp, int flags, struct cred *cr);

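/*
 * Template from which swap_vnodeops is constructed at swapfs
 * initialization; operations swapfs does not support map to fs_error.
 */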
const fs_operation_def_t swap_vnodeops_template[] = {
	VOPNAME_INACTIVE,	{ .vop_inactive = swap_inactive },
	VOPNAME_GETPAGE,	{ .vop_getpage = swap_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = swap_putpage },
	VOPNAME_DISPOSE,	{ .vop_dispose = swap_dispose },
	VOPNAME_SETFL,		{ .error = fs_error },
	VOPNAME_POLL,		{ .error = fs_error },
	VOPNAME_PATHCONF,	{ .error = fs_error },
	VOPNAME_GETSECATTR,	{ .error = fs_error },
	VOPNAME_SHRLOCK,	{ .error = fs_error },
	NULL,			NULL
};

vnodeops_t *swap_vnodeops;

/* ARGSUSED */
static void
swap_inactive(
	struct vnode *vp,
	struct cred *cr,
	caller_context_t *ct)
{
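	/* Nothing to do for swapfs vnodes; just leave the debug trace. */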
	SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
}

/*
 * Return all the pages from [off..off+len] in the given file.
 */
/*ARGSUSED*/
static int
swap_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr,
	caller_context_t *ct)
{
	SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
	    (void *)vp, off, len, 0, 0);

	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
	    "swapfs getpage:vp %p off %llx len %ld",
	    (void *)vp, off, len);

	return (pvn_getpages(swap_getapage, vp, (u_offset_t)off, len, protp,
	    pl, plsz, seg, addr, rw, cr));
}

/*
 * Called from pvn_getpages to get a particular page.
 */
/*ARGSUSED*/
static int
swap_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct page *pp, *rpp;
	int flags;
	int err = 0;
	struct vnode *pvp = NULL;
	u_offset_t poff;
	int flag_noreloc;
	se_t lock;
	extern int kcage_on;
	int upgrade = 0;

	SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
	    vp, off, len, 0, 0);

	/*
	 * Until there is a call-back mechanism to cause SEGKP
	 * pages to be unlocked, make them non-relocatable.
	 */
	if (SEG_IS_SEGKP(seg))
		flag_noreloc = PG_NORELOC;
	else
		flag_noreloc = 0;

	if (protp != NULL)
		*protp = PROT_ALL;

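	/*
	 * S_CREATE means the caller will fill the page itself, so take
	 * the exclusive lock; otherwise a shared lock suffices.
	 */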
	lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);

again:
	if (pp = page_lookup(vp, off, lock)) {
		/*
		 * In very rare instances, a segkp page may have been
		 * relocated outside of the kernel by the kernel cage
		 * due to the window between page_unlock() and
		 * VOP_PUTPAGE() in segkp_unlock().  Due to the
		 * rareness of these occurrences, the solution is to
		 * relocate the page to a P_NORELOC page.
		 */
		if (flag_noreloc != 0) {
			if (!PP_ISNORELOC(pp) && kcage_on) {
				if (lock != SE_EXCL) {
					upgrade = 1;
					if (!page_tryupgrade(pp)) {
						page_unlock(pp);
						lock = SE_EXCL;
						goto again;
					}
				}

				if (page_relocate_cage(&pp, &rpp) != 0)
					panic("swap_getapage: "
					    "page_relocate_cage failed");

				pp = rpp;
			}
		}

		if (pl) {
			if (upgrade)
				page_downgrade(pp);

			pl[0] = pp;
			pl[1] = NULL;
		} else {
			page_unlock(pp);
		}
	} else {
		pp = page_create_va(vp, off, PAGESIZE,
		    PG_WAIT | PG_EXCL | flag_noreloc,
		    seg, addr);
		/*
		 * Someone raced in and created the page after we did the
		 * lookup but before we did the create, so go back and
		 * try to look it up again.
		 */
		if (pp == NULL)
			goto again;
		if (rw != S_CREATE) {
			err = swap_getphysname(vp, off, &pvp, &poff);
			if (pvp) {
				struct anon *ap;
				kmutex_t *ahm;

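				/*
				 * A NULL page list means this is a
				 * read-ahead fault; issue the pagein
				 * asynchronously.
				 */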
				flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
				err = VOP_PAGEIO(pvp, pp, poff,
				    PAGESIZE, flags, cr, NULL);

				if (!err) {
					ahm = AH_MUTEX(vp, off);
					mutex_enter(ahm);

					ap = swap_anon(vp, off);
					if (ap == NULL) {
						panic("swap_getapage:"
						    " null anon");
					}

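					/*
					 * If the anon slot still names this
					 * backing store, free the physical
					 * swap slot now that the page is in
					 * memory, and mark the page modified
					 * so a slot is reallocated if it is
					 * paged out again.
					 */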
					if (ap->an_pvp == pvp &&
					    ap->an_poff == poff) {
						swap_phys_free(pvp, poff,
						    PAGESIZE);
						ap->an_pvp = NULL;
						ap->an_poff = 0;
						hat_setmod(pp);
					}

					mutex_exit(ahm);
				}
			} else {
				if (!err)
					pagezero(pp, 0, PAGESIZE);

				/*
				 * If it's a fault-ahead, release the
				 * page_io_lock and the SE_EXCL lock we
				 * grabbed in page_create_va().
				 *
				 * If we are here, we haven't called
				 * VOP_PAGEIO, so calling
				 * pvn_read_done(pp, B_READ) below would
				 * wrongly suggest that we attempted i/o.
				 * Besides, in the async case,
				 * pvn_read_done() should not be called
				 * by *getpage().
				 */
				if (pl == NULL) {
					/*
					 * swap_getphysname can return error
					 * only when we are getting called from
					 * swapslot_free which passes non-NULL
					 * pl to VOP_GETPAGE.
					 */
					ASSERT(err == 0);
					page_io_unlock(pp);
					page_unlock(pp);
				}
			}
		}

		ASSERT(pp != NULL);

		if (err && pl)
			pvn_read_done(pp, B_ERROR);

		if (!err && pl)
			pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
	}
	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
	    "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
	return (err);
}

/*
 * Called from large page anon routines only! This is an ugly hack where
 * the anon layer directly calls into swapfs with a preallocated large page.
 * Another method would have been to change the VOP and add an extra arg for
 * the preallocated large page. This all could be cleaned up later when we
 * solve the anonymous naming problem and no longer need to loop across the
 * VOP in PAGESIZE increments to fill in or initialize a large page as is
 * done today. I think the latter is better since it avoids a change to the
 * VOP interface that could later be avoided.
 */
int
swap_getconpage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	page_t	*conpp,
	uint_t	*pszc,
	spgcnt_t *nreloc,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct page	*pp;
	int		err = 0;
	struct vnode	*pvp = NULL;
	u_offset_t	poff;

	ASSERT(len == PAGESIZE);
	ASSERT(pl != NULL);
	ASSERT(plsz == PAGESIZE);
	ASSERT(protp == NULL);
	ASSERT(nreloc != NULL);
	ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
	SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
	    vp, off, len, 0, 0);

	/*
	 * If we are not using a preallocated page then we know one already
	 * exists. So just let the old code handle it.
	 */
	if (conpp == NULL) {
		err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
		    seg, addr, rw, cr);
		return (err);
	}
	ASSERT(conpp->p_szc != 0);
	ASSERT(PAGE_EXCL(conpp));

	ASSERT(conpp->p_next == conpp);
	ASSERT(conpp->p_prev == conpp);
	ASSERT(!PP_ISAGED(conpp));
	ASSERT(!PP_ISFREE(conpp));

	*nreloc = 0;
	pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);

	/*
	 * If an existing page is found we may need to relocate.
	 */
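	/*
	 * Note that the negative err values returned below are status
	 * codes for the anon-layer caller, not errnos.
	 */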
	if (pp != conpp) {
		ASSERT(rw != S_CREATE);
		ASSERT(pszc != NULL);
		ASSERT(PAGE_SHARED(pp));
		if (pp->p_szc < conpp->p_szc) {
			*pszc = pp->p_szc;
			page_unlock(pp);
			err = -1;
		} else if (pp->p_szc > conpp->p_szc &&
		    seg->s_szc > conpp->p_szc) {
			*pszc = MIN(pp->p_szc, seg->s_szc);
			page_unlock(pp);
			err = -2;
		} else {
			pl[0] = pp;
			pl[1] = NULL;
			if (page_pptonum(pp) &
			    (page_get_pagecnt(conpp->p_szc) - 1))
				cmn_err(CE_PANIC, "swap_getconpage: no root");
		}
		return (err);
	}

	ASSERT(PAGE_EXCL(pp));

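	/*
	 * A nonzero *nreloc means page_lookup_create() relocated an
	 * existing page into conpp, so its contents are already valid
	 * and no pagein is needed.
	 */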
	if (*nreloc != 0) {
		ASSERT(rw != S_CREATE);
		pl[0] = pp;
		pl[1] = NULL;
		return (0);
	}

	*nreloc = 1;

	/*
	 * If necessary do the page io.
	 */
	if (rw != S_CREATE) {
		/*
		 * Since we are only called now on behalf of an
		 * address space operation, it's impossible for
		 * us to fail, unlike swap_getapage() which
		 * also gets called from swapslot_free().
		 */
		if (swap_getphysname(vp, off, &pvp, &poff)) {
			cmn_err(CE_PANIC,
			    "swap_getconpage: swap_getphysname failed!");
		}

		if (pvp != NULL) {
			err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
			    cr, NULL);
			if (err == 0) {
				struct anon *ap;
				kmutex_t *ahm;

				ahm = AH_MUTEX(vp, off);
				mutex_enter(ahm);
				ap = swap_anon(vp, off);
				if (ap == NULL)
					panic("swap_getconpage: null anon");
				if (ap->an_pvp != pvp || ap->an_poff != poff)
					panic("swap_getconpage: bad anon");

				swap_phys_free(pvp, poff, PAGESIZE);
				ap->an_pvp = NULL;
				ap->an_poff = 0;
				hat_setmod(pp);
				mutex_exit(ahm);
			}
		} else {
			pagezero(pp, 0, PAGESIZE);
		}
	}

	/*
	 * Normally we would let pvn_read_done() destroy
	 * the page on IO error. But since this is a preallocated
	 * page we'll let the anon layer handle it.
	 */
	page_io_unlock(pp);
	if (err != 0)
		page_hashout(pp, NULL);
	ASSERT(pp->p_next == pp);
	ASSERT(pp->p_prev == pp);

	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
	    "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);

	pl[0] = pp;
	pl[1] = NULL;
	return (err);
}

/* Async putpage klustering stuff */
int sw_pending_size;
extern int klustsize;
extern struct async_reqs *sw_getreq();
extern void sw_putreq(struct async_reqs *);
extern void sw_putbackreq(struct async_reqs *);
extern struct async_reqs *sw_getfree();
extern void sw_putfree(struct async_reqs *);

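/* Push statistics, readable with a kernel debugger. */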
static size_t swap_putpagecnt, swap_pagespushed;
static size_t swap_otherfail, swap_otherpages;
static size_t swap_klustfail, swap_klustpages;
static size_t swap_getiofail, swap_getiopages;

/*
 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
 * If len == 0, do from off to EOF.
 */
static int swap_nopage = 0;	/* Don't do swap_putpage() if set */

/* ARGSUSED */
static int
swap_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr,
	caller_context_t *ct)
{
	page_t *pp;
	u_offset_t io_off;
	size_t io_len = 0;
	int err = 0;
	int nowait;
	struct async_reqs *arg;

	if (swap_nopage)
		return (0);

	ASSERT(vp->v_count != 0);

	nowait = flags & B_PAGE_NOWAIT;

	/*
	 * Clear the force flag so that p_lckcnt pages are not invalidated,
	 * and drop B_PAGE_NOWAIT now that it has been noted in nowait.
	 */
	flags &= ~(B_FORCE | B_PAGE_NOWAIT);

	SWAPFS_PRINT(SWAP_VOPS,
	    "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
	    (void *)vp, off, len, flags, 0);
	TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
	    "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	if (!vn_has_cached_data(vp))
		return (0);

	if (len == 0) {
		if (curproc == proc_pageout)
			cmn_err(CE_PANIC, "swapfs: pageout can't block");

		/* Search the entire vp list for pages >= off. */
		err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
		    flags, cr);
	} else {
		u_offset_t eoff;

		/*
		 * Loop over all offsets in the range [off...off + len]
		 * looking for pages to deal with.
		 */
		eoff = off + len;
		for (io_off = (u_offset_t)off; io_off < eoff;
		    io_off += io_len) {
			/*
			 * If we run out of async request slots, push the
			 * page now instead of queuing it.
			 */
			if (flags == (B_ASYNC | B_FREE) &&
			    sw_pending_size < klustsize &&
			    (arg = sw_getfree())) {
				/*
				 * If we are clustering, we should allow
				 * pageout to feed us more pages because # of
				 * pushes is limited by # of I/Os, and one
				 * cluster is considered to be one I/O.
				 */
				if (pushes)
					pushes--;

				arg->a_vp = vp;
				arg->a_off = io_off;
				arg->a_len = PAGESIZE;
				arg->a_flags = B_ASYNC | B_FREE;
				arg->a_cred = kcred;
				sw_putreq(arg);
				io_len = PAGESIZE;
				continue;
			}
			/*
			 * If we are not invalidating pages, use the
			 * routine page_lookup_nowait() to prevent
			 * reclaiming them from the free list.
			 */
			if (!nowait && ((flags & B_INVAL) ||
			    (flags & (B_ASYNC | B_FREE)) == B_FREE))
				pp = page_lookup(vp, io_off, SE_EXCL);
			else
				pp = page_lookup_nowait(vp, io_off,
				    (flags & (B_FREE | B_INVAL)) ?
				    SE_EXCL : SE_SHARED);

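			/*
			 * pvn_getdirty() returns 0 when the page needs no
			 * push (it releases the page itself); just step
			 * past that offset.
			 */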
			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = swap_putapage(vp, pp, &io_off, &io_len,
				    flags, cr);
				if (err != 0)
					break;
			}
		}
	}
	/* If invalidating, verify all pages on vnode list are gone. */
	if (err == 0 && off == 0 && len == 0 &&
	    (flags & B_INVAL) && vn_has_cached_data(vp)) {
		cmn_err(CE_WARN,
		    "swap_putpage: B_INVAL, pages not gone");
	}
	return (err);
}

/*
 * Write out a single page.
 * For swapfs this means choose a physical swap slot and write the page
 * out using VOP_PAGEIO.
 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
 * swapfs pages, a bunch of contiguous swap slots and then write them
 * all out in one clustered i/o.
 */
/*ARGSUSED*/
static int
swap_putapage(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,
	size_t *lenp,
	int flags,
	struct cred *cr)
{
	int err;
	struct vnode *pvp;
	u_offset_t poff, off;
	u_offset_t doff;
	size_t dlen;
	size_t klsz = 0;
	u_offset_t klstart = 0;
	struct vnode *klvp = NULL;
	page_t *pplist;
	se_t se;
	struct async_reqs *arg;
	size_t swap_klustsize;

	/*
	 * This check is for callers who reach swap_putpage() with len == 0:
	 * in that case swap_putpage() calls swap_putapage() page by page
	 * via pvn_vplist_dirty(), and such requests must be queued the
	 * same way when the B_ASYNC|B_FREE flags are set.
	 */
	if (flags == (B_ASYNC | B_FREE) &&
	    sw_pending_size < klustsize && (arg = sw_getfree())) {

		hat_setmod(pp);
		page_io_unlock(pp);
		page_unlock(pp);

		arg->a_vp = vp;
		arg->a_off = pp->p_offset;
		arg->a_len = PAGESIZE;
		arg->a_flags = B_ASYNC | B_FREE;
		arg->a_cred = kcred;
		sw_putreq(arg);

		return (0);
	}

	SWAPFS_PRINT(SWAP_PUTP,
	    "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
	    pp, vp, pp->p_offset, flags, 0);

	ASSERT(PAGE_LOCKED(pp));

	off = pp->p_offset;

	doff = off;
	dlen = PAGESIZE;

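	/*
	 * Allocate physical backing store for the page.  If no swap slot
	 * is available, leave the page dirty in memory; only the
	 * async-free case reports ENOMEM to the caller.
	 */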
	if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
		err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
		hat_setmod(pp);
		page_io_unlock(pp);
		page_unlock(pp);
		goto out;
	}

	klvp = pvp;
	klstart = poff;
	pplist = pp;
	/*
	 * If this is ASYNC | FREE and we've accumulated a bunch of such
	 * pending requests, kluster.
	 */
	if (flags == (B_ASYNC | B_FREE))
		swap_klustsize = klustsize;
	else
		swap_klustsize = PAGESIZE;
	se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
	klsz = PAGESIZE;
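	/*
	 * Start the cluster with the page we were handed, then try to
	 * extend it with queued async requests whose freshly allocated
	 * swap slots are physically contiguous with the cluster so far.
	 */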
	while (klsz < swap_klustsize) {
		if ((arg = sw_getreq()) == NULL) {
			swap_getiofail++;
			swap_getiopages += btop(klsz);
			break;
		}
		ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
		vp = arg->a_vp;
		off = arg->a_off;

		if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
			swap_otherfail++;
			swap_otherpages += btop(klsz);
			sw_putfree(arg);
			break;
		}
		if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
			sw_putfree(arg);
			continue;
		}
		/* Get new physical backing store for the page */
		doff = off;
		dlen = PAGESIZE;
		if (err = swap_newphysname(vp, off, &doff, &dlen,
		    &pvp, &poff)) {
			swap_otherfail++;
			swap_otherpages += btop(klsz);
			hat_setmod(pp);
			page_io_unlock(pp);
			page_unlock(pp);
			sw_putbackreq(arg);
			break;
		}
		/* Try to cluster new physical name with previous ones */
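		/*
		 * Forward case: the new slot immediately follows the
		 * cluster, so append the page (page_add() inserts in front
		 * of the list head; advancing the head keeps the list in
		 * ascending slot order).  Backward case: the new slot
		 * immediately precedes the cluster, so the page becomes
		 * the new head and klstart moves back one page.
		 */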
		if (klvp == pvp && poff == klstart + klsz) {
			klsz += PAGESIZE;
			page_add(&pplist, pp);
			pplist = pplist->p_next;
			sw_putfree(arg);
		} else if (klvp == pvp && poff == klstart - PAGESIZE) {
			klsz += PAGESIZE;
			klstart -= PAGESIZE;
			page_add(&pplist, pp);
			sw_putfree(arg);
		} else {
			swap_klustfail++;
			swap_klustpages += btop(klsz);
			hat_setmod(pp);
			page_io_unlock(pp);
			page_unlock(pp);
			sw_putbackreq(arg);
			break;
		}
	}

	err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
	    B_WRITE | flags, cr, NULL);

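	/*
	 * For synchronous i/o, tear the page list down here; in the
	 * B_ASYNC case pvn_write_done() is expected to be called from
	 * the i/o-done path of the underlying filesystem.
	 */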
	if ((flags & B_ASYNC) == 0)
		pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);

	/* Statistics */
	if (!err) {
		swap_putpagecnt++;
		swap_pagespushed += btop(klsz);
	}
out:
	TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
	    "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
	    vp, klvp, klstart, klsz);
	if (err && err != ENOMEM)
		cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
	if (lenp)
		*lenp = PAGESIZE;
	return (err);
}

static void
swap_dispose(
	vnode_t *vp,
	page_t *pp,
	int fl,
	int dn,
	cred_t *cr,
	caller_context_t *ct)
{
	int err;
	u_offset_t off = pp->p_offset;
	vnode_t *pvp;
	u_offset_t poff;

	ASSERT(PAGE_EXCL(pp));

	/*
	 * The caller will free/invalidate a large page in one shot instead
	 * of one small page at a time.
	 */
	if (pp->p_szc != 0) {
		page_unlock(pp);
		return;
	}

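	/*
	 * If a physical swap slot is assigned, let the filesystem backing
	 * the swap device dispose of the page; otherwise fall back to the
	 * generic fs_dispose().
	 */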
	err = swap_getphysname(vp, off, &pvp, &poff);
	if (!err && pvp != NULL)
		VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
	else
		fs_dispose(vp, pp, fl, dn, cr, ct);
}