/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/tuneable.h>
#include <sys/inline.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/var.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/swap.h>
#include <sys/vm.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/sysinfo.h>
#include <sys/callb.h>
#include <sys/reboot.h>
#include <sys/time.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_bio.h>

#include <vm/hat.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/seg_kmem.h>

int doiflush = 1;	/* non-zero to turn inode flushing on */
int dopageflush = 1;	/* non-zero to turn page flushing on */

/*
 * To improve boot performance, don't run the inode flushing loop until
 * the specified number of seconds after boot.  To revert to the old
 * behavior, set fsflush_iflush_delay to 0.  This creates no new
 * filesystem danger: there has always been a window between inode flush
 * passes during which the system could crash, fail to sync the
 * filesystem, and need fsck to recover; we have merely widened that
 * window.  Finally, we never delay inode flushing if we're booting into
 * single user mode, where the administrator may be modifying files or
 * using fsck.  This modification avoids inode flushes during boot whose
 * only purpose is to update atimes on files accessed during boot.
 */
int fsflush_iflush_delay = 60;

kcondvar_t fsflush_cv;
static kmutex_t fsflush_lock;	/* just for the cv_wait */
ksema_t fsflush_sema;		/* to serialize with reboot */

/*
 * some statistics for fsflush_do_pages
 */
typedef struct {
	ulong_t fsf_scan;	/* number of pages scanned */
	ulong_t fsf_examined;	/* number of page_t's actually examined, can */
				/* be less than fsf_scan due to large pages */
	ulong_t fsf_locked;	/* pages we actually page_lock()ed */
	ulong_t fsf_modified;	/* number of modified pages found */
	ulong_t fsf_coalesce;	/* number of page coalesces done */
	ulong_t fsf_time;	/* nanoseconds of run time */
	ulong_t fsf_releases;	/* number of page_release() done */
} fsf_stat_t;

fsf_stat_t fsf_recent;	/* counts for most recent duty cycle */
fsf_stat_t fsf_total;	/* total of counts */
ulong_t fsf_cycles;	/* number of runs reflected in fsf_total */

/*
 * data used to determine when we can coalesce consecutive free pages
 * into larger pages.
 */
#define	MAX_PAGESIZES	32
static ulong_t		fsf_npgsz;
static pgcnt_t		fsf_pgcnt[MAX_PAGESIZES];
static pgcnt_t		fsf_mask[MAX_PAGESIZES];


/*
 * Scan page_t's and issue I/O's for modified pages.
 *
 * Also coalesces consecutive small sized free pages into the next larger
 * pagesize. This costs a tiny bit of time in fsflush, but will reduce time
 * spent scanning on later passes and for anybody allocating large pages.
 */
static void
fsflush_do_pages()
{
	vnode_t		*vp;
	ulong_t		pcount;
	hrtime_t	timer = gethrtime();
	ulong_t		releases = 0;
	ulong_t		nexamined = 0;
	ulong_t		nlocked = 0;
	ulong_t		nmodified = 0;
	ulong_t		ncoalesce = 0;
	ulong_t		cnt;
	int		mod;
	int		fspage = 1;
	u_offset_t	offset;
	uint_t		szc;

	page_t		*coal_page = NULL;  /* 1st page in group to coalesce */
	uint_t		coal_szc = 0;	    /* size code, coal_page->p_szc */
	uint_t		coal_cnt = 0;	    /* count of pages seen */

	static ulong_t	nscan = 0;
	static pgcnt_t	last_total_pages = 0;
	static page_t	*pp = NULL;

	/*
	 * Check to see if total_pages has changed.
	 */
	if (total_pages != last_total_pages) {
		last_total_pages = total_pages;
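		/*
		 * Scanning nscan pages per wakeup (one wakeup every
		 * t_fsflushr seconds) walks all of physical memory
		 * roughly once every v_autoup seconds.
		 */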
		nscan = (last_total_pages * (tune.t_fsflushr)) / v.v_autoup;
	}

	if (pp == NULL)
		pp = memsegs->pages;

	pcount = 0;
	while (pcount < nscan) {

		/*
		 * move to the next page, skipping over large pages
		 * and issuing prefetches.
		 */
		if (pp->p_szc && fspage == 0) {
			pfn_t pfn;

			pfn = page_pptonum(pp);
			cnt = page_get_pagecnt(pp->p_szc);
			cnt -= pfn & (cnt - 1);
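			/*
			 * cnt is now the number of constituent pages from
			 * pp to the end of its large page, so page_nextn()
			 * below steps past the rest of the large page.
			 */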
		} else
			cnt = 1;

		pp = page_nextn(pp, cnt);
		prefetch_page_r((void *)pp);
		ASSERT(pp != NULL);
		pcount += cnt;

		/*
		 * Do a bunch of dirty tests (i.e. no locking) to determine
		 * if we can quickly skip this page. These tests are repeated
		 * after acquiring the page lock.
		 */
		++nexamined;
		if (PP_ISSWAP(pp)) {
			fspage = 0;
			coal_page = NULL;
			continue;
		}

		/*
		 * skip free pages too, but try coalescing them into larger
		 * pagesizes
		 */
		if (PP_ISFREE(pp)) {
			/*
			 * skip pages with a file system identity or that
			 * are already maximum size
			 */
			fspage = 0;
			szc = pp->p_szc;
			if (pp->p_vnode != NULL || szc == fsf_npgsz - 1) {
				coal_page = NULL;
				continue;
			}

			/*
			 * If not in a coalescing candidate page or the size
			 * codes are different, start a new candidate.
			 */
			if (coal_page == NULL || coal_szc != szc) {

				/*
				 * page must be properly aligned
				 */
				if ((page_pptonum(pp) & fsf_mask[szc]) != 0) {
					coal_page = NULL;
					continue;
				}
				coal_page = pp;
				coal_szc = szc;
				coal_cnt = 1;
				continue;
			}

			/*
			 * acceptable to add this to existing candidate page
			 */
			++coal_cnt;
			if (coal_cnt < fsf_pgcnt[coal_szc])
				continue;

			/*
			 * We've got enough pages to coalesce, so do it.
			 * After promoting, we clear coal_page, so it will
			 * take another pass to promote this to an even
			 * larger page.
			 */
			++ncoalesce;
			(void) page_promote_size(coal_page, coal_szc);
			coal_page = NULL;
			continue;
		} else {
			coal_page = NULL;
		}

		if (PP_ISKAS(pp) ||
		    PAGE_LOCKED(pp) ||
		    pp->p_lckcnt != 0 ||
		    pp->p_cowcnt != 0) {
			fspage = 0;
			continue;
		}


		/*
		 * Reject pages that can't be "exclusively" locked.
		 */
		if (!page_trylock(pp, SE_EXCL))
			continue;
		++nlocked;


		/*
		 * After locking the page, redo the above checks.
		 * Since we locked the page, leave out the PAGE_LOCKED() test.
		 */
		vp = pp->p_vnode;
		if (PP_ISSWAP(pp) ||
		    PP_ISFREE(pp) ||
		    vp == NULL ||
		    PP_ISKAS(pp) ||
		    (vp->v_flag & VISSWAP) != 0) {
			page_unlock(pp);
			fspage = 0;
			continue;
		}
		if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
			page_unlock(pp);
			continue;
		}

		fspage = 1;
		ASSERT(vp->v_type != VCHR);

		/*
		 * Check the modified bit, leaving the bit alone in hardware;
		 * it will be cleared if we do the putpage.
		 */
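		/*
		 * When the vnode is marked VMODSORT, the page's mod state
		 * is expected to be kept current, so hat_ismod() suffices;
		 * otherwise sync the ref/mod bits up from the HAT, stopping
		 * at the first modified mapping and leaving the hardware
		 * bits untouched.
		 */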
		if (IS_VMODSORT(vp))
			mod = hat_ismod(pp);
		else
			mod = hat_pagesync(pp,
			    HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD) & P_MOD;

		if (mod) {
			++nmodified;
			offset = pp->p_offset;

			/*
			 * Hold the vnode before releasing the page lock
			 * to prevent it from being freed and re-used by
			 * some other thread.
			 */
			VN_HOLD(vp);

			page_unlock(pp);

			(void) VOP_PUTPAGE(vp, offset, PAGESIZE, B_ASYNC,
			    kcred, NULL);

			VN_RELE(vp);
		} else {

			/*
			 * Catch any pages which should be on the cache list,
			 * but aren't yet.
			 */
			if (hat_page_is_mapped(pp) == 0) {
				++releases;
				(void) page_release(pp, 1);
			} else {
				page_unlock(pp);
			}
		}
	}

	/*
	 * maintain statistics
	 * reset every million wakeups, just to avoid overflow
	 */
	if (++fsf_cycles == 1000000) {
		fsf_cycles = 0;
		fsf_total.fsf_scan = 0;
		fsf_total.fsf_examined = 0;
		fsf_total.fsf_locked = 0;
		fsf_total.fsf_modified = 0;
		fsf_total.fsf_coalesce = 0;
		fsf_total.fsf_time = 0;
		fsf_total.fsf_releases = 0;
	} else {
		fsf_total.fsf_scan += fsf_recent.fsf_scan = nscan;
		fsf_total.fsf_examined += fsf_recent.fsf_examined = nexamined;
		fsf_total.fsf_locked += fsf_recent.fsf_locked = nlocked;
		fsf_total.fsf_modified += fsf_recent.fsf_modified = nmodified;
		fsf_total.fsf_coalesce += fsf_recent.fsf_coalesce = ncoalesce;
		fsf_total.fsf_time += fsf_recent.fsf_time = gethrtime() - timer;
		fsf_total.fsf_releases += fsf_recent.fsf_releases = releases;
	}
}

/*
 * As part of file system hardening, this daemon is awakened every
 * tune.t_fsflushr seconds (once per second by default) to flush cached
 * data, which includes the buffer cache, the inode cache and mapped
 * pages.
 */
void
fsflush()
{
	struct buf *bp, *dwp;
	struct hbuf *hp;
	int autoup;
	unsigned int ix, icount, count = 0;
	callb_cpr_t cprinfo;
	uint_t		bcount;
	kmutex_t	*hmp;
	struct vfssw *vswp;

	proc_fsflush = ttoproc(curthread);
	proc_fsflush->p_cstime = 0;
	proc_fsflush->p_stime = 0;
	proc_fsflush->p_cutime = 0;
	proc_fsflush->p_utime = 0;
	bcopy("fsflush", curproc->p_user.u_psargs, 8);
	bcopy("fsflush", curproc->p_user.u_comm, 7);

	mutex_init(&fsflush_lock, NULL, MUTEX_DEFAULT, NULL);
	sema_init(&fsflush_sema, 0, NULL, SEMA_DEFAULT, NULL);

	/*
	 * Setup page coalescing.
	 */
	fsf_npgsz = page_num_pagesizes();
	ASSERT(fsf_npgsz < MAX_PAGESIZES);
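	/*
	 * For each page size, record how many pages of that size make up
	 * one page of the next larger size (fsf_pgcnt) and the pfn
	 * alignment mask for that larger size (fsf_mask).
	 */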
	for (ix = 0; ix < fsf_npgsz - 1; ++ix) {
		fsf_pgcnt[ix] =
		    page_get_pagesize(ix + 1) / page_get_pagesize(ix);
		fsf_mask[ix] = page_get_pagecnt(ix + 1) - 1;
	}

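	/*
	 * autoup is the age, in clock ticks, at which a delayed-write
	 * buffer gets written back; icount is the number of wakeups
	 * between inode-cache flushes, so attributes are synced roughly
	 * once every v_autoup seconds.
	 */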
	autoup = v.v_autoup * hz;
	icount = v.v_autoup / tune.t_fsflushr;
	CALLB_CPR_INIT(&cprinfo, &fsflush_lock, callb_generic_cpr, "fsflush");
loop:
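	/*
	 * fsflush_sema is released only while waiting for the clock to
	 * signal fsflush_cv; it is held for the rest of the pass so that
	 * the flush work can be serialized with reboot (see fsflush_sema
	 * above).
	 */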
	sema_v(&fsflush_sema);
	mutex_enter(&fsflush_lock);
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	cv_wait(&fsflush_cv, &fsflush_lock);		/* wait for clock */
	CALLB_CPR_SAFE_END(&cprinfo, &fsflush_lock);
	mutex_exit(&fsflush_lock);
	sema_p(&fsflush_sema);

	/*
	 * Write back all old B_DELWRI buffers on the freelist.
	 */
	bcount = 0;
	for (ix = 0; ix < v.v_hbuf; ix++) {

		hp = &hbuf[ix];
		dwp = (struct buf *)&dwbuf[ix];

		bcount += (hp->b_length);

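		/*
		 * The delayed-write list is circular; an empty list points
		 * back at its own head.
		 */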
		if (dwp->av_forw == dwp) {
			continue;
		}

		hmp = &hbuf[ix].b_lock;
		mutex_enter(hmp);
		bp = dwp->av_forw;

		/*
		 * Go down only on the delayed write lists.
		 */
		while (bp != dwp) {

			ASSERT(bp->b_flags & B_DELWRI);

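			/*
			 * Write the buffer back only if it is still marked
			 * delayed-write, has been dirty for at least autoup
			 * ticks, and its lock can be taken without blocking.
			 */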
			if ((bp->b_flags & B_DELWRI) &&
			    (ddi_get_lbolt() - bp->b_start >= autoup) &&
			    sema_tryp(&bp->b_sem)) {
				bp->b_flags |= B_ASYNC;
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
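				/*
				 * Buffers attached to a vnode are treated
				 * as UFS buffers (VTOI) and are written
				 * with UFS_BWRITE(); anonymous buffers go
				 * through the generic BWRITE() path.
				 */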
				if (bp->b_vp == NULL) {
					BWRITE(bp);
				} else {
					UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs,
					    bp);
				}
				mutex_enter(hmp);
				bp = dwp->av_forw;
			} else {
				bp = bp->av_forw;
			}
		}
		mutex_exit(hmp);
	}

	/*
	 * There is no need to wake up any thread waiting on bio_mem_cv
	 * since brelse will wake them up as soon as IO is complete.
	 */
	bfreelist.b_bcount = bcount;

	if (dopageflush)
		fsflush_do_pages();

	if (!doiflush)
		goto loop;

	/*
	 * If the system was not booted to single user mode, skip the
	 * inode flushing until after fsflush_iflush_delay secs have elapsed.
	 */
	if ((boothowto & RB_SINGLE) == 0 &&
	    (ddi_get_lbolt64() / hz) < fsflush_iflush_delay)
		goto loop;

	/*
	 * Flush cached attribute information (e.g. inodes).
	 */
	if (++count >= icount) {
		count = 0;

		/*
		 * Sync back cached data.
		 */
		RLOCK_VFSSW();
		for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
			if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) {
				vfs_refvfssw(vswp);
				RUNLOCK_VFSSW();
				(void) fsop_sync_by_kind(vswp - vfssw,
				    SYNC_ATTR, kcred);
				vfs_unrefvfssw(vswp);
				RLOCK_VFSSW();
			}
		}
		RUNLOCK_VFSSW();
	}
	goto loop;
}