xref: /illumos-gate/usr/src/uts/common/fs/ufs/ufs_alloc.c (revision bbf21555)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
533c22cb3Smishra  * Common Development and Distribution License (the "License").
633c22cb3Smishra  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22e7da395aSOwen Roberts  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
271f563eb1SToomas Soome /*	  All Rights Reserved	*/
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
317c478bd9Sstevel@tonic-gate  * The Regents of the University of California
327c478bd9Sstevel@tonic-gate  * All Rights Reserved
337c478bd9Sstevel@tonic-gate  *
347c478bd9Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
357c478bd9Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
367c478bd9Sstevel@tonic-gate  * contributors.
377c478bd9Sstevel@tonic-gate  */
387c478bd9Sstevel@tonic-gate 
39303bf60bSsdebnath #include <sys/condvar_impl.h>
407c478bd9Sstevel@tonic-gate #include <sys/types.h>
417c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
427c478bd9Sstevel@tonic-gate #include <sys/debug.h>
437c478bd9Sstevel@tonic-gate #include <sys/param.h>
447c478bd9Sstevel@tonic-gate #include <sys/systm.h>
457c478bd9Sstevel@tonic-gate #include <sys/signal.h>
467c478bd9Sstevel@tonic-gate #include <sys/cred.h>
477c478bd9Sstevel@tonic-gate #include <sys/proc.h>
487c478bd9Sstevel@tonic-gate #include <sys/disp.h>
497c478bd9Sstevel@tonic-gate #include <sys/user.h>
507c478bd9Sstevel@tonic-gate #include <sys/buf.h>
517c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
527c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
537c478bd9Sstevel@tonic-gate #include <sys/acl.h>
547c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
557c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
567c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_acl.h>
577c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
587c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
597c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
607c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
617c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_panic.h>
627c478bd9Sstevel@tonic-gate #include <sys/errno.h>
637c478bd9Sstevel@tonic-gate #include <sys/time.h>
647c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
657c478bd9Sstevel@tonic-gate #include <sys/file.h>
667c478bd9Sstevel@tonic-gate #include <sys/fcntl.h>
677c478bd9Sstevel@tonic-gate #include <sys/flock.h>
687c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h>
697c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
707c478bd9Sstevel@tonic-gate #include <sys/policy.h>
71e7da395aSOwen Roberts #include <sys/fs/ufs_log.h>
727c478bd9Sstevel@tonic-gate 
737c478bd9Sstevel@tonic-gate static ino_t	hashalloc();
747c478bd9Sstevel@tonic-gate static daddr_t	fragextend();
757c478bd9Sstevel@tonic-gate static daddr_t	alloccg();
767c478bd9Sstevel@tonic-gate static daddr_t	alloccgblk();
777c478bd9Sstevel@tonic-gate static ino_t	ialloccg();
787c478bd9Sstevel@tonic-gate static daddr_t	mapsearch();
79e7da395aSOwen Roberts static int	findlogstartcg();
807c478bd9Sstevel@tonic-gate 
817c478bd9Sstevel@tonic-gate extern int	inside[], around[];
827c478bd9Sstevel@tonic-gate extern uchar_t	*fragtbl[];
837c478bd9Sstevel@tonic-gate void delay();
847c478bd9Sstevel@tonic-gate 
857c478bd9Sstevel@tonic-gate /*
867c478bd9Sstevel@tonic-gate  * Allocate a block in the file system.
877c478bd9Sstevel@tonic-gate  *
887c478bd9Sstevel@tonic-gate  * The size of the requested block is given, which must be some
897c478bd9Sstevel@tonic-gate  * multiple of fs_fsize and <= fs_bsize.
907c478bd9Sstevel@tonic-gate  * A preference may be optionally specified. If a preference is given
917c478bd9Sstevel@tonic-gate  * the following hierarchy is used to allocate a block:
927c478bd9Sstevel@tonic-gate  *   1) allocate the requested block.
937c478bd9Sstevel@tonic-gate  *   2) allocate a rotationally optimal block in the same cylinder.
947c478bd9Sstevel@tonic-gate  *   3) allocate a block in the same cylinder group.
957c478bd9Sstevel@tonic-gate  *   4) quadratically rehash into other cylinder groups, until an
967c478bd9Sstevel@tonic-gate  *	available block is located.
977c478bd9Sstevel@tonic-gate  * If no block preference is given the following hierarchy is used
987c478bd9Sstevel@tonic-gate  * to allocate a block:
997c478bd9Sstevel@tonic-gate  *   1) allocate a block in the cylinder group that contains the
1007c478bd9Sstevel@tonic-gate  *	inode for the file.
1017c478bd9Sstevel@tonic-gate  *   2) quadratically rehash into other cylinder groups, until an
1027c478bd9Sstevel@tonic-gate  *	available block is located.
1037c478bd9Sstevel@tonic-gate  */
1047c478bd9Sstevel@tonic-gate int
alloc(struct inode * ip,daddr_t bpref,int size,daddr_t * bnp,cred_t * cr)1057c478bd9Sstevel@tonic-gate alloc(struct inode *ip, daddr_t bpref, int size, daddr_t *bnp, cred_t *cr)
1067c478bd9Sstevel@tonic-gate {
1077c478bd9Sstevel@tonic-gate 	struct fs *fs;
1087c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
1097c478bd9Sstevel@tonic-gate 	daddr_t bno;
1107c478bd9Sstevel@tonic-gate 	int cg;
1117c478bd9Sstevel@tonic-gate 	int err;
1127c478bd9Sstevel@tonic-gate 	char *errmsg = NULL;
1137c478bd9Sstevel@tonic-gate 	size_t len;
114d3d50737SRafael Vanoni 	clock_t	now;
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
1177c478bd9Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
1187c478bd9Sstevel@tonic-gate 	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
119303bf60bSsdebnath 		err = ufs_fault(ITOV(ip), "alloc: bad size, dev = 0x%lx,"
120303bf60bSsdebnath 		    " bsize = %d, size = %d, fs = %s\n",
121303bf60bSsdebnath 		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
1227c478bd9Sstevel@tonic-gate 		return (err);
1237c478bd9Sstevel@tonic-gate 	}
1247c478bd9Sstevel@tonic-gate 	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
1257c478bd9Sstevel@tonic-gate 		goto nospace;
1267c478bd9Sstevel@tonic-gate 	if (freespace(fs, ufsvfsp) <= 0 &&
1277c478bd9Sstevel@tonic-gate 	    secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0)
1287c478bd9Sstevel@tonic-gate 		goto nospace;
1297c478bd9Sstevel@tonic-gate 	err = chkdq(ip, (long)btodb(size), 0, cr, &errmsg, &len);
1307c478bd9Sstevel@tonic-gate 	/* Note that may not have err, but may have errmsg */
1317c478bd9Sstevel@tonic-gate 	if (errmsg != NULL) {
1327c478bd9Sstevel@tonic-gate 		uprintf(errmsg);
1337c478bd9Sstevel@tonic-gate 		kmem_free(errmsg, len);
1347c478bd9Sstevel@tonic-gate 		errmsg = NULL;
1357c478bd9Sstevel@tonic-gate 	}
1367c478bd9Sstevel@tonic-gate 	if (err)
1377c478bd9Sstevel@tonic-gate 		return (err);
1387c478bd9Sstevel@tonic-gate 	if (bpref >= fs->fs_size)
1397c478bd9Sstevel@tonic-gate 		bpref = 0;
1407c478bd9Sstevel@tonic-gate 	if (bpref == 0)
1417c478bd9Sstevel@tonic-gate 		cg = (int)itog(fs, ip->i_number);
1427c478bd9Sstevel@tonic-gate 	else
1437c478bd9Sstevel@tonic-gate 		cg = dtog(fs, bpref);
1447c478bd9Sstevel@tonic-gate 
1457c478bd9Sstevel@tonic-gate 	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
1467c478bd9Sstevel@tonic-gate 	    (ulong_t (*)())alloccg);
1477c478bd9Sstevel@tonic-gate 	if (bno > 0) {
1487c478bd9Sstevel@tonic-gate 		*bnp = bno;
1497c478bd9Sstevel@tonic-gate 		return (0);
1507c478bd9Sstevel@tonic-gate 	}
1517c478bd9Sstevel@tonic-gate 
1527c478bd9Sstevel@tonic-gate 	/*
1537c478bd9Sstevel@tonic-gate 	 * hashalloc() failed because some other thread grabbed
1547c478bd9Sstevel@tonic-gate 	 * the last block so unwind the quota operation.  We can
1557c478bd9Sstevel@tonic-gate 	 * ignore the return because subtractions don't fail and
1567c478bd9Sstevel@tonic-gate 	 * size is guaranteed to be >= zero by our caller.
1577c478bd9Sstevel@tonic-gate 	 */
1587c478bd9Sstevel@tonic-gate 	(void) chkdq(ip, -(long)btodb(size), 0, cr, (char **)NULL,
15980d34432Sfrankho 	    (size_t *)NULL);
1607c478bd9Sstevel@tonic-gate 
1617c478bd9Sstevel@tonic-gate nospace:
162d3d50737SRafael Vanoni 	now = ddi_get_lbolt();
1637c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
164d3d50737SRafael Vanoni 	if ((now - ufsvfsp->vfs_lastwhinetime) > (hz << 2) &&
16580d34432Sfrankho 	    (!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) {
166d3d50737SRafael Vanoni 		ufsvfsp->vfs_lastwhinetime = now;
1677c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "alloc: %s: file system full", fs->fs_fsmnt);
1687c478bd9Sstevel@tonic-gate 	}
1697c478bd9Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
1707c478bd9Sstevel@tonic-gate 	return (ENOSPC);
1717c478bd9Sstevel@tonic-gate }
1727c478bd9Sstevel@tonic-gate 
1737c478bd9Sstevel@tonic-gate /*
1747c478bd9Sstevel@tonic-gate  * Reallocate a fragment to a bigger size
1757c478bd9Sstevel@tonic-gate  *
1767c478bd9Sstevel@tonic-gate  * The number and size of the old block is given, and a preference
1777c478bd9Sstevel@tonic-gate  * and new size is also specified.  The allocator attempts to extend
1787c478bd9Sstevel@tonic-gate  * the original block.  Failing that, the regular block allocator is
1797c478bd9Sstevel@tonic-gate  * invoked to get an appropriate block.
1807c478bd9Sstevel@tonic-gate  */
1817c478bd9Sstevel@tonic-gate int
realloccg(struct inode * ip,daddr_t bprev,daddr_t bpref,int osize,int nsize,daddr_t * bnp,cred_t * cr)1827c478bd9Sstevel@tonic-gate realloccg(struct inode *ip, daddr_t bprev, daddr_t bpref, int osize,
1837c478bd9Sstevel@tonic-gate     int nsize, daddr_t *bnp, cred_t *cr)
1847c478bd9Sstevel@tonic-gate {
1857c478bd9Sstevel@tonic-gate 	daddr_t bno;
1867c478bd9Sstevel@tonic-gate 	struct fs *fs;
1877c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
1887c478bd9Sstevel@tonic-gate 	int cg, request;
1897c478bd9Sstevel@tonic-gate 	int err;
1907c478bd9Sstevel@tonic-gate 	char *errmsg = NULL;
1917c478bd9Sstevel@tonic-gate 	size_t len;
192d3d50737SRafael Vanoni 	clock_t	now;
1937c478bd9Sstevel@tonic-gate 
1947c478bd9Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
1957c478bd9Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
1967c478bd9Sstevel@tonic-gate 	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
1977c478bd9Sstevel@tonic-gate 	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
1987c478bd9Sstevel@tonic-gate 		err = ufs_fault(ITOV(ip),
199303bf60bSsdebnath 		    "realloccg: bad size, dev=0x%lx, bsize=%d, "
200303bf60bSsdebnath 		    "osize=%d, nsize=%d, fs=%s\n",
201303bf60bSsdebnath 		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
2027c478bd9Sstevel@tonic-gate 		return (err);
2037c478bd9Sstevel@tonic-gate 	}
2047c478bd9Sstevel@tonic-gate 	if (freespace(fs, ufsvfsp) <= 0 &&
2057c478bd9Sstevel@tonic-gate 	    secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0)
2067c478bd9Sstevel@tonic-gate 		goto nospace;
2077c478bd9Sstevel@tonic-gate 	if (bprev == 0) {
2087c478bd9Sstevel@tonic-gate 		err = ufs_fault(ITOV(ip),
209303bf60bSsdebnath 		    "realloccg: bad bprev, dev = 0x%lx, bsize = %d,"
210303bf60bSsdebnath 		    " bprev = %ld, fs = %s\n", ip->i_dev, fs->fs_bsize, bprev,
2117c478bd9Sstevel@tonic-gate 		    fs->fs_fsmnt);
2127c478bd9Sstevel@tonic-gate 		return (err);
2137c478bd9Sstevel@tonic-gate 	}
2147c478bd9Sstevel@tonic-gate 	err = chkdq(ip, (long)btodb(nsize - osize), 0, cr, &errmsg, &len);
2157c478bd9Sstevel@tonic-gate 	/* Note that may not have err, but may have errmsg */
2167c478bd9Sstevel@tonic-gate 	if (errmsg != NULL) {
2177c478bd9Sstevel@tonic-gate 		uprintf(errmsg);
2187c478bd9Sstevel@tonic-gate 		kmem_free(errmsg, len);
2197c478bd9Sstevel@tonic-gate 		errmsg = NULL;
2207c478bd9Sstevel@tonic-gate 	}
2217c478bd9Sstevel@tonic-gate 	if (err)
2227c478bd9Sstevel@tonic-gate 		return (err);
2237c478bd9Sstevel@tonic-gate 	cg = dtog(fs, bprev);
2247c478bd9Sstevel@tonic-gate 	bno = fragextend(ip, cg, (long)bprev, osize, nsize);
2257c478bd9Sstevel@tonic-gate 	if (bno != 0) {
2267c478bd9Sstevel@tonic-gate 		*bnp = bno;
2277c478bd9Sstevel@tonic-gate 		return (0);
2287c478bd9Sstevel@tonic-gate 	}
2297c478bd9Sstevel@tonic-gate 	if (bpref >= fs->fs_size)
2307c478bd9Sstevel@tonic-gate 		bpref = 0;
2317c478bd9Sstevel@tonic-gate 
2327c478bd9Sstevel@tonic-gate 	/*
2337c478bd9Sstevel@tonic-gate 	 * When optimizing for time we allocate a full block and
2347c478bd9Sstevel@tonic-gate 	 * then only use the upper portion for this request. When
2357c478bd9Sstevel@tonic-gate 	 * this file grows again it will grow into the unused portion
2367c478bd9Sstevel@tonic-gate 	 * of the block (See fragextend() above).  This saves time
2377c478bd9Sstevel@tonic-gate 	 * because an extra disk write would be needed if the frags
2387c478bd9Sstevel@tonic-gate 	 * following the current allocation were not free. The extra
2397c478bd9Sstevel@tonic-gate 	 * disk write is needed to move the data from its current
2407c478bd9Sstevel@tonic-gate 	 * location into the newly allocated position.
2417c478bd9Sstevel@tonic-gate 	 *
2427c478bd9Sstevel@tonic-gate 	 * When optimizing for space we allocate a run of frags
2437c478bd9Sstevel@tonic-gate 	 * that is just the right size for this request.
2447c478bd9Sstevel@tonic-gate 	 */
2457c478bd9Sstevel@tonic-gate 	request = (fs->fs_optim == FS_OPTTIME) ? fs->fs_bsize : nsize;
2467c478bd9Sstevel@tonic-gate 	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
24780d34432Sfrankho 	    (ulong_t (*)())alloccg);
2487c478bd9Sstevel@tonic-gate 	if (bno > 0) {
2497c478bd9Sstevel@tonic-gate 		*bnp = bno;
2507c478bd9Sstevel@tonic-gate 		if (nsize < request)
2517c478bd9Sstevel@tonic-gate 			(void) free(ip, bno + numfrags(fs, nsize),
2527c478bd9Sstevel@tonic-gate 			    (off_t)(request - nsize), I_NOCANCEL);
2537c478bd9Sstevel@tonic-gate 		return (0);
2547c478bd9Sstevel@tonic-gate 	}
2557c478bd9Sstevel@tonic-gate 
2567c478bd9Sstevel@tonic-gate 	/*
2577c478bd9Sstevel@tonic-gate 	 * hashalloc() failed because some other thread grabbed
2587c478bd9Sstevel@tonic-gate 	 * the last block so unwind the quota operation.  We can
2597c478bd9Sstevel@tonic-gate 	 * ignore the return because subtractions don't fail, and
2607c478bd9Sstevel@tonic-gate 	 * our caller guarantees nsize >= osize.
2617c478bd9Sstevel@tonic-gate 	 */
2627c478bd9Sstevel@tonic-gate 	(void) chkdq(ip, -(long)btodb(nsize - osize), 0, cr, (char **)NULL,
26380d34432Sfrankho 	    (size_t *)NULL);
2647c478bd9Sstevel@tonic-gate 
2657c478bd9Sstevel@tonic-gate nospace:
266d3d50737SRafael Vanoni 	now = ddi_get_lbolt();
2677c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
268d3d50737SRafael Vanoni 	if ((now - ufsvfsp->vfs_lastwhinetime) > (hz << 2) &&
26980d34432Sfrankho 	    (!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) {
270d3d50737SRafael Vanoni 		ufsvfsp->vfs_lastwhinetime = now;
2717c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE,
27280d34432Sfrankho 		    "realloccg %s: file system full", fs->fs_fsmnt);
2737c478bd9Sstevel@tonic-gate 	}
2747c478bd9Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
2757c478bd9Sstevel@tonic-gate 	return (ENOSPC);
2767c478bd9Sstevel@tonic-gate }
2777c478bd9Sstevel@tonic-gate 
2787c478bd9Sstevel@tonic-gate /*
2797c478bd9Sstevel@tonic-gate  * Allocate an inode in the file system.
2807c478bd9Sstevel@tonic-gate  *
2817c478bd9Sstevel@tonic-gate  * A preference may be optionally specified. If a preference is given
2827c478bd9Sstevel@tonic-gate  * the following hierarchy is used to allocate an inode:
2837c478bd9Sstevel@tonic-gate  *   1) allocate the requested inode.
2847c478bd9Sstevel@tonic-gate  *   2) allocate an inode in the same cylinder group.
2857c478bd9Sstevel@tonic-gate  *   3) quadratically rehash into other cylinder groups, until an
2867c478bd9Sstevel@tonic-gate  *	available inode is located.
2877c478bd9Sstevel@tonic-gate  * If no inode preference is given the following hierarchy is used
2887c478bd9Sstevel@tonic-gate  * to allocate an inode:
2897c478bd9Sstevel@tonic-gate  *   1) allocate an inode in cylinder group 0.
2907c478bd9Sstevel@tonic-gate  *   2) quadratically rehash into other cylinder groups, until an
2917c478bd9Sstevel@tonic-gate  *	available inode is located.
2927c478bd9Sstevel@tonic-gate  */
2937c478bd9Sstevel@tonic-gate int
ufs_ialloc(struct inode * pip,ino_t ipref,mode_t mode,struct inode ** ipp,cred_t * cr)2947c478bd9Sstevel@tonic-gate ufs_ialloc(struct inode *pip,
2957c478bd9Sstevel@tonic-gate     ino_t ipref, mode_t mode, struct inode **ipp, cred_t *cr)
2967c478bd9Sstevel@tonic-gate {
2977c478bd9Sstevel@tonic-gate 	struct inode *ip;
2987c478bd9Sstevel@tonic-gate 	struct fs *fs;
2997c478bd9Sstevel@tonic-gate 	int cg;
3007c478bd9Sstevel@tonic-gate 	ino_t ino;
3017c478bd9Sstevel@tonic-gate 	int err;
3027c478bd9Sstevel@tonic-gate 	int nifree;
3037c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = pip->i_ufsvfs;
3047c478bd9Sstevel@tonic-gate 	char *errmsg = NULL;
3057c478bd9Sstevel@tonic-gate 	size_t len;
3067c478bd9Sstevel@tonic-gate 
3077c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&pip->i_rwlock));
3087c478bd9Sstevel@tonic-gate 	fs = pip->i_fs;
3097c478bd9Sstevel@tonic-gate loop:
3107c478bd9Sstevel@tonic-gate 	nifree = fs->fs_cstotal.cs_nifree;
3117c478bd9Sstevel@tonic-gate 
3127c478bd9Sstevel@tonic-gate 	if (nifree == 0)
3137c478bd9Sstevel@tonic-gate 		goto noinodes;
3147c478bd9Sstevel@tonic-gate 	/*
3157c478bd9Sstevel@tonic-gate 	 * Shadow inodes don't count against a user's inode allocation.
3167c478bd9Sstevel@tonic-gate 	 * They are an implementation method and not a resource.
3177c478bd9Sstevel@tonic-gate 	 */
3187c478bd9Sstevel@tonic-gate 	if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
3197c478bd9Sstevel@tonic-gate 		err = chkiq((struct ufsvfs *)ITOV(pip)->v_vfsp->vfs_data,
32080d34432Sfrankho 		    /* change */ 1, (struct inode *)NULL, crgetuid(cr), 0,
32180d34432Sfrankho 		    cr, &errmsg, &len);
3227c478bd9Sstevel@tonic-gate 		/*
3237c478bd9Sstevel@tonic-gate 		 * As we haven't acquired any locks yet, dump the message
3247c478bd9Sstevel@tonic-gate 		 * now.
3257c478bd9Sstevel@tonic-gate 		 */
3267c478bd9Sstevel@tonic-gate 		if (errmsg != NULL) {
3277c478bd9Sstevel@tonic-gate 			uprintf(errmsg);
3287c478bd9Sstevel@tonic-gate 			kmem_free(errmsg, len);
3297c478bd9Sstevel@tonic-gate 			errmsg = NULL;
3307c478bd9Sstevel@tonic-gate 		}
3317c478bd9Sstevel@tonic-gate 		if (err)
3327c478bd9Sstevel@tonic-gate 			return (err);
3337c478bd9Sstevel@tonic-gate 	}
3347c478bd9Sstevel@tonic-gate 
3357c478bd9Sstevel@tonic-gate 	if (ipref >= (ulong_t)(fs->fs_ncg * fs->fs_ipg))
3367c478bd9Sstevel@tonic-gate 		ipref = 0;
3377c478bd9Sstevel@tonic-gate 	cg = (int)itog(fs, ipref);
3387c478bd9Sstevel@tonic-gate 	ino = (ino_t)hashalloc(pip, cg, (long)ipref, (int)mode,
3397c478bd9Sstevel@tonic-gate 	    (ulong_t (*)())ialloccg);
3407c478bd9Sstevel@tonic-gate 	if (ino == 0) {
3417c478bd9Sstevel@tonic-gate 		if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
3427c478bd9Sstevel@tonic-gate 			/*
3437c478bd9Sstevel@tonic-gate 			 * We can safely ignore the return from chkiq()
3447c478bd9Sstevel@tonic-gate 			 * because deallocations can only fail if we
3457c478bd9Sstevel@tonic-gate 			 * can't get the user's quota info record off
3467c478bd9Sstevel@tonic-gate 			 * the disk due to an I/O error.  In that case,
3477c478bd9Sstevel@tonic-gate 			 * the quota subsystem is already messed up.
3487c478bd9Sstevel@tonic-gate 			 */
3497c478bd9Sstevel@tonic-gate 			(void) chkiq(ufsvfsp, /* change */ -1,
35080d34432Sfrankho 			    (struct inode *)NULL, crgetuid(cr), 0, cr,
35180d34432Sfrankho 			    (char **)NULL, (size_t *)NULL);
3527c478bd9Sstevel@tonic-gate 		}
3537c478bd9Sstevel@tonic-gate 		goto noinodes;
3547c478bd9Sstevel@tonic-gate 	}
3557c478bd9Sstevel@tonic-gate 	err = ufs_iget(pip->i_vfs, ino, ipp, cr);
3567c478bd9Sstevel@tonic-gate 	if (err) {
3577c478bd9Sstevel@tonic-gate 		if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
3587c478bd9Sstevel@tonic-gate 			/*
3597c478bd9Sstevel@tonic-gate 			 * See above comment about why it is safe to ignore an
3607c478bd9Sstevel@tonic-gate 			 * error return here.
3617c478bd9Sstevel@tonic-gate 			 */
3627c478bd9Sstevel@tonic-gate 			(void) chkiq(ufsvfsp, /* change */ -1,
36380d34432Sfrankho 			    (struct inode *)NULL, crgetuid(cr), 0, cr,
36480d34432Sfrankho 			    (char **)NULL, (size_t *)NULL);
3657c478bd9Sstevel@tonic-gate 		}
3667c478bd9Sstevel@tonic-gate 		ufs_ifree(pip, ino, 0);
3677c478bd9Sstevel@tonic-gate 		return (err);
3687c478bd9Sstevel@tonic-gate 	}
3697c478bd9Sstevel@tonic-gate 	ip = *ipp;
3707c478bd9Sstevel@tonic-gate 	ASSERT(!ip->i_ufs_acl);
3717c478bd9Sstevel@tonic-gate 	ASSERT(!ip->i_dquot);
3727c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
3737c478bd9Sstevel@tonic-gate 
3747c478bd9Sstevel@tonic-gate 	/*
3757c478bd9Sstevel@tonic-gate 	 * Check if we really got a free inode, if not then complain
3767c478bd9Sstevel@tonic-gate 	 * and mark the inode ISTALE so that it will be freed by the
3777c478bd9Sstevel@tonic-gate 	 * ufs idle thread eventually and will not be sent to ufs_delete().
3787c478bd9Sstevel@tonic-gate 	 */
3797c478bd9Sstevel@tonic-gate 	if (ip->i_mode || (ip->i_nlink > 0)) {
3807c478bd9Sstevel@tonic-gate 		ip->i_flag |= ISTALE;
3817c478bd9Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
3827c478bd9Sstevel@tonic-gate 		VN_RELE(ITOV(ip));
3837c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN,
384*bbf21555SRichard Lowe 		    "%s: unexpected allocated inode %d, run fsck(8)%s",
38580d34432Sfrankho 		    fs->fs_fsmnt, (int)ino,
38680d34432Sfrankho 		    (TRANS_ISTRANS(ufsvfsp) ? " -o f" : ""));
3877c478bd9Sstevel@tonic-gate 		goto loop;
3887c478bd9Sstevel@tonic-gate 	}
3897c478bd9Sstevel@tonic-gate 
3907c478bd9Sstevel@tonic-gate 	/*
3917c478bd9Sstevel@tonic-gate 	 * Check the inode has no size or data blocks.
3927c478bd9Sstevel@tonic-gate 	 * This could have happened if the truncation failed when
3937c478bd9Sstevel@tonic-gate 	 * deleting the inode. It used to be possible for this to occur
3947c478bd9Sstevel@tonic-gate 	 * if a block allocation failed when iteratively truncating a
3957c478bd9Sstevel@tonic-gate 	 * large file using logging and with a full file system.
3967c478bd9Sstevel@tonic-gate 	 * This was fixed with bug fix 4348738. However, truncation may
3977c478bd9Sstevel@tonic-gate 	 * still fail on an IO error. So in all cases for safety and
3987c478bd9Sstevel@tonic-gate 	 * security we clear out the size; the blocks allocated; and
3997c478bd9Sstevel@tonic-gate 	 * pointers to the blocks. This will ultimately cause a fsck
4007c478bd9Sstevel@tonic-gate 	 * error of un-accounted for blocks, but its a fairly benign error,
4017c478bd9Sstevel@tonic-gate 	 * and possibly the correct thing to do anyway as accesssing those
4027c478bd9Sstevel@tonic-gate 	 * blocks agains may lead to more IO errors.
4037c478bd9Sstevel@tonic-gate 	 */
4047c478bd9Sstevel@tonic-gate 	if (ip->i_size || ip->i_blocks) {
4057c478bd9Sstevel@tonic-gate 		int i;
4067c478bd9Sstevel@tonic-gate 
4077c478bd9Sstevel@tonic-gate 		if (ip->i_size) {
4087c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
409*bbf21555SRichard Lowe 			    "%s: free inode %d had size 0x%llx, run fsck(8)%s",
410303bf60bSsdebnath 			    fs->fs_fsmnt, (int)ino, ip->i_size,
411303bf60bSsdebnath 			    (TRANS_ISTRANS(ufsvfsp) ? " -o f" : ""));
4127c478bd9Sstevel@tonic-gate 		}
4137c478bd9Sstevel@tonic-gate 		/*
4147c478bd9Sstevel@tonic-gate 		 * Clear any garbage left behind.
4157c478bd9Sstevel@tonic-gate 		 */
4167c478bd9Sstevel@tonic-gate 		ip->i_size = (u_offset_t)0;
4177c478bd9Sstevel@tonic-gate 		ip->i_blocks = 0;
4187c478bd9Sstevel@tonic-gate 		for (i = 0; i < NDADDR; i++)
4197c478bd9Sstevel@tonic-gate 			ip->i_db[i] = 0;
4207c478bd9Sstevel@tonic-gate 		for (i = 0; i < NIADDR; i++)
4217c478bd9Sstevel@tonic-gate 			ip->i_ib[i] = 0;
4227c478bd9Sstevel@tonic-gate 	}
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate 	/*
4257c478bd9Sstevel@tonic-gate 	 * Initialize the link count
4267c478bd9Sstevel@tonic-gate 	 */
4277c478bd9Sstevel@tonic-gate 	ip->i_nlink = 0;
4287c478bd9Sstevel@tonic-gate 
4297c478bd9Sstevel@tonic-gate 	/*
4307c478bd9Sstevel@tonic-gate 	 * Clear the old flags
4317c478bd9Sstevel@tonic-gate 	 */
4327c478bd9Sstevel@tonic-gate 	ip->i_flag &= IREF;
4337c478bd9Sstevel@tonic-gate 
4347c478bd9Sstevel@tonic-gate 	/*
4357c478bd9Sstevel@tonic-gate 	 * Access times are not really defined if the fs is mounted
4367c478bd9Sstevel@tonic-gate 	 * with 'noatime'. But it can cause nfs clients to fail
4377c478bd9Sstevel@tonic-gate 	 * open() if the atime is not a legal value. Set a legal value
4387c478bd9Sstevel@tonic-gate 	 * here when the inode is allocated.
4397c478bd9Sstevel@tonic-gate 	 */
4407c478bd9Sstevel@tonic-gate 	if (ufsvfsp->vfs_noatime) {
4417c478bd9Sstevel@tonic-gate 		mutex_enter(&ufs_iuniqtime_lock);
4427c478bd9Sstevel@tonic-gate 		ip->i_atime = iuniqtime;
4437c478bd9Sstevel@tonic-gate 		mutex_exit(&ufs_iuniqtime_lock);
4447c478bd9Sstevel@tonic-gate 	}
4457c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
4467c478bd9Sstevel@tonic-gate 	return (0);
4477c478bd9Sstevel@tonic-gate noinodes:
4487c478bd9Sstevel@tonic-gate 	if (!(TRANS_ISTRANS(ufsvfsp)) || !(pip->i_flag & IQUIET))
4497c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "%s: out of inodes\n", fs->fs_fsmnt);
4507c478bd9Sstevel@tonic-gate 	return (ENOSPC);
4517c478bd9Sstevel@tonic-gate }
4527c478bd9Sstevel@tonic-gate 
4537c478bd9Sstevel@tonic-gate /*
4547c478bd9Sstevel@tonic-gate  * Find a cylinder group to place a directory.
4557c478bd9Sstevel@tonic-gate  * Returns an inumber within the selected cylinder group.
4567c478bd9Sstevel@tonic-gate  * Note, the vfs_lock is not needed as we don't require exact cg summary info.
4577c478bd9Sstevel@tonic-gate  *
4587c478bd9Sstevel@tonic-gate  * If the switch ufs_close_dirs is set, then the policy is to use
4597c478bd9Sstevel@tonic-gate  * the current cg if it has more than 25% free inodes and more
4607c478bd9Sstevel@tonic-gate  * than 25% free blocks. Otherwise the cgs are searched from
4617c478bd9Sstevel@tonic-gate  * the beginning and the first cg with the same criteria is
4627c478bd9Sstevel@tonic-gate  * used. If that is also null then we revert to the old algorithm.
4637c478bd9Sstevel@tonic-gate  * This tends to cluster files at the beginning of the disk
4647c478bd9Sstevel@tonic-gate  * until the disk gets full.
4657c478bd9Sstevel@tonic-gate  *
4667c478bd9Sstevel@tonic-gate  * Otherwise if ufs_close_dirs is not set then the original policy is
4677c478bd9Sstevel@tonic-gate  * used which is to select from among those cylinder groups with
4687c478bd9Sstevel@tonic-gate  * above the average number of free inodes, the one with the smallest
4697c478bd9Sstevel@tonic-gate  * number of directories.
4707c478bd9Sstevel@tonic-gate  */
4717c478bd9Sstevel@tonic-gate 
4727c478bd9Sstevel@tonic-gate int ufs_close_dirs = 1;	/* allocate directories close as possible */
4737c478bd9Sstevel@tonic-gate 
4747c478bd9Sstevel@tonic-gate ino_t
dirpref(inode_t * dp)4757c478bd9Sstevel@tonic-gate dirpref(inode_t *dp)
4767c478bd9Sstevel@tonic-gate {
4777c478bd9Sstevel@tonic-gate 	int cg, minndir, mincg, avgifree, mininode, minbpg, ifree;
4787c478bd9Sstevel@tonic-gate 	struct fs *fs = dp->i_fs;
4797c478bd9Sstevel@tonic-gate 
4807c478bd9Sstevel@tonic-gate 	cg = itog(fs, dp->i_number);
4817c478bd9Sstevel@tonic-gate 	mininode = fs->fs_ipg >> 2;
4827c478bd9Sstevel@tonic-gate 	minbpg = fs->fs_maxbpg >> 2;
4837c478bd9Sstevel@tonic-gate 	if (ufs_close_dirs &&
4847c478bd9Sstevel@tonic-gate 	    (fs->fs_cs(fs, cg).cs_nifree > mininode) &&
4857c478bd9Sstevel@tonic-gate 	    (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) {
4867c478bd9Sstevel@tonic-gate 		return (dp->i_number);
4877c478bd9Sstevel@tonic-gate 	}
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate 	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
4907c478bd9Sstevel@tonic-gate 	minndir = fs->fs_ipg;
4917c478bd9Sstevel@tonic-gate 	mincg = 0;
4927c478bd9Sstevel@tonic-gate 	for (cg = 0; cg < fs->fs_ncg; cg++) {
4937c478bd9Sstevel@tonic-gate 		ifree = fs->fs_cs(fs, cg).cs_nifree;
4947c478bd9Sstevel@tonic-gate 		if (ufs_close_dirs &&
4957c478bd9Sstevel@tonic-gate 		    (ifree > mininode) &&
4967c478bd9Sstevel@tonic-gate 		    (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) {
4977c478bd9Sstevel@tonic-gate 			return ((ino_t)(fs->fs_ipg * cg));
4987c478bd9Sstevel@tonic-gate 		}
4997c478bd9Sstevel@tonic-gate 		if ((fs->fs_cs(fs, cg).cs_ndir < minndir) &&
5007c478bd9Sstevel@tonic-gate 		    (ifree >= avgifree)) {
5017c478bd9Sstevel@tonic-gate 			mincg = cg;
5027c478bd9Sstevel@tonic-gate 			minndir = fs->fs_cs(fs, cg).cs_ndir;
5037c478bd9Sstevel@tonic-gate 		}
5047c478bd9Sstevel@tonic-gate 	}
5057c478bd9Sstevel@tonic-gate 	return ((ino_t)(fs->fs_ipg * mincg));
5067c478bd9Sstevel@tonic-gate }
5077c478bd9Sstevel@tonic-gate 
5087c478bd9Sstevel@tonic-gate /*
5097c478bd9Sstevel@tonic-gate  * Select the desired position for the next block in a file.  The file is
5107c478bd9Sstevel@tonic-gate  * logically divided into sections. The first section is composed of the
5117c478bd9Sstevel@tonic-gate  * direct blocks. Each additional section contains fs_maxbpg blocks.
5127c478bd9Sstevel@tonic-gate  *
5137c478bd9Sstevel@tonic-gate  * If no blocks have been allocated in the first section, the policy is to
5147c478bd9Sstevel@tonic-gate  * request a block in the same cylinder group as the inode that describes
5157c478bd9Sstevel@tonic-gate  * the file. If no blocks have been allocated in any other section, the
5167c478bd9Sstevel@tonic-gate  * policy is to place the section in a cylinder group with a greater than
5177c478bd9Sstevel@tonic-gate  * average number of free blocks.  An appropriate cylinder group is found
5187c478bd9Sstevel@tonic-gate  * by using a rotor that sweeps the cylinder groups. When a new group of
5197c478bd9Sstevel@tonic-gate  * blocks is needed, the sweep begins in the cylinder group following the
5207c478bd9Sstevel@tonic-gate  * cylinder group from which the previous allocation was made. The sweep
5217c478bd9Sstevel@tonic-gate  * continues until a cylinder group with greater than the average number
5227c478bd9Sstevel@tonic-gate  * of free blocks is found. If the allocation is for the first block in an
5237c478bd9Sstevel@tonic-gate  * indirect block, the information on the previous allocation is unavailable;
5247c478bd9Sstevel@tonic-gate  * here a best guess is made based upon the logical block number being
5257c478bd9Sstevel@tonic-gate  * allocated.
5267c478bd9Sstevel@tonic-gate  *
5277c478bd9Sstevel@tonic-gate  * If a section is already partially allocated, the policy is to
5287c478bd9Sstevel@tonic-gate  * contiguously allocate fs_maxcontig blocks.  The end of one of these
5297c478bd9Sstevel@tonic-gate  * contiguous blocks and the beginning of the next is physically separated
5307c478bd9Sstevel@tonic-gate  * so that the disk head will be in transit between them for at least
5317c478bd9Sstevel@tonic-gate  * fs_rotdelay milliseconds.  This is to allow time for the processor to
5327c478bd9Sstevel@tonic-gate  * schedule another I/O transfer.
5337c478bd9Sstevel@tonic-gate  */
5347c478bd9Sstevel@tonic-gate daddr_t
blkpref(struct inode * ip,daddr_t lbn,int indx,daddr32_t * bap)5357c478bd9Sstevel@tonic-gate blkpref(struct inode *ip, daddr_t lbn, int indx, daddr32_t *bap)
5367c478bd9Sstevel@tonic-gate {
5377c478bd9Sstevel@tonic-gate 	struct fs *fs;
5387c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
5397c478bd9Sstevel@tonic-gate 	int cg;
5407c478bd9Sstevel@tonic-gate 	int avgbfree, startcg;
5417c478bd9Sstevel@tonic-gate 	daddr_t nextblk;
5427c478bd9Sstevel@tonic-gate 
5437c478bd9Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
5447c478bd9Sstevel@tonic-gate 	fs = ip->i_fs;
5457c478bd9Sstevel@tonic-gate 	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
5467c478bd9Sstevel@tonic-gate 		if (lbn < NDADDR) {
5477c478bd9Sstevel@tonic-gate 			cg = itog(fs, ip->i_number);
5487c478bd9Sstevel@tonic-gate 			return (fs->fs_fpg * cg + fs->fs_frag);
5497c478bd9Sstevel@tonic-gate 		}
5507c478bd9Sstevel@tonic-gate 		/*
5517c478bd9Sstevel@tonic-gate 		 * Find a cylinder with greater than average
5527c478bd9Sstevel@tonic-gate 		 * number of unused data blocks.
5537c478bd9Sstevel@tonic-gate 		 */
5547c478bd9Sstevel@tonic-gate 		if (indx == 0 || bap[indx - 1] == 0)
5557c478bd9Sstevel@tonic-gate 			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
5567c478bd9Sstevel@tonic-gate 		else
5577c478bd9Sstevel@tonic-gate 			startcg = dtog(fs, bap[indx - 1]) + 1;
5587c478bd9Sstevel@tonic-gate 		startcg %= fs->fs_ncg;
5597c478bd9Sstevel@tonic-gate 
5607c478bd9Sstevel@tonic-gate 		mutex_enter(&ufsvfsp->vfs_lock);
5617c478bd9Sstevel@tonic-gate 		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
5627c478bd9Sstevel@tonic-gate 		/*
5637c478bd9Sstevel@tonic-gate 		 * used for computing log space for writes/truncs
5647c478bd9Sstevel@tonic-gate 		 */
5657c478bd9Sstevel@tonic-gate 		ufsvfsp->vfs_avgbfree = avgbfree;
5667c478bd9Sstevel@tonic-gate 		for (cg = startcg; cg < fs->fs_ncg; cg++)
5677c478bd9Sstevel@tonic-gate 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
5687c478bd9Sstevel@tonic-gate 				fs->fs_cgrotor = cg;
5697c478bd9Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
5707c478bd9Sstevel@tonic-gate 				return (fs->fs_fpg * cg + fs->fs_frag);
5717c478bd9Sstevel@tonic-gate 			}
5727c478bd9Sstevel@tonic-gate 		for (cg = 0; cg <= startcg; cg++)
5737c478bd9Sstevel@tonic-gate 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
5747c478bd9Sstevel@tonic-gate 				fs->fs_cgrotor = cg;
5757c478bd9Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
5767c478bd9Sstevel@tonic-gate 				return (fs->fs_fpg * cg + fs->fs_frag);
5777c478bd9Sstevel@tonic-gate 			}
5787c478bd9Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
5791f563eb1SToomas Soome 		return (0);
5807c478bd9Sstevel@tonic-gate 	}
5817c478bd9Sstevel@tonic-gate 	/*
5827c478bd9Sstevel@tonic-gate 	 * One or more previous blocks have been laid out. If less
5837c478bd9Sstevel@tonic-gate 	 * than fs_maxcontig previous blocks are contiguous, the
5847c478bd9Sstevel@tonic-gate 	 * next block is requested contiguously, otherwise it is
5857c478bd9Sstevel@tonic-gate 	 * requested rotationally delayed by fs_rotdelay milliseconds.
5867c478bd9Sstevel@tonic-gate 	 */
587303bf60bSsdebnath 
588303bf60bSsdebnath 	nextblk = bap[indx - 1];
589303bf60bSsdebnath 	/*
590303bf60bSsdebnath 	 * Provision for fallocate to return positive
591303bf60bSsdebnath 	 * blk preference based on last allocation
592303bf60bSsdebnath 	 */
593303bf60bSsdebnath 	if (nextblk < 0 && nextblk != UFS_HOLE) {
594303bf60bSsdebnath 		nextblk = (-bap[indx - 1]) + fs->fs_frag;
595303bf60bSsdebnath 	} else {
596303bf60bSsdebnath 		nextblk = bap[indx - 1] + fs->fs_frag;
597303bf60bSsdebnath 	}
598303bf60bSsdebnath 
599303bf60bSsdebnath 	if (indx > fs->fs_maxcontig && bap[indx - fs->fs_maxcontig] +
600303bf60bSsdebnath 	    blkstofrags(fs, fs->fs_maxcontig) != nextblk) {
6017c478bd9Sstevel@tonic-gate 		return (nextblk);
602303bf60bSsdebnath 	}
6037c478bd9Sstevel@tonic-gate 	if (fs->fs_rotdelay != 0)
6047c478bd9Sstevel@tonic-gate 		/*
6057c478bd9Sstevel@tonic-gate 		 * Here we convert ms of delay to frags as:
6067c478bd9Sstevel@tonic-gate 		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
6071f563eb1SToomas Soome 		 *	((sect/frag) * (ms/sec))
6087c478bd9Sstevel@tonic-gate 		 * then round up to the next block.
6097c478bd9Sstevel@tonic-gate 		 */
6107c478bd9Sstevel@tonic-gate 		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
6117c478bd9Sstevel@tonic-gate 		    (NSPF(fs) * 1000), fs->fs_frag);
6127c478bd9Sstevel@tonic-gate 	return (nextblk);
6137c478bd9Sstevel@tonic-gate }
6147c478bd9Sstevel@tonic-gate 
6157c478bd9Sstevel@tonic-gate /*
6167c478bd9Sstevel@tonic-gate  * Free a block or fragment.
6177c478bd9Sstevel@tonic-gate  *
6187c478bd9Sstevel@tonic-gate  * The specified block or fragment is placed back in the
6197c478bd9Sstevel@tonic-gate  * free map. If a fragment is deallocated, a possible
6207c478bd9Sstevel@tonic-gate  * block reassembly is checked.
6217c478bd9Sstevel@tonic-gate  */
6227c478bd9Sstevel@tonic-gate void
free(struct inode * ip,daddr_t bno,off_t size,int flags)6237c478bd9Sstevel@tonic-gate free(struct inode *ip, daddr_t bno, off_t size, int flags)
6247c478bd9Sstevel@tonic-gate {
6257c478bd9Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
6267c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
627121be23bSjkennedy 	struct ufs_q *delq = &ufsvfsp->vfs_delete;
628121be23bSjkennedy 	struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
6297c478bd9Sstevel@tonic-gate 	struct cg *cgp;
6307c478bd9Sstevel@tonic-gate 	struct buf *bp;
6317c478bd9Sstevel@tonic-gate 	int cg, bmap, bbase;
6327c478bd9Sstevel@tonic-gate 	int i;
6337c478bd9Sstevel@tonic-gate 	uchar_t *blksfree;
6347c478bd9Sstevel@tonic-gate 	int *blktot;
6357c478bd9Sstevel@tonic-gate 	short *blks;
6367c478bd9Sstevel@tonic-gate 	daddr_t blkno, cylno, rpos;
6377c478bd9Sstevel@tonic-gate 
638303bf60bSsdebnath 	/*
639303bf60bSsdebnath 	 * fallocate'd files will have negative block address.
640303bf60bSsdebnath 	 * So negate it again to get original block address.
641303bf60bSsdebnath 	 */
64233c22cb3Smishra 	if (bno < 0 && (bno % fs->fs_frag == 0) && bno != UFS_HOLE) {
643303bf60bSsdebnath 		bno = -bno;
644303bf60bSsdebnath 	}
645303bf60bSsdebnath 
6467c478bd9Sstevel@tonic-gate 	if ((unsigned long)size > fs->fs_bsize || fragoff(fs, size) != 0) {
6477c478bd9Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip),
648303bf60bSsdebnath 		    "free: bad size, dev = 0x%lx, bsize = %d, size = %d, "
649303bf60bSsdebnath 		    "fs = %s\n", ip->i_dev, fs->fs_bsize,
650303bf60bSsdebnath 		    (int)size, fs->fs_fsmnt);
6517c478bd9Sstevel@tonic-gate 		return;
6527c478bd9Sstevel@tonic-gate 	}
6537c478bd9Sstevel@tonic-gate 	cg = dtog(fs, bno);
6547c478bd9Sstevel@tonic-gate 	ASSERT(!ufs_badblock(ip, bno));
6557c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
656303bf60bSsdebnath 	    (int)fs->fs_cgsize);
6577c478bd9Sstevel@tonic-gate 
6587c478bd9Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
6597c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
6607c478bd9Sstevel@tonic-gate 		brelse(bp);
6617c478bd9Sstevel@tonic-gate 		return;
6627c478bd9Sstevel@tonic-gate 	}
6637c478bd9Sstevel@tonic-gate 
6647c478bd9Sstevel@tonic-gate 	if (!(flags & I_NOCANCEL))
6657c478bd9Sstevel@tonic-gate 		TRANS_CANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size, flags);
6667c478bd9Sstevel@tonic-gate 	if (flags & (I_DIR|I_IBLK|I_SHAD|I_QUOTA)) {
6677c478bd9Sstevel@tonic-gate 		TRANS_MATA_FREE(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size);
6687c478bd9Sstevel@tonic-gate 	}
6697c478bd9Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
6707c478bd9Sstevel@tonic-gate 	blktot = cg_blktot(cgp);
6717c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
6727c478bd9Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
6737c478bd9Sstevel@tonic-gate 	bno = dtogd(fs, bno);
6747c478bd9Sstevel@tonic-gate 	if (size == fs->fs_bsize) {
6757c478bd9Sstevel@tonic-gate 		blkno = fragstoblks(fs, bno);
6767c478bd9Sstevel@tonic-gate 		cylno = cbtocylno(fs, bno);
6777c478bd9Sstevel@tonic-gate 		rpos = cbtorpos(ufsvfsp, bno);
6787c478bd9Sstevel@tonic-gate 		blks = cg_blks(ufsvfsp, cgp, cylno);
6797c478bd9Sstevel@tonic-gate 		if (!isclrblock(fs, blksfree, blkno)) {
6807c478bd9Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
6817c478bd9Sstevel@tonic-gate 			brelse(bp);
6827c478bd9Sstevel@tonic-gate 			(void) ufs_fault(ITOV(ip), "free: freeing free block, "
6837c478bd9Sstevel@tonic-gate 			    "dev:0x%lx, block:%ld, ino:%lu, fs:%s",
6847c478bd9Sstevel@tonic-gate 			    ip->i_dev, bno, ip->i_number, fs->fs_fsmnt);
6857c478bd9Sstevel@tonic-gate 			return;
6867c478bd9Sstevel@tonic-gate 		}
6877c478bd9Sstevel@tonic-gate 		setblock(fs, blksfree, blkno);
6887c478bd9Sstevel@tonic-gate 		blks[rpos]++;
6897c478bd9Sstevel@tonic-gate 		blktot[cylno]++;
6907c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_nbfree++;		/* Log below */
6917c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_nbfree++;
6927c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nbfree++;
693121be23bSjkennedy 		if (TRANS_ISTRANS(ufsvfsp) && (flags & I_ACCT)) {
694121be23bSjkennedy 			mutex_enter(&delq->uq_mutex);
695121be23bSjkennedy 			delq_info->delq_unreclaimed_blocks -=
696121be23bSjkennedy 			    btodb(fs->fs_bsize);
697121be23bSjkennedy 			mutex_exit(&delq->uq_mutex);
698121be23bSjkennedy 		}
6997c478bd9Sstevel@tonic-gate 	} else {
7007c478bd9Sstevel@tonic-gate 		bbase = bno - fragnum(fs, bno);
7017c478bd9Sstevel@tonic-gate 		/*
7027c478bd9Sstevel@tonic-gate 		 * Decrement the counts associated with the old frags
7037c478bd9Sstevel@tonic-gate 		 */
7047c478bd9Sstevel@tonic-gate 		bmap = blkmap(fs, blksfree, bbase);
7057c478bd9Sstevel@tonic-gate 		fragacct(fs, bmap, cgp->cg_frsum, -1);
7067c478bd9Sstevel@tonic-gate 		/*
7077c478bd9Sstevel@tonic-gate 		 * Deallocate the fragment
7087c478bd9Sstevel@tonic-gate 		 */
7097c478bd9Sstevel@tonic-gate 		for (i = 0; i < numfrags(fs, size); i++) {
7107c478bd9Sstevel@tonic-gate 			if (isset(blksfree, bno + i)) {
7117c478bd9Sstevel@tonic-gate 				brelse(bp);
7127c478bd9Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
7137c478bd9Sstevel@tonic-gate 				(void) ufs_fault(ITOV(ip),
7147c478bd9Sstevel@tonic-gate 				    "free: freeing free frag, "
7157c478bd9Sstevel@tonic-gate 				    "dev:0x%lx, blk:%ld, cg:%d, "
7167c478bd9Sstevel@tonic-gate 				    "ino:%lu, fs:%s",
7177c478bd9Sstevel@tonic-gate 				    ip->i_dev,
7187c478bd9Sstevel@tonic-gate 				    bno + i,
7197c478bd9Sstevel@tonic-gate 				    cgp->cg_cgx,
7207c478bd9Sstevel@tonic-gate 				    ip->i_number,
7217c478bd9Sstevel@tonic-gate 				    fs->fs_fsmnt);
7227c478bd9Sstevel@tonic-gate 				return;
7237c478bd9Sstevel@tonic-gate 			}
7247c478bd9Sstevel@tonic-gate 			setbit(blksfree, bno + i);
7257c478bd9Sstevel@tonic-gate 		}
7267c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_nffree += i;
7277c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_nffree += i;
7287c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nffree += i;
729121be23bSjkennedy 		if (TRANS_ISTRANS(ufsvfsp) && (flags & I_ACCT)) {
730121be23bSjkennedy 			mutex_enter(&delq->uq_mutex);
731121be23bSjkennedy 			delq_info->delq_unreclaimed_blocks -=
732121be23bSjkennedy 			    btodb(i * fs->fs_fsize);
733121be23bSjkennedy 			mutex_exit(&delq->uq_mutex);
734121be23bSjkennedy 		}
7357c478bd9Sstevel@tonic-gate 		/*
7367c478bd9Sstevel@tonic-gate 		 * Add back in counts associated with the new frags
7377c478bd9Sstevel@tonic-gate 		 */
7387c478bd9Sstevel@tonic-gate 		bmap = blkmap(fs, blksfree, bbase);
7397c478bd9Sstevel@tonic-gate 		fragacct(fs, bmap, cgp->cg_frsum, 1);
7407c478bd9Sstevel@tonic-gate 		/*
7417c478bd9Sstevel@tonic-gate 		 * If a complete block has been reassembled, account for it
7427c478bd9Sstevel@tonic-gate 		 */
7437c478bd9Sstevel@tonic-gate 		blkno = fragstoblks(fs, bbase);
7447c478bd9Sstevel@tonic-gate 		if (isblock(fs, blksfree, blkno)) {
7457c478bd9Sstevel@tonic-gate 			cylno = cbtocylno(fs, bbase);
7467c478bd9Sstevel@tonic-gate 			rpos = cbtorpos(ufsvfsp, bbase);
7477c478bd9Sstevel@tonic-gate 			blks = cg_blks(ufsvfsp, cgp, cylno);
7487c478bd9Sstevel@tonic-gate 			blks[rpos]++;
7497c478bd9Sstevel@tonic-gate 			blktot[cylno]++;
7507c478bd9Sstevel@tonic-gate 			cgp->cg_cs.cs_nffree -= fs->fs_frag;
7517c478bd9Sstevel@tonic-gate 			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
7527c478bd9Sstevel@tonic-gate 			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
7537c478bd9Sstevel@tonic-gate 			cgp->cg_cs.cs_nbfree++;
7547c478bd9Sstevel@tonic-gate 			fs->fs_cstotal.cs_nbfree++;
7557c478bd9Sstevel@tonic-gate 			fs->fs_cs(fs, cg).cs_nbfree++;
7567c478bd9Sstevel@tonic-gate 		}
7577c478bd9Sstevel@tonic-gate 	}
7587c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
7597c478bd9Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
7607c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
7617c478bd9Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
7627c478bd9Sstevel@tonic-gate 	bdrwrite(bp);
7637c478bd9Sstevel@tonic-gate }
7647c478bd9Sstevel@tonic-gate 
7657c478bd9Sstevel@tonic-gate /*
7667c478bd9Sstevel@tonic-gate  * Free an inode.
7677c478bd9Sstevel@tonic-gate  *
7687c478bd9Sstevel@tonic-gate  * The specified inode is placed back in the free map.
7697c478bd9Sstevel@tonic-gate  */
7707c478bd9Sstevel@tonic-gate void
ufs_ifree(struct inode * ip,ino_t ino,mode_t mode)7717c478bd9Sstevel@tonic-gate ufs_ifree(struct inode *ip, ino_t ino, mode_t mode)
7727c478bd9Sstevel@tonic-gate {
7737c478bd9Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
7747c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
7757c478bd9Sstevel@tonic-gate 	struct cg *cgp;
7767c478bd9Sstevel@tonic-gate 	struct buf *bp;
7777c478bd9Sstevel@tonic-gate 	unsigned int inot;
7787c478bd9Sstevel@tonic-gate 	int cg;
7797c478bd9Sstevel@tonic-gate 	char *iused;
7807c478bd9Sstevel@tonic-gate 
7817c478bd9Sstevel@tonic-gate 	if (ip->i_number == ino && ip->i_mode != 0) {
7827c478bd9Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip),
7837c478bd9Sstevel@tonic-gate 		    "ufs_ifree: illegal mode: (imode) %o, (omode) %o, ino %d, "
7847c478bd9Sstevel@tonic-gate 		    "fs = %s\n",
7857c478bd9Sstevel@tonic-gate 		    ip->i_mode, mode, (int)ip->i_number, fs->fs_fsmnt);
7867c478bd9Sstevel@tonic-gate 		return;
7877c478bd9Sstevel@tonic-gate 	}
7887c478bd9Sstevel@tonic-gate 	if (ino >= fs->fs_ipg * fs->fs_ncg) {
7897c478bd9Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip),
7907c478bd9Sstevel@tonic-gate 		    "ifree: range, dev = 0x%x, ino = %d, fs = %s\n",
7917c478bd9Sstevel@tonic-gate 		    (int)ip->i_dev, (int)ino, fs->fs_fsmnt);
7927c478bd9Sstevel@tonic-gate 		return;
7937c478bd9Sstevel@tonic-gate 	}
7947c478bd9Sstevel@tonic-gate 	cg = (int)itog(fs, ino);
7957c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
796303bf60bSsdebnath 	    (int)fs->fs_cgsize);
7977c478bd9Sstevel@tonic-gate 
7987c478bd9Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
7997c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
8007c478bd9Sstevel@tonic-gate 		brelse(bp);
8017c478bd9Sstevel@tonic-gate 		return;
8027c478bd9Sstevel@tonic-gate 	}
8037c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
8047c478bd9Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
8057c478bd9Sstevel@tonic-gate 	iused = cg_inosused(cgp);
8067c478bd9Sstevel@tonic-gate 	inot = (unsigned int)(ino % (ulong_t)fs->fs_ipg);
8077c478bd9Sstevel@tonic-gate 	if (isclr(iused, inot)) {
8087c478bd9Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
8097c478bd9Sstevel@tonic-gate 		brelse(bp);
8107c478bd9Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip), "ufs_ifree: freeing free inode, "
811303bf60bSsdebnath 		    "mode: (imode) %o, (omode) %o, ino:%d, "
812303bf60bSsdebnath 		    "fs:%s",
813303bf60bSsdebnath 		    ip->i_mode, mode, (int)ino, fs->fs_fsmnt);
8147c478bd9Sstevel@tonic-gate 		return;
8157c478bd9Sstevel@tonic-gate 	}
8167c478bd9Sstevel@tonic-gate 	clrbit(iused, inot);
8177c478bd9Sstevel@tonic-gate 
8187c478bd9Sstevel@tonic-gate 	if (inot < (ulong_t)cgp->cg_irotor)
8197c478bd9Sstevel@tonic-gate 		cgp->cg_irotor = inot;
8207c478bd9Sstevel@tonic-gate 	cgp->cg_cs.cs_nifree++;
8217c478bd9Sstevel@tonic-gate 	fs->fs_cstotal.cs_nifree++;
8227c478bd9Sstevel@tonic-gate 	fs->fs_cs(fs, cg).cs_nifree++;
8237c478bd9Sstevel@tonic-gate 	if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) {
8247c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_ndir--;
8257c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_ndir--;
8267c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_ndir--;
8277c478bd9Sstevel@tonic-gate 	}
8287c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
8297c478bd9Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
8307c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
8317c478bd9Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
8327c478bd9Sstevel@tonic-gate 	bdrwrite(bp);
8337c478bd9Sstevel@tonic-gate }
8347c478bd9Sstevel@tonic-gate 
8357c478bd9Sstevel@tonic-gate /*
8367c478bd9Sstevel@tonic-gate  * Implement the cylinder overflow algorithm.
8377c478bd9Sstevel@tonic-gate  *
8387c478bd9Sstevel@tonic-gate  * The policy implemented by this algorithm is:
8397c478bd9Sstevel@tonic-gate  *   1) allocate the block in its requested cylinder group.
8407c478bd9Sstevel@tonic-gate  *   2) quadratically rehash on the cylinder group number.
8417c478bd9Sstevel@tonic-gate  *   3) brute force search for a free block.
8427c478bd9Sstevel@tonic-gate  * The size parameter means size for data blocks, mode for inodes.
8437c478bd9Sstevel@tonic-gate  */
8447c478bd9Sstevel@tonic-gate static ino_t
hashalloc(struct inode * ip,int cg,long pref,int size,ulong_t (* allocator)())8457c478bd9Sstevel@tonic-gate hashalloc(struct inode *ip, int cg, long pref, int size, ulong_t (*allocator)())
8467c478bd9Sstevel@tonic-gate {
8477c478bd9Sstevel@tonic-gate 	struct fs *fs;
8487c478bd9Sstevel@tonic-gate 	int i;
8497c478bd9Sstevel@tonic-gate 	long result;
8507c478bd9Sstevel@tonic-gate 	int icg = cg;
8517c478bd9Sstevel@tonic-gate 
8527c478bd9Sstevel@tonic-gate 	fs = ip->i_fs;
8537c478bd9Sstevel@tonic-gate 	/*
8547c478bd9Sstevel@tonic-gate 	 * 1: preferred cylinder group
8557c478bd9Sstevel@tonic-gate 	 */
8567c478bd9Sstevel@tonic-gate 	result = (*allocator)(ip, cg, pref, size);
8577c478bd9Sstevel@tonic-gate 	if (result)
8587c478bd9Sstevel@tonic-gate 		return (result);
8597c478bd9Sstevel@tonic-gate 	/*
8607c478bd9Sstevel@tonic-gate 	 * 2: quadratic rehash
8617c478bd9Sstevel@tonic-gate 	 */
8627c478bd9Sstevel@tonic-gate 	for (i = 1; i < fs->fs_ncg; i *= 2) {
8637c478bd9Sstevel@tonic-gate 		cg += i;
8647c478bd9Sstevel@tonic-gate 		if (cg >= fs->fs_ncg)
8657c478bd9Sstevel@tonic-gate 			cg -= fs->fs_ncg;
8667c478bd9Sstevel@tonic-gate 		result = (*allocator)(ip, cg, 0, size);
8677c478bd9Sstevel@tonic-gate 		if (result)
8687c478bd9Sstevel@tonic-gate 			return (result);
8697c478bd9Sstevel@tonic-gate 	}
8707c478bd9Sstevel@tonic-gate 	/*
8717c478bd9Sstevel@tonic-gate 	 * 3: brute force search
8727c478bd9Sstevel@tonic-gate 	 * Note that we start at i == 2, since 0 was checked initially,
8737c478bd9Sstevel@tonic-gate 	 * and 1 is always checked in the quadratic rehash.
8747c478bd9Sstevel@tonic-gate 	 */
8757c478bd9Sstevel@tonic-gate 	cg = (icg + 2) % fs->fs_ncg;
8767c478bd9Sstevel@tonic-gate 	for (i = 2; i < fs->fs_ncg; i++) {
8777c478bd9Sstevel@tonic-gate 		result = (*allocator)(ip, cg, 0, size);
8787c478bd9Sstevel@tonic-gate 		if (result)
8797c478bd9Sstevel@tonic-gate 			return (result);
8807c478bd9Sstevel@tonic-gate 		cg++;
8817c478bd9Sstevel@tonic-gate 		if (cg == fs->fs_ncg)
8827c478bd9Sstevel@tonic-gate 			cg = 0;
8837c478bd9Sstevel@tonic-gate 	}
8841f563eb1SToomas Soome 	return (0);
8857c478bd9Sstevel@tonic-gate }
8867c478bd9Sstevel@tonic-gate 
8877c478bd9Sstevel@tonic-gate /*
8887c478bd9Sstevel@tonic-gate  * Determine whether a fragment can be extended.
8897c478bd9Sstevel@tonic-gate  *
8907c478bd9Sstevel@tonic-gate  * Check to see if the necessary fragments are available, and
8917c478bd9Sstevel@tonic-gate  * if they are, allocate them.
8927c478bd9Sstevel@tonic-gate  */
8937c478bd9Sstevel@tonic-gate static daddr_t
fragextend(struct inode * ip,int cg,long bprev,int osize,int nsize)8947c478bd9Sstevel@tonic-gate fragextend(struct inode *ip, int cg, long bprev, int osize, int nsize)
8957c478bd9Sstevel@tonic-gate {
8967c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
8977c478bd9Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
8987c478bd9Sstevel@tonic-gate 	struct buf *bp;
8997c478bd9Sstevel@tonic-gate 	struct cg *cgp;
9007c478bd9Sstevel@tonic-gate 	uchar_t *blksfree;
9017c478bd9Sstevel@tonic-gate 	long bno;
9027c478bd9Sstevel@tonic-gate 	int frags, bbase;
9037c478bd9Sstevel@tonic-gate 	int i, j;
9047c478bd9Sstevel@tonic-gate 
9057c478bd9Sstevel@tonic-gate 	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
9061f563eb1SToomas Soome 		return (0);
9077c478bd9Sstevel@tonic-gate 	frags = numfrags(fs, nsize);
9087c478bd9Sstevel@tonic-gate 	bbase = (int)fragnum(fs, bprev);
9097c478bd9Sstevel@tonic-gate 	if (bbase > fragnum(fs, (bprev + frags - 1))) {
9107c478bd9Sstevel@tonic-gate 		/* cannot extend across a block boundary */
9111f563eb1SToomas Soome 		return (0);
9127c478bd9Sstevel@tonic-gate 	}
9137c478bd9Sstevel@tonic-gate 
9147c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
915303bf60bSsdebnath 	    (int)fs->fs_cgsize);
9167c478bd9Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
9177c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
9187c478bd9Sstevel@tonic-gate 		brelse(bp);
9191f563eb1SToomas Soome 		return (0);
9207c478bd9Sstevel@tonic-gate 	}
9217c478bd9Sstevel@tonic-gate 
9227c478bd9Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
9237c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
9247c478bd9Sstevel@tonic-gate 	bno = dtogd(fs, bprev);
9257c478bd9Sstevel@tonic-gate 	for (i = numfrags(fs, osize); i < frags; i++) {
9267c478bd9Sstevel@tonic-gate 		if (isclr(blksfree, bno + i)) {
9277c478bd9Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
9287c478bd9Sstevel@tonic-gate 			brelse(bp);
9291f563eb1SToomas Soome 			return (0);
9307c478bd9Sstevel@tonic-gate 		}
9317c478bd9Sstevel@tonic-gate 		if ((TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bprev + i)),
93280d34432Sfrankho 		    fs->fs_fsize))) {
9337c478bd9Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
9347c478bd9Sstevel@tonic-gate 			brelse(bp);
9351f563eb1SToomas Soome 			return (0);
9367c478bd9Sstevel@tonic-gate 		}
9377c478bd9Sstevel@tonic-gate 	}
9387c478bd9Sstevel@tonic-gate 
9397c478bd9Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
9407c478bd9Sstevel@tonic-gate 	/*
9417c478bd9Sstevel@tonic-gate 	 * The current fragment can be extended,
9427c478bd9Sstevel@tonic-gate 	 * deduct the count on fragment being extended into
9437c478bd9Sstevel@tonic-gate 	 * increase the count on the remaining fragment (if any)
9447c478bd9Sstevel@tonic-gate 	 * allocate the extended piece.
9457c478bd9Sstevel@tonic-gate 	 */
9467c478bd9Sstevel@tonic-gate 	for (i = frags; i < fs->fs_frag - bbase; i++)
9477c478bd9Sstevel@tonic-gate 		if (isclr(blksfree, bno + i))
9487c478bd9Sstevel@tonic-gate 			break;
9497c478bd9Sstevel@tonic-gate 	j = i - numfrags(fs, osize);
9507c478bd9Sstevel@tonic-gate 	cgp->cg_frsum[j]--;
9517c478bd9Sstevel@tonic-gate 	ASSERT(cgp->cg_frsum[j] >= 0);
9527c478bd9Sstevel@tonic-gate 	if (i != frags)
9537c478bd9Sstevel@tonic-gate 		cgp->cg_frsum[i - frags]++;
9547c478bd9Sstevel@tonic-gate 	for (i = numfrags(fs, osize); i < frags; i++) {
9557c478bd9Sstevel@tonic-gate 		clrbit(blksfree, bno + i);
9567c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_nffree--;
9577c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nffree--;
9587c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_nffree--;
9597c478bd9Sstevel@tonic-gate 	}
9607c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
9617c478bd9Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
9627c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
9637c478bd9Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
9647c478bd9Sstevel@tonic-gate 	bdrwrite(bp);
9657c478bd9Sstevel@tonic-gate 	return ((daddr_t)bprev);
9667c478bd9Sstevel@tonic-gate }
9677c478bd9Sstevel@tonic-gate 
9687c478bd9Sstevel@tonic-gate /*
9697c478bd9Sstevel@tonic-gate  * Determine whether a block can be allocated.
9707c478bd9Sstevel@tonic-gate  *
9717c478bd9Sstevel@tonic-gate  * Check to see if a block of the apprpriate size
9727c478bd9Sstevel@tonic-gate  * is available, and if it is, allocate it.
9737c478bd9Sstevel@tonic-gate  */
9747c478bd9Sstevel@tonic-gate static daddr_t
alloccg(struct inode * ip,int cg,daddr_t bpref,int size)9757c478bd9Sstevel@tonic-gate alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
9767c478bd9Sstevel@tonic-gate {
9777c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
9787c478bd9Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
9797c478bd9Sstevel@tonic-gate 	struct buf *bp;
9807c478bd9Sstevel@tonic-gate 	struct cg *cgp;
9817c478bd9Sstevel@tonic-gate 	uchar_t *blksfree;
9827c478bd9Sstevel@tonic-gate 	int bno, frags;
9837c478bd9Sstevel@tonic-gate 	int allocsiz;
9847c478bd9Sstevel@tonic-gate 	int i;
9857c478bd9Sstevel@tonic-gate 
986b3143109Smishra 	/*
987b3143109Smishra 	 * Searching for space could be time expensive so do some
988b3143109Smishra 	 * up front checking to verify that there is actually space
989b3143109Smishra 	 * available (free blocks or free frags).
990b3143109Smishra 	 */
991b3143109Smishra 	if (fs->fs_cs(fs, cg).cs_nbfree == 0) {
992b3143109Smishra 		if (size == fs->fs_bsize)
993b3143109Smishra 			return (0);
994b3143109Smishra 
995b3143109Smishra 		/*
996b3143109Smishra 		 * If there are not enough free frags then return.
997b3143109Smishra 		 */
998b3143109Smishra 		if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, size))
999b3143109Smishra 			return (0);
1000b3143109Smishra 	}
1001b3143109Smishra 
10027c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
1003303bf60bSsdebnath 	    (int)fs->fs_cgsize);
10047c478bd9Sstevel@tonic-gate 
10057c478bd9Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
10067c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
10077c478bd9Sstevel@tonic-gate 	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
10087c478bd9Sstevel@tonic-gate 		brelse(bp);
10097c478bd9Sstevel@tonic-gate 		return (0);
10107c478bd9Sstevel@tonic-gate 	}
10117c478bd9Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
10127c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
10137c478bd9Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
10147c478bd9Sstevel@tonic-gate 	if (size == fs->fs_bsize) {
10157c478bd9Sstevel@tonic-gate 		if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0)
10167c478bd9Sstevel@tonic-gate 			goto errout;
10177c478bd9Sstevel@tonic-gate 		fs->fs_fmod = 1;
10187c478bd9Sstevel@tonic-gate 		ufs_notclean(ufsvfsp);
10197c478bd9Sstevel@tonic-gate 		TRANS_SI(ufsvfsp, fs, cg);
10207c478bd9Sstevel@tonic-gate 		bdrwrite(bp);
10217c478bd9Sstevel@tonic-gate 		return (bno);
10227c478bd9Sstevel@tonic-gate 	}
10237c478bd9Sstevel@tonic-gate 	/*
10240d5abb8cSViswanathan Kannappan 	 * Check fragment bitmap to see if any fragments are already available.
10250d5abb8cSViswanathan Kannappan 	 * mapsearch() may fail because the fragment that fits this request
10260d5abb8cSViswanathan Kannappan 	 * might still be on the cancel list and not available for re-use yet.
10270d5abb8cSViswanathan Kannappan 	 * Look for a bigger sized fragment to allocate first before we have
10280d5abb8cSViswanathan Kannappan 	 * to give up and fragment a whole new block eventually.
10297c478bd9Sstevel@tonic-gate 	 */
10307c478bd9Sstevel@tonic-gate 	frags = numfrags(fs, size);
10310d5abb8cSViswanathan Kannappan 	allocsiz = frags;
10320d5abb8cSViswanathan Kannappan next_size:
10330d5abb8cSViswanathan Kannappan 	for (; allocsiz < fs->fs_frag; allocsiz++)
10347c478bd9Sstevel@tonic-gate 		if (cgp->cg_frsum[allocsiz] != 0)
10357c478bd9Sstevel@tonic-gate 			break;
10367c478bd9Sstevel@tonic-gate 
10370d5abb8cSViswanathan Kannappan 	if (allocsiz != fs->fs_frag) {
10387c478bd9Sstevel@tonic-gate 		bno = mapsearch(ufsvfsp, cgp, bpref, allocsiz);
10390d5abb8cSViswanathan Kannappan 		if (bno < 0 && allocsiz < (fs->fs_frag - 1)) {
10400d5abb8cSViswanathan Kannappan 			allocsiz++;
10410d5abb8cSViswanathan Kannappan 			goto next_size;
10420d5abb8cSViswanathan Kannappan 		}
10430d5abb8cSViswanathan Kannappan 	}
10447c478bd9Sstevel@tonic-gate 
10457c478bd9Sstevel@tonic-gate 	if (allocsiz == fs->fs_frag || bno < 0) {
10467c478bd9Sstevel@tonic-gate 		/*
10477c478bd9Sstevel@tonic-gate 		 * No fragments were available, so a block
10487c478bd9Sstevel@tonic-gate 		 * will be allocated and hacked up.
10497c478bd9Sstevel@tonic-gate 		 */
10507c478bd9Sstevel@tonic-gate 		if (cgp->cg_cs.cs_nbfree == 0)
10517c478bd9Sstevel@tonic-gate 			goto errout;
10527c478bd9Sstevel@tonic-gate 		if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0)
10537c478bd9Sstevel@tonic-gate 			goto errout;
10547c478bd9Sstevel@tonic-gate 		bpref = dtogd(fs, bno);
10557c478bd9Sstevel@tonic-gate 		for (i = frags; i < fs->fs_frag; i++)
10567c478bd9Sstevel@tonic-gate 			setbit(blksfree, bpref + i);
10577c478bd9Sstevel@tonic-gate 		i = fs->fs_frag - frags;
10587c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_nffree += i;
10597c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_nffree += i;
10607c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nffree += i;
10617c478bd9Sstevel@tonic-gate 		cgp->cg_frsum[i]++;
10627c478bd9Sstevel@tonic-gate 		fs->fs_fmod = 1;
10637c478bd9Sstevel@tonic-gate 		ufs_notclean(ufsvfsp);
10647c478bd9Sstevel@tonic-gate 		TRANS_SI(ufsvfsp, fs, cg);
10657c478bd9Sstevel@tonic-gate 		bdrwrite(bp);
10667c478bd9Sstevel@tonic-gate 		return (bno);
10677c478bd9Sstevel@tonic-gate 	}
10687c478bd9Sstevel@tonic-gate 
10697c478bd9Sstevel@tonic-gate 	for (i = 0; i < frags; i++)
10707c478bd9Sstevel@tonic-gate 		clrbit(blksfree, bno + i);
10717c478bd9Sstevel@tonic-gate 	cgp->cg_cs.cs_nffree -= frags;
10727c478bd9Sstevel@tonic-gate 	fs->fs_cstotal.cs_nffree -= frags;
10737c478bd9Sstevel@tonic-gate 	fs->fs_cs(fs, cg).cs_nffree -= frags;
10747c478bd9Sstevel@tonic-gate 	cgp->cg_frsum[allocsiz]--;
10757c478bd9Sstevel@tonic-gate 	ASSERT(cgp->cg_frsum[allocsiz] >= 0);
10767c478bd9Sstevel@tonic-gate 	if (frags != allocsiz) {
10777c478bd9Sstevel@tonic-gate 		cgp->cg_frsum[allocsiz - frags]++;
10787c478bd9Sstevel@tonic-gate 	}
10797c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
10807c478bd9Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
10817c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
10827c478bd9Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
10837c478bd9Sstevel@tonic-gate 	bdrwrite(bp);
10847c478bd9Sstevel@tonic-gate 	return (cg * fs->fs_fpg + bno);
10857c478bd9Sstevel@tonic-gate errout:
10867c478bd9Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
10877c478bd9Sstevel@tonic-gate 	brelse(bp);
10887c478bd9Sstevel@tonic-gate 	return (0);
10897c478bd9Sstevel@tonic-gate }
10907c478bd9Sstevel@tonic-gate 
10917c478bd9Sstevel@tonic-gate /*
10927c478bd9Sstevel@tonic-gate  * Allocate a block in a cylinder group.
10937c478bd9Sstevel@tonic-gate  *
10947c478bd9Sstevel@tonic-gate  * This algorithm implements the following policy:
10957c478bd9Sstevel@tonic-gate  *   1) allocate the requested block.
10967c478bd9Sstevel@tonic-gate  *   2) allocate a rotationally optimal block in the same cylinder.
10977c478bd9Sstevel@tonic-gate  *   3) allocate the next available block on the block rotor for the
10987c478bd9Sstevel@tonic-gate  *	specified cylinder group.
10997c478bd9Sstevel@tonic-gate  * Note that this routine only allocates fs_bsize blocks; these
11007c478bd9Sstevel@tonic-gate  * blocks may be fragmented by the routine that allocates them.
11017c478bd9Sstevel@tonic-gate  */
11027c478bd9Sstevel@tonic-gate static daddr_t
alloccgblk(struct ufsvfs * ufsvfsp,struct cg * cgp,daddr_t bpref,struct buf * bp)11037c478bd9Sstevel@tonic-gate alloccgblk(
11047c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp,
11057c478bd9Sstevel@tonic-gate 	struct cg *cgp,
11067c478bd9Sstevel@tonic-gate 	daddr_t bpref,
11077c478bd9Sstevel@tonic-gate 	struct buf *bp)
11087c478bd9Sstevel@tonic-gate {
11097c478bd9Sstevel@tonic-gate 	daddr_t bno;
11107c478bd9Sstevel@tonic-gate 	int cylno, pos, delta, rotbl_size;
11117c478bd9Sstevel@tonic-gate 	short *cylbp;
11127c478bd9Sstevel@tonic-gate 	int i;
11137c478bd9Sstevel@tonic-gate 	struct fs *fs;
11147c478bd9Sstevel@tonic-gate 	uchar_t *blksfree;
11157c478bd9Sstevel@tonic-gate 	daddr_t blkno, rpos, frag;
11167c478bd9Sstevel@tonic-gate 	short *blks;
11177c478bd9Sstevel@tonic-gate 	int32_t *blktot;
11187c478bd9Sstevel@tonic-gate 
11197c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ufsvfsp->vfs_lock));
11207c478bd9Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
11217c478bd9Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
11227c478bd9Sstevel@tonic-gate 	if (bpref == 0) {
11237c478bd9Sstevel@tonic-gate 		bpref = cgp->cg_rotor;
11247c478bd9Sstevel@tonic-gate 		goto norot;
11257c478bd9Sstevel@tonic-gate 	}
11267c478bd9Sstevel@tonic-gate 	bpref = blknum(fs, bpref);
11277c478bd9Sstevel@tonic-gate 	bpref = dtogd(fs, bpref);
11287c478bd9Sstevel@tonic-gate 	/*
11297c478bd9Sstevel@tonic-gate 	 * If the requested block is available, use it.
11307c478bd9Sstevel@tonic-gate 	 */
11317c478bd9Sstevel@tonic-gate 	if (isblock(fs, blksfree, (daddr_t)fragstoblks(fs, bpref))) {
11327c478bd9Sstevel@tonic-gate 		bno = bpref;
11337c478bd9Sstevel@tonic-gate 		goto gotit;
11347c478bd9Sstevel@tonic-gate 	}
11357c478bd9Sstevel@tonic-gate 	/*
11367c478bd9Sstevel@tonic-gate 	 * Check for a block available on the same cylinder.
11377c478bd9Sstevel@tonic-gate 	 */
11387c478bd9Sstevel@tonic-gate 	cylno = cbtocylno(fs, bpref);
11397c478bd9Sstevel@tonic-gate 	if (cg_blktot(cgp)[cylno] == 0)
11407c478bd9Sstevel@tonic-gate 		goto norot;
11417c478bd9Sstevel@tonic-gate 	if (fs->fs_cpc == 0) {
11427c478bd9Sstevel@tonic-gate 		/*
11437c478bd9Sstevel@tonic-gate 		 * Block layout info is not available, so just
11447c478bd9Sstevel@tonic-gate 		 * have to take any block in this cylinder.
11457c478bd9Sstevel@tonic-gate 		 */
11467c478bd9Sstevel@tonic-gate 		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
11477c478bd9Sstevel@tonic-gate 		goto norot;
11487c478bd9Sstevel@tonic-gate 	}
11497c478bd9Sstevel@tonic-gate 	/*
11507c478bd9Sstevel@tonic-gate 	 * Check the summary information to see if a block is
11517c478bd9Sstevel@tonic-gate 	 * available in the requested cylinder starting at the
11527c478bd9Sstevel@tonic-gate 	 * requested rotational position and proceeding around.
11537c478bd9Sstevel@tonic-gate 	 */
11547c478bd9Sstevel@tonic-gate 	cylbp = cg_blks(ufsvfsp, cgp, cylno);
11557c478bd9Sstevel@tonic-gate 	pos = cbtorpos(ufsvfsp, bpref);
11567c478bd9Sstevel@tonic-gate 	for (i = pos; i < ufsvfsp->vfs_nrpos; i++)
11577c478bd9Sstevel@tonic-gate 		if (cylbp[i] > 0)
11587c478bd9Sstevel@tonic-gate 			break;
11597c478bd9Sstevel@tonic-gate 	if (i == ufsvfsp->vfs_nrpos)
11607c478bd9Sstevel@tonic-gate 		for (i = 0; i < pos; i++)
11617c478bd9Sstevel@tonic-gate 			if (cylbp[i] > 0)
11627c478bd9Sstevel@tonic-gate 				break;
11637c478bd9Sstevel@tonic-gate 	if (cylbp[i] > 0) {
11647c478bd9Sstevel@tonic-gate 		/*
11657c478bd9Sstevel@tonic-gate 		 * Found a rotational position, now find the actual
11667c478bd9Sstevel@tonic-gate 		 * block.  A "panic" if none is actually there.
11677c478bd9Sstevel@tonic-gate 		 */
11687c478bd9Sstevel@tonic-gate 
11697c478bd9Sstevel@tonic-gate 		/*
11707c478bd9Sstevel@tonic-gate 		 * Up to this point, "pos" has referred to the rotational
11717c478bd9Sstevel@tonic-gate 		 * position of the desired block.  From now on, it holds
11727c478bd9Sstevel@tonic-gate 		 * the offset of the current cylinder within a cylinder
11737c478bd9Sstevel@tonic-gate 		 * cycle.  (A cylinder cycle refers to a set of cylinders
11747c478bd9Sstevel@tonic-gate 		 * which are described by a single rotational table; the
11757c478bd9Sstevel@tonic-gate 		 * size of the cycle is fs_cpc.)
11767c478bd9Sstevel@tonic-gate 		 *
11777c478bd9Sstevel@tonic-gate 		 * bno is set to the block number of the first block within
11787c478bd9Sstevel@tonic-gate 		 * the current cylinder cycle.
11797c478bd9Sstevel@tonic-gate 		 */
11807c478bd9Sstevel@tonic-gate 
11817c478bd9Sstevel@tonic-gate 		pos = cylno % fs->fs_cpc;
11827c478bd9Sstevel@tonic-gate 		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
11837c478bd9Sstevel@tonic-gate 
11847c478bd9Sstevel@tonic-gate 		/*
11857c478bd9Sstevel@tonic-gate 		 * The blocks within a cylinder are grouped into equivalence
11867c478bd9Sstevel@tonic-gate 		 * classes according to their "rotational position."  There
11877c478bd9Sstevel@tonic-gate 		 * are two tables used to determine these classes.
11887c478bd9Sstevel@tonic-gate 		 *
11897c478bd9Sstevel@tonic-gate 		 * The positional offset table (fs_postbl) has an entry for
11907c478bd9Sstevel@tonic-gate 		 * each rotational position of each cylinder in a cylinder
11917c478bd9Sstevel@tonic-gate 		 * cycle.  This entry contains the relative block number
11927c478bd9Sstevel@tonic-gate 		 * (counting from the start of the cylinder cycle) of the
11937c478bd9Sstevel@tonic-gate 		 * first block in the equivalence class for that position
11947c478bd9Sstevel@tonic-gate 		 * and that cylinder.  Positions for which no blocks exist
11957c478bd9Sstevel@tonic-gate 		 * are indicated by a -1.
11967c478bd9Sstevel@tonic-gate 		 *
11977c478bd9Sstevel@tonic-gate 		 * The rotational delta table (fs_rotbl) has an entry for
11987c478bd9Sstevel@tonic-gate 		 * each block in a cylinder cycle.  This entry contains
11997c478bd9Sstevel@tonic-gate 		 * the offset from that block to the next block in the
12007c478bd9Sstevel@tonic-gate 		 * same equivalence class.  The last block in the class
12017c478bd9Sstevel@tonic-gate 		 * is indicated by a zero in the table.
12027c478bd9Sstevel@tonic-gate 		 *
12037c478bd9Sstevel@tonic-gate 		 * The following code, then, walks through all of the blocks
12047c478bd9Sstevel@tonic-gate 		 * in the cylinder (cylno) which we're allocating within
12057c478bd9Sstevel@tonic-gate 		 * which are in the equivalence class for the rotational
12067c478bd9Sstevel@tonic-gate 		 * position (i) which we're allocating within.
12077c478bd9Sstevel@tonic-gate 		 */
12087c478bd9Sstevel@tonic-gate 
12097c478bd9Sstevel@tonic-gate 		if (fs_postbl(ufsvfsp, pos)[i] == -1) {
12107c478bd9Sstevel@tonic-gate 			(void) ufs_fault(ufsvfsp->vfs_root,
1211303bf60bSsdebnath 			    "alloccgblk: cyl groups corrupted, pos = %d, "
1212303bf60bSsdebnath 			    "i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
12137c478bd9Sstevel@tonic-gate 			return (0);
12147c478bd9Sstevel@tonic-gate 		}
12157c478bd9Sstevel@tonic-gate 
12167c478bd9Sstevel@tonic-gate 		/*
12177c478bd9Sstevel@tonic-gate 		 * There is one entry in the rotational table for each block
12187c478bd9Sstevel@tonic-gate 		 * in the cylinder cycle.  These are whole blocks, not frags.
12197c478bd9Sstevel@tonic-gate 		 */
12207c478bd9Sstevel@tonic-gate 
12217c478bd9Sstevel@tonic-gate 		rotbl_size = (fs->fs_cpc * fs->fs_spc) >>
12227c478bd9Sstevel@tonic-gate 		    (fs->fs_fragshift + fs->fs_fsbtodb);
12237c478bd9Sstevel@tonic-gate 
12247c478bd9Sstevel@tonic-gate 		/*
12257c478bd9Sstevel@tonic-gate 		 * As we start, "i" is the rotational position within which
12267c478bd9Sstevel@tonic-gate 		 * we're searching.  After the next line, it will be a block
12277c478bd9Sstevel@tonic-gate 		 * number (relative to the start of the cylinder cycle)
12287c478bd9Sstevel@tonic-gate 		 * within the equivalence class of that rotational position.
12297c478bd9Sstevel@tonic-gate 		 */
12307c478bd9Sstevel@tonic-gate 
12317c478bd9Sstevel@tonic-gate 		i = fs_postbl(ufsvfsp, pos)[i];
12327c478bd9Sstevel@tonic-gate 
12337c478bd9Sstevel@tonic-gate 		for (;;) {
12347c478bd9Sstevel@tonic-gate 			if (isblock(fs, blksfree, (daddr_t)(bno + i))) {
12357c478bd9Sstevel@tonic-gate 				bno = blkstofrags(fs, (bno + i));
12367c478bd9Sstevel@tonic-gate 				goto gotit;
12377c478bd9Sstevel@tonic-gate 			}
12387c478bd9Sstevel@tonic-gate 			delta = fs_rotbl(fs)[i];
12397c478bd9Sstevel@tonic-gate 			if (delta <= 0 ||		/* End of chain, or */
12407c478bd9Sstevel@tonic-gate 			    delta + i > rotbl_size)	/* end of table? */
12417c478bd9Sstevel@tonic-gate 				break;			/* If so, panic. */
12427c478bd9Sstevel@tonic-gate 			i += delta;
12437c478bd9Sstevel@tonic-gate 		}
12447c478bd9Sstevel@tonic-gate 		(void) ufs_fault(ufsvfsp->vfs_root,
1245303bf60bSsdebnath 		    "alloccgblk: can't find blk in cyl, pos:%d, i:%d, "
1246303bf60bSsdebnath 		    "fs:%s bno: %x\n", pos, i, fs->fs_fsmnt, (int)bno);
12477c478bd9Sstevel@tonic-gate 		return (0);
12487c478bd9Sstevel@tonic-gate 	}
12497c478bd9Sstevel@tonic-gate norot:
12507c478bd9Sstevel@tonic-gate 	/*
12517c478bd9Sstevel@tonic-gate 	 * No blocks in the requested cylinder, so take
12527c478bd9Sstevel@tonic-gate 	 * next available one in this cylinder group.
12537c478bd9Sstevel@tonic-gate 	 */
12547c478bd9Sstevel@tonic-gate 	bno = mapsearch(ufsvfsp, cgp, bpref, (int)fs->fs_frag);
12557c478bd9Sstevel@tonic-gate 	if (bno < 0)
12567c478bd9Sstevel@tonic-gate 		return (0);
12577c478bd9Sstevel@tonic-gate 	cgp->cg_rotor = bno;
12587c478bd9Sstevel@tonic-gate gotit:
12597c478bd9Sstevel@tonic-gate 	blkno = fragstoblks(fs, bno);
12607c478bd9Sstevel@tonic-gate 	frag = (cgp->cg_cgx * fs->fs_fpg) + bno;
12617c478bd9Sstevel@tonic-gate 	if (TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, frag)), fs->fs_bsize))
12627c478bd9Sstevel@tonic-gate 		goto norot;
12637c478bd9Sstevel@tonic-gate 	clrblock(fs, blksfree, (long)blkno);
12647c478bd9Sstevel@tonic-gate 	/*
12657c478bd9Sstevel@tonic-gate 	 * the other cg/sb/si fields are TRANS'ed by the caller
12667c478bd9Sstevel@tonic-gate 	 */
12677c478bd9Sstevel@tonic-gate 	cgp->cg_cs.cs_nbfree--;
12687c478bd9Sstevel@tonic-gate 	fs->fs_cstotal.cs_nbfree--;
12697c478bd9Sstevel@tonic-gate 	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
12707c478bd9Sstevel@tonic-gate 	cylno = cbtocylno(fs, bno);
12717c478bd9Sstevel@tonic-gate 	blks = cg_blks(ufsvfsp, cgp, cylno);
12727c478bd9Sstevel@tonic-gate 	rpos = cbtorpos(ufsvfsp, bno);
12737c478bd9Sstevel@tonic-gate 	blktot = cg_blktot(cgp);
12747c478bd9Sstevel@tonic-gate 	blks[rpos]--;
12757c478bd9Sstevel@tonic-gate 	blktot[cylno]--;
12767c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
12777c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
12787c478bd9Sstevel@tonic-gate 	return (frag);
12797c478bd9Sstevel@tonic-gate }
12807c478bd9Sstevel@tonic-gate 
12817c478bd9Sstevel@tonic-gate /*
12827c478bd9Sstevel@tonic-gate  * Determine whether an inode can be allocated.
12837c478bd9Sstevel@tonic-gate  *
12847c478bd9Sstevel@tonic-gate  * Check to see if an inode is available, and if it is,
12857c478bd9Sstevel@tonic-gate  * allocate it using the following policy:
12867c478bd9Sstevel@tonic-gate  *   1) allocate the requested inode.
12877c478bd9Sstevel@tonic-gate  *   2) allocate the next available inode after the requested
12887c478bd9Sstevel@tonic-gate  *	inode in the specified cylinder group.
12897c478bd9Sstevel@tonic-gate  */
12907c478bd9Sstevel@tonic-gate static ino_t
ialloccg(struct inode * ip,int cg,daddr_t ipref,int mode)12917c478bd9Sstevel@tonic-gate ialloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
12927c478bd9Sstevel@tonic-gate {
12937c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
12947c478bd9Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
12957c478bd9Sstevel@tonic-gate 	struct cg *cgp;
12967c478bd9Sstevel@tonic-gate 	struct buf *bp;
12977c478bd9Sstevel@tonic-gate 	int start, len, loc, map, i;
12987c478bd9Sstevel@tonic-gate 	char *iused;
12997c478bd9Sstevel@tonic-gate 
13007c478bd9Sstevel@tonic-gate 	if (fs->fs_cs(fs, cg).cs_nifree == 0)
13017c478bd9Sstevel@tonic-gate 		return (0);
13027c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
130380d34432Sfrankho 	    (int)fs->fs_cgsize);
13047c478bd9Sstevel@tonic-gate 
13057c478bd9Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
13067c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
13077c478bd9Sstevel@tonic-gate 	    cgp->cg_cs.cs_nifree == 0) {
13087c478bd9Sstevel@tonic-gate 		brelse(bp);
13097c478bd9Sstevel@tonic-gate 		return (0);
13107c478bd9Sstevel@tonic-gate 	}
13117c478bd9Sstevel@tonic-gate 	iused = cg_inosused(cgp);
13127c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
13137c478bd9Sstevel@tonic-gate 	/*
13147c478bd9Sstevel@tonic-gate 	 * While we are waiting for the mutex, someone may have taken
13157c478bd9Sstevel@tonic-gate 	 * the last available inode.  Need to recheck.
13167c478bd9Sstevel@tonic-gate 	 */
13177c478bd9Sstevel@tonic-gate 	if (cgp->cg_cs.cs_nifree == 0) {
13187c478bd9Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
13197c478bd9Sstevel@tonic-gate 		brelse(bp);
13207c478bd9Sstevel@tonic-gate 		return (0);
13217c478bd9Sstevel@tonic-gate 	}
13227c478bd9Sstevel@tonic-gate 
13237c478bd9Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
13247c478bd9Sstevel@tonic-gate 	if (ipref) {
13257c478bd9Sstevel@tonic-gate 		ipref %= fs->fs_ipg;
13267c478bd9Sstevel@tonic-gate 		if (isclr(iused, ipref))
13277c478bd9Sstevel@tonic-gate 			goto gotit;
13287c478bd9Sstevel@tonic-gate 	}
13297c478bd9Sstevel@tonic-gate 	start = cgp->cg_irotor / NBBY;
13307c478bd9Sstevel@tonic-gate 	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
13317c478bd9Sstevel@tonic-gate 	loc = skpc(0xff, (uint_t)len, &iused[start]);
13327c478bd9Sstevel@tonic-gate 	if (loc == 0) {
13337c478bd9Sstevel@tonic-gate 		len = start + 1;
13347c478bd9Sstevel@tonic-gate 		start = 0;
13357c478bd9Sstevel@tonic-gate 		loc = skpc(0xff, (uint_t)len, &iused[0]);
13367c478bd9Sstevel@tonic-gate 		if (loc == 0) {
13377c478bd9Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
13387c478bd9Sstevel@tonic-gate 			(void) ufs_fault(ITOV(ip),
1339303bf60bSsdebnath 			    "ialloccg: map corrupted, cg = %d, irotor = %d, "
1340303bf60bSsdebnath 			    "fs = %s\n", cg, (int)cgp->cg_irotor, fs->fs_fsmnt);
13417c478bd9Sstevel@tonic-gate 			return (0);
13427c478bd9Sstevel@tonic-gate 		}
13437c478bd9Sstevel@tonic-gate 	}
13447c478bd9Sstevel@tonic-gate 	i = start + len - loc;
13457c478bd9Sstevel@tonic-gate 	map = iused[i];
13467c478bd9Sstevel@tonic-gate 	ipref = i * NBBY;
13477c478bd9Sstevel@tonic-gate 	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
13487c478bd9Sstevel@tonic-gate 		if ((map & i) == 0) {
13497c478bd9Sstevel@tonic-gate 			cgp->cg_irotor = ipref;
13507c478bd9Sstevel@tonic-gate 			goto gotit;
13517c478bd9Sstevel@tonic-gate 		}
13527c478bd9Sstevel@tonic-gate 	}
13537c478bd9Sstevel@tonic-gate 
13547c478bd9Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
13557c478bd9Sstevel@tonic-gate 	(void) ufs_fault(ITOV(ip), "ialloccg: block not in mapfs = %s",
135680d34432Sfrankho 	    fs->fs_fsmnt);
13577c478bd9Sstevel@tonic-gate 	return (0);
13587c478bd9Sstevel@tonic-gate gotit:
13597c478bd9Sstevel@tonic-gate 	setbit(iused, ipref);
13607c478bd9Sstevel@tonic-gate 	cgp->cg_cs.cs_nifree--;
13617c478bd9Sstevel@tonic-gate 	fs->fs_cstotal.cs_nifree--;
13627c478bd9Sstevel@tonic-gate 	fs->fs_cs(fs, cg).cs_nifree--;
13637c478bd9Sstevel@tonic-gate 	if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) {
13647c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_ndir++;
13657c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_ndir++;
13667c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_ndir++;
13677c478bd9Sstevel@tonic-gate 	}
13687c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
13697c478bd9Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
13707c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
13717c478bd9Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
13727c478bd9Sstevel@tonic-gate 	bdrwrite(bp);
13737c478bd9Sstevel@tonic-gate 	return (cg * fs->fs_ipg + ipref);
13747c478bd9Sstevel@tonic-gate }
13757c478bd9Sstevel@tonic-gate 
13767c478bd9Sstevel@tonic-gate /*
13777c478bd9Sstevel@tonic-gate  * Find a block of the specified size in the specified cylinder group.
13787c478bd9Sstevel@tonic-gate  *
13797c478bd9Sstevel@tonic-gate  * It is a panic if a request is made to find a block if none are
13807c478bd9Sstevel@tonic-gate  * available.
13817c478bd9Sstevel@tonic-gate  */
13827c478bd9Sstevel@tonic-gate static daddr_t
mapsearch(struct ufsvfs * ufsvfsp,struct cg * cgp,daddr_t bpref,int allocsiz)13831f563eb1SToomas Soome mapsearch(struct ufsvfs *ufsvfsp, struct cg *cgp, daddr_t bpref, int allocsiz)
13847c478bd9Sstevel@tonic-gate {
13857c478bd9Sstevel@tonic-gate 	struct fs *fs	= ufsvfsp->vfs_fs;
13867c478bd9Sstevel@tonic-gate 	daddr_t bno, cfrag;
13877c478bd9Sstevel@tonic-gate 	int start, len, loc, i, last, first, secondtime;
13887c478bd9Sstevel@tonic-gate 	int blk, field, subfield, pos;
13897c478bd9Sstevel@tonic-gate 	int gotit;
13907c478bd9Sstevel@tonic-gate 
13917c478bd9Sstevel@tonic-gate 	/*
13927c478bd9Sstevel@tonic-gate 	 * ufsvfs->vfs_lock is held when calling this.
13937c478bd9Sstevel@tonic-gate 	 */
13947c478bd9Sstevel@tonic-gate 	/*
13957c478bd9Sstevel@tonic-gate 	 * Find the fragment by searching through the
13967c478bd9Sstevel@tonic-gate 	 * free block map for an appropriate bit pattern.
13977c478bd9Sstevel@tonic-gate 	 */
13987c478bd9Sstevel@tonic-gate 	if (bpref)
13997c478bd9Sstevel@tonic-gate 		start = dtogd(fs, bpref) / NBBY;
14007c478bd9Sstevel@tonic-gate 	else
14017c478bd9Sstevel@tonic-gate 		start = cgp->cg_frotor / NBBY;
14027c478bd9Sstevel@tonic-gate 	/*
14037c478bd9Sstevel@tonic-gate 	 * the following loop performs two scans -- the first scan
14047c478bd9Sstevel@tonic-gate 	 * searches the bottom half of the array for a match and the
14057c478bd9Sstevel@tonic-gate 	 * second scan searches the top half of the array.  The loops
14067c478bd9Sstevel@tonic-gate 	 * have been merged just to make things difficult.
14077c478bd9Sstevel@tonic-gate 	 */
14087c478bd9Sstevel@tonic-gate 	first = start;
14097c478bd9Sstevel@tonic-gate 	last = howmany(fs->fs_fpg, NBBY);
14107c478bd9Sstevel@tonic-gate 	secondtime = 0;
14117c478bd9Sstevel@tonic-gate 	cfrag = cgp->cg_cgx * fs->fs_fpg;
14127c478bd9Sstevel@tonic-gate 	while (first < last) {
14137c478bd9Sstevel@tonic-gate 		len = last - first;
14147c478bd9Sstevel@tonic-gate 		/*
14157c478bd9Sstevel@tonic-gate 		 * search the array for a match
14167c478bd9Sstevel@tonic-gate 		 */
14177c478bd9Sstevel@tonic-gate 		loc = scanc((unsigned)len, (uchar_t *)&cg_blksfree(cgp)[first],
141880d34432Sfrankho 		    (uchar_t *)fragtbl[fs->fs_frag],
141980d34432Sfrankho 		    (int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
14207c478bd9Sstevel@tonic-gate 		/*
14217c478bd9Sstevel@tonic-gate 		 * match found
14227c478bd9Sstevel@tonic-gate 		 */
14237c478bd9Sstevel@tonic-gate 		if (loc) {
14247c478bd9Sstevel@tonic-gate 			bno = (last - loc) * NBBY;
14257c478bd9Sstevel@tonic-gate 
14267c478bd9Sstevel@tonic-gate 			/*
14277c478bd9Sstevel@tonic-gate 			 * Found the byte in the map, sift
14287c478bd9Sstevel@tonic-gate 			 * through the bits to find the selected frag
14297c478bd9Sstevel@tonic-gate 			 */
14307c478bd9Sstevel@tonic-gate 			cgp->cg_frotor = bno;
14317c478bd9Sstevel@tonic-gate 			gotit = 0;
14327c478bd9Sstevel@tonic-gate 			for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
14337c478bd9Sstevel@tonic-gate 				blk = blkmap(fs, cg_blksfree(cgp), bno);
14347c478bd9Sstevel@tonic-gate 				blk <<= 1;
14357c478bd9Sstevel@tonic-gate 				field = around[allocsiz];
14367c478bd9Sstevel@tonic-gate 				subfield = inside[allocsiz];
14377c478bd9Sstevel@tonic-gate 				for (pos = 0;
14387c478bd9Sstevel@tonic-gate 				    pos <= fs->fs_frag - allocsiz;
14397c478bd9Sstevel@tonic-gate 				    pos++) {
14407c478bd9Sstevel@tonic-gate 					if ((blk & field) == subfield) {
14417c478bd9Sstevel@tonic-gate 						gotit++;
14427c478bd9Sstevel@tonic-gate 						break;
14437c478bd9Sstevel@tonic-gate 					}
14447c478bd9Sstevel@tonic-gate 					field <<= 1;
14457c478bd9Sstevel@tonic-gate 					subfield <<= 1;
14467c478bd9Sstevel@tonic-gate 				}
14477c478bd9Sstevel@tonic-gate 				if (gotit)
14487c478bd9Sstevel@tonic-gate 					break;
14497c478bd9Sstevel@tonic-gate 			}
14507c478bd9Sstevel@tonic-gate 			bno += pos;
14517c478bd9Sstevel@tonic-gate 
14527c478bd9Sstevel@tonic-gate 			/*
14537c478bd9Sstevel@tonic-gate 			 * success if block is *not* being converted from
14547c478bd9Sstevel@tonic-gate 			 * metadata into userdata (harpy).  If so, ignore.
14557c478bd9Sstevel@tonic-gate 			 */
14567c478bd9Sstevel@tonic-gate 			if (!TRANS_ISCANCEL(ufsvfsp,
1457303bf60bSsdebnath 			    ldbtob(fsbtodb(fs, (cfrag+bno))),
1458303bf60bSsdebnath 			    allocsiz * fs->fs_fsize))
14597c478bd9Sstevel@tonic-gate 				return (bno);
1460303bf60bSsdebnath 
14617c478bd9Sstevel@tonic-gate 			/*
14627c478bd9Sstevel@tonic-gate 			 * keep looking -- this block is being converted
14637c478bd9Sstevel@tonic-gate 			 */
14647c478bd9Sstevel@tonic-gate 			first = (last - loc) + 1;
14657c478bd9Sstevel@tonic-gate 			loc = 0;
14667c478bd9Sstevel@tonic-gate 			if (first < last)
14677c478bd9Sstevel@tonic-gate 				continue;
14687c478bd9Sstevel@tonic-gate 		}
14697c478bd9Sstevel@tonic-gate 		/*
14707c478bd9Sstevel@tonic-gate 		 * no usable matches in bottom half -- now search the top half
14717c478bd9Sstevel@tonic-gate 		 */
14727c478bd9Sstevel@tonic-gate 		if (secondtime)
14737c478bd9Sstevel@tonic-gate 			/*
14747c478bd9Sstevel@tonic-gate 			 * no usable matches in top half -- all done
14757c478bd9Sstevel@tonic-gate 			 */
14767c478bd9Sstevel@tonic-gate 			break;
14777c478bd9Sstevel@tonic-gate 		secondtime = 1;
14787c478bd9Sstevel@tonic-gate 		last = start + 1;
14797c478bd9Sstevel@tonic-gate 		first = 0;
14807c478bd9Sstevel@tonic-gate 	}
14817c478bd9Sstevel@tonic-gate 	/*
14827c478bd9Sstevel@tonic-gate 	 * no usable matches
14837c478bd9Sstevel@tonic-gate 	 */
14847c478bd9Sstevel@tonic-gate 	return ((daddr_t)-1);
14857c478bd9Sstevel@tonic-gate }
14867c478bd9Sstevel@tonic-gate 
/*
 * Block-pointer bookkeeping: UFSNADDR is the sum of the NDADDR and NIADDR
 * counts, and IB() yields an index that starts at NDADDR, so indirect
 * pointer indices follow the direct ones.  SINGLE, DOUBLE and TRIPLE name
 * the three levels of indirection as 0, 1 and 2.
 */
#define	UFSNADDR (NDADDR + NIADDR)	/* NADDR applies to (obsolete) S5FS */
#define	IB(i)	(NDADDR + (i))	/* index of i'th indirect block ptr */
#define	SINGLE	0		/* single indirect block ptr */
#define	DOUBLE	1		/* double indirect block ptr */
#define	TRIPLE	2		/* triple indirect block ptr */
14927c478bd9Sstevel@tonic-gate 
1493303bf60bSsdebnath /*
1494303bf60bSsdebnath  * Acquire a write lock, and keep trying till we get it
1495303bf60bSsdebnath  */
1496303bf60bSsdebnath static int
allocsp_wlockfs(struct vnode * vp,struct lockfs * lf)1497303bf60bSsdebnath allocsp_wlockfs(struct vnode *vp, struct lockfs *lf)
1498303bf60bSsdebnath {
1499303bf60bSsdebnath 	int err = 0;
1500303bf60bSsdebnath 
1501303bf60bSsdebnath lockagain:
1502303bf60bSsdebnath 	do {
1503303bf60bSsdebnath 		err = ufs_fiolfss(vp, lf);
1504303bf60bSsdebnath 		if (err)
1505303bf60bSsdebnath 			return (err);
1506303bf60bSsdebnath 	} while (!LOCKFS_IS_ULOCK(lf));
1507303bf60bSsdebnath 
1508303bf60bSsdebnath 	lf->lf_lock = LOCKFS_WLOCK;
1509303bf60bSsdebnath 	lf->lf_flags = 0;
1510303bf60bSsdebnath 	lf->lf_comment = NULL;
1511303bf60bSsdebnath 	err = ufs__fiolfs(vp, lf, 1, 0);
1512303bf60bSsdebnath 
1513303bf60bSsdebnath 	if (err == EBUSY || err == EINVAL)
1514303bf60bSsdebnath 		goto lockagain;
1515303bf60bSsdebnath 
1516303bf60bSsdebnath 	return (err);
1517303bf60bSsdebnath }
1518303bf60bSsdebnath 
1519303bf60bSsdebnath /*
1520303bf60bSsdebnath  * Release the write lock
1521303bf60bSsdebnath  */
1522303bf60bSsdebnath static int
allocsp_unlockfs(struct vnode * vp,struct lockfs * lf)1523303bf60bSsdebnath allocsp_unlockfs(struct vnode *vp, struct lockfs *lf)
1524303bf60bSsdebnath {
1525303bf60bSsdebnath 	int err = 0;
1526303bf60bSsdebnath 
1527303bf60bSsdebnath 	lf->lf_lock = LOCKFS_ULOCK;
1528303bf60bSsdebnath 	lf->lf_flags = 0;
1529303bf60bSsdebnath 	err = ufs__fiolfs(vp, lf, 1, 0);
1530303bf60bSsdebnath 	return (err);
1531303bf60bSsdebnath }
1532303bf60bSsdebnath 
/*
 * Undo record kept by ufs_allocsp() for each block newly allocated in its
 * indirect-block pass.  New records are pushed on the front of a singly
 * linked list.
 */
struct allocsp_undo {
	daddr_t offset;			/* file offset given to bmap_write() */
	daddr_t blk;			/* block bmap_write() reported */
	struct allocsp_undo *next;	/* previously pushed record, or NULL */
};
1538303bf60bSsdebnath 
1539303bf60bSsdebnath /*
1540303bf60bSsdebnath  * ufs_allocsp() can be used to pre-allocate blocks for a file on a given
15414f21de4dSjr  * file system. For direct blocks, the blocks are allocated from the offset
15424f21de4dSjr  * requested to the block boundary, then any full blocks are allocated,
15434f21de4dSjr  * and finally any remainder.
15444f21de4dSjr  * For indirect blocks the blocks are not initialized and are
15454f21de4dSjr  * only marked as allocated. These addresses are then stored as negative
15464f21de4dSjr  * block numbers in the inode to imply special handling. UFS has been modified
15474f21de4dSjr  * where necessary to understand this new notion.
15484f21de4dSjr  * Successfully fallocated files will have IFALLOCATE cflag set in the inode.
1549303bf60bSsdebnath  */
1550303bf60bSsdebnath int
ufs_allocsp(struct vnode * vp,struct flock64 * lp,cred_t * cr)1551303bf60bSsdebnath ufs_allocsp(struct vnode *vp, struct flock64 *lp, cred_t *cr)
1552303bf60bSsdebnath {
1553303bf60bSsdebnath 	struct lockfs lf;
1554303bf60bSsdebnath 	int berr, err, resv, issync;
15554f21de4dSjr 	off_t istart, len; /* istart, special for idb */
1556303bf60bSsdebnath 	struct inode *ip;
1557303bf60bSsdebnath 	struct fs *fs;
1558303bf60bSsdebnath 	struct ufsvfs *ufsvfsp;
15594f21de4dSjr 	u_offset_t resid, i, uoff;
1560303bf60bSsdebnath 	daddr32_t db_undo[NDADDR];	/* old direct blocks */
1561303bf60bSsdebnath 	struct allocsp_undo *ib_undo = NULL;	/* ib undo */
1562303bf60bSsdebnath 	struct allocsp_undo *undo = NULL;
1563303bf60bSsdebnath 	u_offset_t osz;			/* old file size */
1564303bf60bSsdebnath 	int chunkblks = 0;		/* # of blocks in 1 allocation */
1565303bf60bSsdebnath 	int cnt = 0;
1566303bf60bSsdebnath 	daddr_t allocblk;
1567303bf60bSsdebnath 	daddr_t totblks = 0;
1568303bf60bSsdebnath 	struct ulockfs	*ulp;
15694f21de4dSjr 	size_t done_len;
15704f21de4dSjr 	int nbytes, offsetn;
15714f21de4dSjr 
1572303bf60bSsdebnath 
1573303bf60bSsdebnath 	ASSERT(vp->v_type == VREG);
1574303bf60bSsdebnath 
1575303bf60bSsdebnath 	ip = VTOI(vp);
1576303bf60bSsdebnath 	fs = ip->i_fs;
1577303bf60bSsdebnath 	if ((ufsvfsp = ip->i_ufsvfs) == NULL) {
1578303bf60bSsdebnath 		err = EIO;
1579303bf60bSsdebnath 		goto out_allocsp;
1580303bf60bSsdebnath 	}
1581303bf60bSsdebnath 
15824f21de4dSjr 	istart = blkroundup(fs, (lp->l_start));
1583303bf60bSsdebnath 	len = blkroundup(fs, (lp->l_len));
1584303bf60bSsdebnath 	chunkblks = blkroundup(fs, ufsvfsp->vfs_iotransz) / fs->fs_bsize;
1585303bf60bSsdebnath 	ulp = &ufsvfsp->vfs_ulockfs;
1586303bf60bSsdebnath 
1587303bf60bSsdebnath 	if (lp->l_start < 0 || lp->l_len <= 0)
1588303bf60bSsdebnath 		return (EINVAL);
1589303bf60bSsdebnath 
1590303bf60bSsdebnath 	/* Quickly check to make sure we have space before we proceed */
1591303bf60bSsdebnath 	if (lblkno(fs, len) > fs->fs_cstotal.cs_nbfree) {
1592303bf60bSsdebnath 		if (TRANS_ISTRANS(ufsvfsp)) {
1593303bf60bSsdebnath 			ufs_delete_drain_wait(ufsvfsp, 1);
1594303bf60bSsdebnath 			if (lblkno(fs, len) > fs->fs_cstotal.cs_nbfree)
1595303bf60bSsdebnath 				return (ENOSPC);
1596303bf60bSsdebnath 		} else
1597303bf60bSsdebnath 			return (ENOSPC);
1598303bf60bSsdebnath 	}
1599303bf60bSsdebnath 
1600303bf60bSsdebnath 	/*
1601303bf60bSsdebnath 	 * We will keep i_rwlock locked as WRITER through out the function
1602303bf60bSsdebnath 	 * since we don't want anyone else reading or writing to the inode
1603303bf60bSsdebnath 	 * while we are in the middle of fallocating the file.
1604303bf60bSsdebnath 	 */
1605303bf60bSsdebnath 	rw_enter(&ip->i_rwlock, RW_WRITER);
1606303bf60bSsdebnath 
1607303bf60bSsdebnath 	/* Back up the direct block list, used for undo later if necessary */
1608303bf60bSsdebnath 	rw_enter(&ip->i_contents, RW_READER);
1609303bf60bSsdebnath 	for (i = 0; i < NDADDR; i++)
1610303bf60bSsdebnath 		db_undo[i] = ip->i_db[i];
1611303bf60bSsdebnath 	osz = ip->i_size;
1612303bf60bSsdebnath 	rw_exit(&ip->i_contents);
1613303bf60bSsdebnath 
16144f21de4dSjr 	/* Write lock the file system */
16154f21de4dSjr 	if (err = allocsp_wlockfs(vp, &lf))
16164f21de4dSjr 		goto exit;
16174f21de4dSjr 
16184f21de4dSjr 	/*
16194f21de4dSjr 	 * Allocate any direct blocks now.
16204f21de4dSjr 	 * Blocks are allocated from the offset requested to the block
16214f21de4dSjr 	 * boundary, then any full blocks are allocated, and finally any
16224f21de4dSjr 	 * remainder.
16234f21de4dSjr 	 */
16244f21de4dSjr 	if (lblkno(fs, lp->l_start) < NDADDR) {
1625303bf60bSsdebnath 		ufs_trans_trunc_resv(ip, ip->i_size + (NDADDR * fs->fs_bsize),
1626303bf60bSsdebnath 		    &resv, &resid);
1627303bf60bSsdebnath 		TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv);
1628303bf60bSsdebnath 
1629303bf60bSsdebnath 		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1630303bf60bSsdebnath 		rw_enter(&ip->i_contents, RW_WRITER);
1631303bf60bSsdebnath 
16324f21de4dSjr 		done_len = 0;
16334f21de4dSjr 		while ((done_len < lp->l_len) &&
16344f21de4dSjr 		    (lblkno(fs, lp->l_start + done_len) < NDADDR)) {
16354f21de4dSjr 			uoff = (offset_t)(lp->l_start + done_len);
16364f21de4dSjr 			offsetn = (int)blkoff(fs, uoff);
16374f21de4dSjr 			nbytes = (int)MIN(fs->fs_bsize - offsetn,
16384f21de4dSjr 			    lp->l_len - done_len);
16394f21de4dSjr 
16404f21de4dSjr 			berr = bmap_write(ip, uoff, offsetn + nbytes,
16414f21de4dSjr 			    BI_FALLOCATE, &allocblk, cr);
1642303bf60bSsdebnath 			/* Yikes error, quit */
1643303bf60bSsdebnath 			if (berr) {
1644303bf60bSsdebnath 				TRANS_INODE(ufsvfsp, ip);
1645303bf60bSsdebnath 				rw_exit(&ip->i_contents);
1646303bf60bSsdebnath 				rw_exit(&ufsvfsp->vfs_dqrwlock);
1647303bf60bSsdebnath 				TRANS_END_CSYNC(ufsvfsp, err, issync,
1648303bf60bSsdebnath 				    TOP_ALLOCSP, resv);
16494f21de4dSjr 				err = allocsp_unlockfs(vp, &lf);
1650303bf60bSsdebnath 				goto exit;
1651303bf60bSsdebnath 			}
1652303bf60bSsdebnath 
1653303bf60bSsdebnath 			if (allocblk) {
1654303bf60bSsdebnath 				totblks++;
16554f21de4dSjr 				if ((uoff + nbytes) > ip->i_size)
16564f21de4dSjr 					ip->i_size = (uoff + nbytes);
1657303bf60bSsdebnath 			}
16584f21de4dSjr 			done_len += nbytes;
1659303bf60bSsdebnath 		}
1660303bf60bSsdebnath 
1661303bf60bSsdebnath 		TRANS_INODE(ufsvfsp, ip);
1662303bf60bSsdebnath 		rw_exit(&ip->i_contents);
1663303bf60bSsdebnath 		rw_exit(&ufsvfsp->vfs_dqrwlock);
1664303bf60bSsdebnath 		TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv);
1665303bf60bSsdebnath 
16664f21de4dSjr 		/* start offset for indirect allocation */
16674f21de4dSjr 		istart =  (uoff + nbytes);
1668303bf60bSsdebnath 	}
1669303bf60bSsdebnath 
1670303bf60bSsdebnath 	/* Break the transactions into vfs_iotransz units */
1671303bf60bSsdebnath 	ufs_trans_trunc_resv(ip, ip->i_size +
1672303bf60bSsdebnath 	    blkroundup(fs, ufsvfsp->vfs_iotransz), &resv, &resid);
1673303bf60bSsdebnath 	TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv);
1674303bf60bSsdebnath 
1675303bf60bSsdebnath 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1676303bf60bSsdebnath 	rw_enter(&ip->i_contents, RW_WRITER);
1677303bf60bSsdebnath 
1678303bf60bSsdebnath 	/* Now go about fallocating necessary indirect blocks */
16794f21de4dSjr 	for (i = istart; i < (lp->l_start + lp->l_len); i += fs->fs_bsize) {
1680303bf60bSsdebnath 		berr = bmap_write(ip, i, fs->fs_bsize, BI_FALLOCATE,
1681303bf60bSsdebnath 		    &allocblk, cr);
1682303bf60bSsdebnath 		if (berr) {
1683303bf60bSsdebnath 			TRANS_INODE(ufsvfsp, ip);
1684303bf60bSsdebnath 			rw_exit(&ip->i_contents);
1685303bf60bSsdebnath 			rw_exit(&ufsvfsp->vfs_dqrwlock);
1686303bf60bSsdebnath 			TRANS_END_CSYNC(ufsvfsp, err, issync,
1687303bf60bSsdebnath 			    TOP_ALLOCSP, resv);
1688303bf60bSsdebnath 			err = allocsp_unlockfs(vp, &lf);
1689303bf60bSsdebnath 			goto exit;
1690303bf60bSsdebnath 		}
1691303bf60bSsdebnath 
1692303bf60bSsdebnath 		/* Update the blk counter only if new block was added */
1693303bf60bSsdebnath 		if (allocblk) {
1694303bf60bSsdebnath 			/* Save undo information */
1695303bf60bSsdebnath 			undo = kmem_alloc(sizeof (struct allocsp_undo),
1696303bf60bSsdebnath 			    KM_SLEEP);
1697303bf60bSsdebnath 			undo->offset = i;
1698303bf60bSsdebnath 			undo->blk = allocblk;
1699303bf60bSsdebnath 			undo->next = ib_undo;
1700303bf60bSsdebnath 			ib_undo = undo;
1701303bf60bSsdebnath 			totblks++;
170233c22cb3Smishra 
170333c22cb3Smishra 			if (i >= ip->i_size)
170433c22cb3Smishra 				ip->i_size += fs->fs_bsize;
1705303bf60bSsdebnath 		}
1706303bf60bSsdebnath 		cnt++;
1707303bf60bSsdebnath 
1708303bf60bSsdebnath 		/* Being a good UFS citizen, let others get a share */
1709303bf60bSsdebnath 		if (cnt == chunkblks) {
1710303bf60bSsdebnath 			/*
1711303bf60bSsdebnath 			 * If there are waiters or the fs is hard locked,
1712303bf60bSsdebnath 			 * error locked, or read-only error locked,
1713303bf60bSsdebnath 			 * quit with EIO
1714303bf60bSsdebnath 			 */
1715303bf60bSsdebnath 			if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp) ||
1716303bf60bSsdebnath 			    ULOCKFS_IS_ROELOCK(ulp)) {
1717303bf60bSsdebnath 				ip->i_cflags |= IFALLOCATE;
1718303bf60bSsdebnath 				TRANS_INODE(ufsvfsp, ip);
1719303bf60bSsdebnath 				rw_exit(&ip->i_contents);
1720303bf60bSsdebnath 				rw_exit(&ufsvfsp->vfs_dqrwlock);
1721303bf60bSsdebnath 
1722303bf60bSsdebnath 				TRANS_END_CSYNC(ufsvfsp, err, issync,
1723303bf60bSsdebnath 				    TOP_ALLOCSP, resv);
1724303bf60bSsdebnath 				rw_exit(&ip->i_rwlock);
17254f21de4dSjr 				(void) allocsp_unlockfs(vp, &lf);
1726303bf60bSsdebnath 				return (EIO);
1727303bf60bSsdebnath 			}
1728303bf60bSsdebnath 
1729303bf60bSsdebnath 			TRANS_INODE(ufsvfsp, ip);
1730303bf60bSsdebnath 			rw_exit(&ip->i_contents);
1731303bf60bSsdebnath 			rw_exit(&ufsvfsp->vfs_dqrwlock);
1732303bf60bSsdebnath 
1733303bf60bSsdebnath 			/* End the current transaction */
1734303bf60bSsdebnath 			TRANS_END_CSYNC(ufsvfsp, err, issync,
1735303bf60bSsdebnath 			    TOP_ALLOCSP, resv);
1736303bf60bSsdebnath 
1737303bf60bSsdebnath 			if (CV_HAS_WAITERS(&ulp->ul_cv)) {
1738303bf60bSsdebnath 				/* Release the write lock */
1739303bf60bSsdebnath 				if (err = allocsp_unlockfs(vp, &lf))
1740303bf60bSsdebnath 					goto exit;
1741303bf60bSsdebnath 
1742303bf60bSsdebnath 				/* Wake up others waiting to do operations */
1743303bf60bSsdebnath 				mutex_enter(&ulp->ul_lock);
1744303bf60bSsdebnath 				cv_broadcast(&ulp->ul_cv);
1745303bf60bSsdebnath 				mutex_exit(&ulp->ul_lock);
1746303bf60bSsdebnath 
1747303bf60bSsdebnath 				/* Grab the write lock again */
1748303bf60bSsdebnath 				if (err = allocsp_wlockfs(vp, &lf))
1749303bf60bSsdebnath 					goto exit;
1750303bf60bSsdebnath 			} /* end of CV_HAS_WAITERS(&ulp->ul_cv) */
1751303bf60bSsdebnath 
1752303bf60bSsdebnath 			/* Reserve more space in log for this file */
1753303bf60bSsdebnath 			ufs_trans_trunc_resv(ip,
1754303bf60bSsdebnath 			    ip->i_size + blkroundup(fs, ufsvfsp->vfs_iotransz),
1755303bf60bSsdebnath 			    &resv, &resid);
1756303bf60bSsdebnath 			TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv);
1757303bf60bSsdebnath 
1758303bf60bSsdebnath 			rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1759303bf60bSsdebnath 			rw_enter(&ip->i_contents, RW_WRITER);
1760303bf60bSsdebnath 
1761303bf60bSsdebnath 			cnt = 0;	/* reset cnt b/c of new transaction */
1762303bf60bSsdebnath 		}
1763303bf60bSsdebnath 	}
1764303bf60bSsdebnath 
1765303bf60bSsdebnath 	if (!err && !berr)
1766303bf60bSsdebnath 		ip->i_cflags |= IFALLOCATE;
1767303bf60bSsdebnath 
17684f21de4dSjr 	/* If the file has grown then correct the file size */
17694f21de4dSjr 	if (osz < (lp->l_start + lp->l_len))
17704f21de4dSjr 		ip->i_size = (lp->l_start + lp->l_len);
17714f21de4dSjr 
1772303bf60bSsdebnath 	/* Release locks, end log transaction and unlock fs */
1773303bf60bSsdebnath 	TRANS_INODE(ufsvfsp, ip);
1774303bf60bSsdebnath 	rw_exit(&ip->i_contents);
1775303bf60bSsdebnath 	rw_exit(&ufsvfsp->vfs_dqrwlock);
1776303bf60bSsdebnath 
1777303bf60bSsdebnath 	TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv);
1778303bf60bSsdebnath 	err = allocsp_unlockfs(vp, &lf);
1779303bf60bSsdebnath 
1780303bf60bSsdebnath 	/*
1781303bf60bSsdebnath 	 * @ exit label, we should no longer be holding the fs write lock, and
1782303bf60bSsdebnath 	 * all logging transactions should have been ended. We still hold
1783303bf60bSsdebnath 	 * ip->i_rwlock.
1784303bf60bSsdebnath 	 */
1785303bf60bSsdebnath exit:
1786303bf60bSsdebnath 	/*
1787303bf60bSsdebnath 	 * File has grown larger than 2GB. Set flag
1788303bf60bSsdebnath 	 * in superblock to indicate this, if it
1789303bf60bSsdebnath 	 * is not already set.
1790303bf60bSsdebnath 	 */
1791303bf60bSsdebnath 	if ((ip->i_size > MAXOFF32_T) &&
179280d34432Sfrankho 	    !(fs->fs_flags & FSLARGEFILES)) {
1793303bf60bSsdebnath 		ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
1794303bf60bSsdebnath 		mutex_enter(&ufsvfsp->vfs_lock);
1795303bf60bSsdebnath 		fs->fs_flags |= FSLARGEFILES;
1796303bf60bSsdebnath 		ufs_sbwrite(ufsvfsp);
1797303bf60bSsdebnath 		mutex_exit(&ufsvfsp->vfs_lock);
1798303bf60bSsdebnath 	}
1799303bf60bSsdebnath 
1800303bf60bSsdebnath 	/*
1801303bf60bSsdebnath 	 * Since we couldn't allocate completely, we will undo the allocations.
1802303bf60bSsdebnath 	 */
1803303bf60bSsdebnath 	if (berr) {
1804303bf60bSsdebnath 		ufs_trans_trunc_resv(ip, totblks * fs->fs_bsize, &resv, &resid);
1805303bf60bSsdebnath 		TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ALLOCSP, resv);
1806303bf60bSsdebnath 
1807303bf60bSsdebnath 		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1808303bf60bSsdebnath 		rw_enter(&ip->i_contents, RW_WRITER);
1809303bf60bSsdebnath 
1810303bf60bSsdebnath 		/* Direct blocks */
1811303bf60bSsdebnath 		for (i = 0; i < NDADDR; i++) {
1812303bf60bSsdebnath 			/*
1813303bf60bSsdebnath 			 * Only free the block if they are not same, and
1814303bf60bSsdebnath 			 * the old one isn't zero (the fragment was
1815303bf60bSsdebnath 			 * re-allocated).
1816303bf60bSsdebnath 			 */
1817303bf60bSsdebnath 			if (db_undo[i] != ip->i_db[i] && db_undo[i] == 0) {
1818303bf60bSsdebnath 				free(ip, ip->i_db[i], fs->fs_bsize, 0);
1819303bf60bSsdebnath 				ip->i_db[i] = 0;
1820303bf60bSsdebnath 			}
1821303bf60bSsdebnath 		}
1822303bf60bSsdebnath 
1823303bf60bSsdebnath 		/* Undo the indirect blocks */
1824303bf60bSsdebnath 		while (ib_undo != NULL) {
1825303bf60bSsdebnath 			undo = ib_undo;
1826303bf60bSsdebnath 			err = bmap_set_bn(vp, undo->offset, 0);
1827303bf60bSsdebnath 			if (err)
1828303bf60bSsdebnath 				cmn_err(CE_PANIC, "ufs_allocsp(): failed to "
1829303bf60bSsdebnath 				    "undo allocation of block %ld",
1830303bf60bSsdebnath 				    undo->offset);
1831303bf60bSsdebnath 			free(ip, undo->blk, fs->fs_bsize, I_IBLK);
1832303bf60bSsdebnath 			ib_undo = undo->next;
1833303bf60bSsdebnath 			kmem_free(undo, sizeof (struct allocsp_undo));
1834303bf60bSsdebnath 		}
1835303bf60bSsdebnath 
1836303bf60bSsdebnath 		ip->i_size = osz;
1837303bf60bSsdebnath 		TRANS_INODE(ufsvfsp, ip);
1838303bf60bSsdebnath 
1839303bf60bSsdebnath 		rw_exit(&ip->i_contents);
1840303bf60bSsdebnath 		rw_exit(&ufsvfsp->vfs_dqrwlock);
1841303bf60bSsdebnath 
1842303bf60bSsdebnath 		TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ALLOCSP, resv);
1843303bf60bSsdebnath 
1844303bf60bSsdebnath 		rw_exit(&ip->i_rwlock);
1845303bf60bSsdebnath 		return (berr);
1846303bf60bSsdebnath 	}
1847303bf60bSsdebnath 
1848303bf60bSsdebnath 	/*
1849303bf60bSsdebnath 	 * Don't forget to free the undo chain :)
1850303bf60bSsdebnath 	 */
1851303bf60bSsdebnath 	while (ib_undo != NULL) {
1852303bf60bSsdebnath 		undo = ib_undo;
1853303bf60bSsdebnath 		ib_undo = undo->next;
1854303bf60bSsdebnath 		kmem_free(undo, sizeof (struct allocsp_undo));
1855303bf60bSsdebnath 	}
1856303bf60bSsdebnath 
1857303bf60bSsdebnath 	rw_exit(&ip->i_rwlock);
1858303bf60bSsdebnath 
1859303bf60bSsdebnath out_allocsp:
1860303bf60bSsdebnath 	return (err);
1861303bf60bSsdebnath }
1862303bf60bSsdebnath 
18637c478bd9Sstevel@tonic-gate /*
18647c478bd9Sstevel@tonic-gate  * Free storage space associated with the specified inode.  The portion
18657c478bd9Sstevel@tonic-gate  * to be freed is specified by lp->l_start and lp->l_len (already
18667c478bd9Sstevel@tonic-gate  * normalized to a "whence" of 0).
18677c478bd9Sstevel@tonic-gate  *
18687c478bd9Sstevel@tonic-gate  * This is an experimental facility whose continued existence is not
18697c478bd9Sstevel@tonic-gate  * guaranteed.  Currently, we only support the special case
18707c478bd9Sstevel@tonic-gate  * of l_len == 0, meaning free to end of file.
18717c478bd9Sstevel@tonic-gate  *
18727c478bd9Sstevel@tonic-gate  * Blocks are freed in reverse order.  This FILO algorithm will tend to
18737c478bd9Sstevel@tonic-gate  * maintain a contiguous free list much longer than FIFO.
18747c478bd9Sstevel@tonic-gate  * See also ufs_itrunc() in ufs_inode.c.
18757c478bd9Sstevel@tonic-gate  *
18767c478bd9Sstevel@tonic-gate  * Bug: unused bytes in the last retained block are not cleared.
18777c478bd9Sstevel@tonic-gate  * This may result in a "hole" in the file that does not read as zeroes.
18787c478bd9Sstevel@tonic-gate  */
18797c478bd9Sstevel@tonic-gate /* ARGSUSED */
18807c478bd9Sstevel@tonic-gate int
ufs_freesp(struct vnode * vp,struct flock64 * lp,int flag,cred_t * cr)18817c478bd9Sstevel@tonic-gate ufs_freesp(struct vnode *vp, struct flock64 *lp, int flag, cred_t *cr)
18827c478bd9Sstevel@tonic-gate {
18837c478bd9Sstevel@tonic-gate 	int i;
18847c478bd9Sstevel@tonic-gate 	struct inode *ip = VTOI(vp);
18857c478bd9Sstevel@tonic-gate 	int error;
18867c478bd9Sstevel@tonic-gate 
18877c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type == VREG);
18887c478bd9Sstevel@tonic-gate 	ASSERT(lp->l_start >= 0);	/* checked by convoff */
18897c478bd9Sstevel@tonic-gate 
18907c478bd9Sstevel@tonic-gate 	if (lp->l_len != 0)
18917c478bd9Sstevel@tonic-gate 		return (EINVAL);
18927c478bd9Sstevel@tonic-gate 
18937c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_READER);
18947c478bd9Sstevel@tonic-gate 	if (ip->i_size == (u_offset_t)lp->l_start) {
18957c478bd9Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
18967c478bd9Sstevel@tonic-gate 		return (0);
18977c478bd9Sstevel@tonic-gate 	}
18987c478bd9Sstevel@tonic-gate 
18997c478bd9Sstevel@tonic-gate 	/*
19007c478bd9Sstevel@tonic-gate 	 * Check if there is any active mandatory lock on the
19017c478bd9Sstevel@tonic-gate 	 * range that will be truncated/expanded.
19027c478bd9Sstevel@tonic-gate 	 */
19037c478bd9Sstevel@tonic-gate 	if (MANDLOCK(vp, ip->i_mode)) {
19047c478bd9Sstevel@tonic-gate 		offset_t save_start;
19057c478bd9Sstevel@tonic-gate 
19067c478bd9Sstevel@tonic-gate 		save_start = lp->l_start;
19077c478bd9Sstevel@tonic-gate 
19087c478bd9Sstevel@tonic-gate 		if (ip->i_size < lp->l_start) {
19097c478bd9Sstevel@tonic-gate 			/*
19107c478bd9Sstevel@tonic-gate 			 * "Truncate up" case: need to make sure there
19117c478bd9Sstevel@tonic-gate 			 * is no lock beyond current end-of-file. To
19127c478bd9Sstevel@tonic-gate 			 * do so, we need to set l_start to the size
19137c478bd9Sstevel@tonic-gate 			 * of the file temporarily.
19147c478bd9Sstevel@tonic-gate 			 */
19157c478bd9Sstevel@tonic-gate 			lp->l_start = ip->i_size;
19167c478bd9Sstevel@tonic-gate 		}
19177c478bd9Sstevel@tonic-gate 		lp->l_type = F_WRLCK;
19187c478bd9Sstevel@tonic-gate 		lp->l_sysid = 0;
19197c478bd9Sstevel@tonic-gate 		lp->l_pid = ttoproc(curthread)->p_pid;
19207c478bd9Sstevel@tonic-gate 		i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK;
19217c478bd9Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
19227c478bd9Sstevel@tonic-gate 		if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 ||
19237c478bd9Sstevel@tonic-gate 		    lp->l_type != F_UNLCK) {
19247c478bd9Sstevel@tonic-gate 			return (i ? i : EAGAIN);
19257c478bd9Sstevel@tonic-gate 		}
19267c478bd9Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_READER);
19277c478bd9Sstevel@tonic-gate 
19287c478bd9Sstevel@tonic-gate 		lp->l_start = save_start;
19297c478bd9Sstevel@tonic-gate 	}
19307c478bd9Sstevel@tonic-gate 
19317c478bd9Sstevel@tonic-gate 	/*
19327c478bd9Sstevel@tonic-gate 	 * Make sure a write isn't in progress (allocating blocks)
19337c478bd9Sstevel@tonic-gate 	 * by acquiring i_rwlock (we promised ufs_bmap we wouldn't
19347c478bd9Sstevel@tonic-gate 	 * truncate while it was allocating blocks).
19357c478bd9Sstevel@tonic-gate 	 * Grab the locks in the right order.
19367c478bd9Sstevel@tonic-gate 	 */
19377c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
19387c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_rwlock, RW_WRITER);
19397c478bd9Sstevel@tonic-gate 	error = TRANS_ITRUNC(ip, (u_offset_t)lp->l_start, 0, cr);
19407c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_rwlock);
19417c478bd9Sstevel@tonic-gate 	return (error);
19427c478bd9Sstevel@tonic-gate }
19437c478bd9Sstevel@tonic-gate 
19447c478bd9Sstevel@tonic-gate /*
19457c478bd9Sstevel@tonic-gate  * Find a cg with as close to nb contiguous bytes as possible
19467c478bd9Sstevel@tonic-gate  *	THIS MAY TAKE MANY DISK READS!
19477c478bd9Sstevel@tonic-gate  *
19487c478bd9Sstevel@tonic-gate  * Implemented in an attempt to allocate contiguous blocks for
19497c478bd9Sstevel@tonic-gate  * writing the ufs log file to, minimizing future disk head seeking
19507c478bd9Sstevel@tonic-gate  */
19517c478bd9Sstevel@tonic-gate daddr_t
contigpref(ufsvfs_t * ufsvfsp,size_t nb,size_t minb)1952e7da395aSOwen Roberts contigpref(ufsvfs_t *ufsvfsp, size_t nb, size_t minb)
19537c478bd9Sstevel@tonic-gate {
19547c478bd9Sstevel@tonic-gate 	struct fs	*fs	= ufsvfsp->vfs_fs;
19557c478bd9Sstevel@tonic-gate 	daddr_t		nblk	= lblkno(fs, blkroundup(fs, nb));
1956e7da395aSOwen Roberts 	daddr_t		minblk	= lblkno(fs, blkroundup(fs, minb));
19577c478bd9Sstevel@tonic-gate 	daddr_t		savebno, curbno, cgbno;
1958e7da395aSOwen Roberts 	int		cg, cgblks, savecg, savenblk, curnblk, startcg;
19597c478bd9Sstevel@tonic-gate 	uchar_t		*blksfree;
19607c478bd9Sstevel@tonic-gate 	buf_t		*bp;
19617c478bd9Sstevel@tonic-gate 	struct cg	*cgp;
19627c478bd9Sstevel@tonic-gate 
19637c478bd9Sstevel@tonic-gate 	savenblk = 0;
19647c478bd9Sstevel@tonic-gate 	savecg = 0;
19657c478bd9Sstevel@tonic-gate 	savebno = 0;
19667c478bd9Sstevel@tonic-gate 
1967e7da395aSOwen Roberts 	if ((startcg = findlogstartcg(fs, nblk, minblk)) == -1)
1968e7da395aSOwen Roberts 		cg = 0;	/* Nothing suitable found */
1969e7da395aSOwen Roberts 	else
1970e7da395aSOwen Roberts 		cg = startcg;
19717c478bd9Sstevel@tonic-gate 
1972e7da395aSOwen Roberts 	for (; cg < fs->fs_ncg; ++cg) {
19737c478bd9Sstevel@tonic-gate 		/*
19747c478bd9Sstevel@tonic-gate 		 * find the largest contiguous range in this cg
19757c478bd9Sstevel@tonic-gate 		 */
19767c478bd9Sstevel@tonic-gate 		bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev,
1977303bf60bSsdebnath 		    (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
1978303bf60bSsdebnath 		    (int)fs->fs_cgsize);
19797c478bd9Sstevel@tonic-gate 		cgp = bp->b_un.b_cg;
19807c478bd9Sstevel@tonic-gate 		if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
19817c478bd9Sstevel@tonic-gate 			brelse(bp);
19827c478bd9Sstevel@tonic-gate 			continue;
19837c478bd9Sstevel@tonic-gate 		}
19847c478bd9Sstevel@tonic-gate 		blksfree = cg_blksfree(cgp);	    /* free array */
19857c478bd9Sstevel@tonic-gate 		cgblks = fragstoblks(fs, fs->fs_fpg); /* blks in free array */
19867c478bd9Sstevel@tonic-gate 		cgbno = 0;
19877c478bd9Sstevel@tonic-gate 		while (cgbno < cgblks && savenblk < nblk) {
19887c478bd9Sstevel@tonic-gate 			/* find a free block */
1989e7da395aSOwen Roberts 			for (; cgbno < cgblks; ++cgbno) {
1990e7da395aSOwen Roberts 				if (isblock(fs, blksfree, cgbno)) {
1991b323a615SJim Rice 					if (startcg != -1) {
1992b323a615SJim Rice 						brelse(bp);
1993b323a615SJim Rice 						savecg = startcg;
1994b323a615SJim Rice 						savebno = cgbno;
1995e7da395aSOwen Roberts 						goto done;
1996b323a615SJim Rice 					} else
1997e7da395aSOwen Roberts 						break;
1998e7da395aSOwen Roberts 				}
1999e7da395aSOwen Roberts 			}
20007c478bd9Sstevel@tonic-gate 			curbno = cgbno;
20017c478bd9Sstevel@tonic-gate 			/* count the number of free blocks */
20027c478bd9Sstevel@tonic-gate 			for (curnblk = 0; cgbno < cgblks; ++cgbno) {
20037c478bd9Sstevel@tonic-gate 				if (!isblock(fs, blksfree, cgbno))
20047c478bd9Sstevel@tonic-gate 					break;
20057c478bd9Sstevel@tonic-gate 				if (++curnblk >= nblk)
20067c478bd9Sstevel@tonic-gate 					break;
20077c478bd9Sstevel@tonic-gate 			}
20087c478bd9Sstevel@tonic-gate 			if (curnblk > savenblk) {
20097c478bd9Sstevel@tonic-gate 				savecg = cg;
20107c478bd9Sstevel@tonic-gate 				savenblk = curnblk;
20117c478bd9Sstevel@tonic-gate 				savebno = curbno;
20127c478bd9Sstevel@tonic-gate 			}
20137c478bd9Sstevel@tonic-gate 		}
20147c478bd9Sstevel@tonic-gate 		brelse(bp);
20157c478bd9Sstevel@tonic-gate 		if (savenblk >= nblk)
20167c478bd9Sstevel@tonic-gate 			break;
20177c478bd9Sstevel@tonic-gate 	}
20187c478bd9Sstevel@tonic-gate 
2019e7da395aSOwen Roberts done:
2020e7da395aSOwen Roberts 
20217c478bd9Sstevel@tonic-gate 	/* convert block offset in cg to frag offset in cg */
20227c478bd9Sstevel@tonic-gate 	savebno = blkstofrags(fs, savebno);
20237c478bd9Sstevel@tonic-gate 
20247c478bd9Sstevel@tonic-gate 	/* convert frag offset in cg to frag offset in fs */
20257c478bd9Sstevel@tonic-gate 	savebno += (savecg * fs->fs_fpg);
20267c478bd9Sstevel@tonic-gate 
20277c478bd9Sstevel@tonic-gate 	return (savebno);
20287c478bd9Sstevel@tonic-gate }
2029e7da395aSOwen Roberts 
2030e7da395aSOwen Roberts /*
2031e7da395aSOwen Roberts  * The object of this routine is to find a start point for the UFS log.
2032e7da395aSOwen Roberts  * Ideally the space should be allocated from the smallest possible number
2033e7da395aSOwen Roberts  * of contiguous cylinder groups. This is found by using a sliding window
2034e7da395aSOwen Roberts  * technique. The smallest window of contiguous cylinder groups, which is
2035e7da395aSOwen Roberts  * still able to accommodate the target, is found by moving the window
2036e7da395aSOwen Roberts  * through the cylinder groups in a single pass. The end of the window is
2037e7da395aSOwen Roberts  * advanced until the space is accommodated, then the start is advanced until
2038e7da395aSOwen Roberts  * it no longer fits, the end is then advanced again and so on until the
2039e7da395aSOwen Roberts  * final cylinder group is reached. The first suitable instance is recorded
2040e7da395aSOwen Roberts  * and its starting cg number is returned.
2041e7da395aSOwen Roberts  *
2042e7da395aSOwen Roberts  * If we are not able to find a minimum amount of space, represented by
2043e7da395aSOwen Roberts  * minblk, or to do so uses more than the available extents, then return -1.
2044e7da395aSOwen Roberts  */
2045e7da395aSOwen Roberts 
2046e7da395aSOwen Roberts int
findlogstartcg(struct fs * fs,daddr_t requested,daddr_t minblk)2047e7da395aSOwen Roberts findlogstartcg(struct fs *fs, daddr_t requested, daddr_t minblk)
2048e7da395aSOwen Roberts {
2049e7da395aSOwen Roberts 	int	 ncgs;		 /* number of cylinder groups */
2050e7da395aSOwen Roberts 	daddr_t target;		 /* amount of space sought */
2051e7da395aSOwen Roberts 	int	 cwidth, ctotal; /* current window width and total */
2052e7da395aSOwen Roberts 	int	 bwidth, btotal; /* best window width and total so far */
2053e7da395aSOwen Roberts 	int	 s;	/* index of the first element in the current window */
2054e7da395aSOwen Roberts 	int	 e;	/* index of the first element + the width */
2055e7da395aSOwen Roberts 			/*  (i.e. 1 + index of last element) */
2056e7da395aSOwen Roberts 	int	 bs; /* index of the first element in the best window so far */
2057e7da395aSOwen Roberts 	int	 header, max_extents;
2058e7da395aSOwen Roberts 
2059e7da395aSOwen Roberts 	target = requested;
2060e7da395aSOwen Roberts 	ncgs = fs->fs_ncg;
2061e7da395aSOwen Roberts 
2062e7da395aSOwen Roberts 	header = sizeof (extent_block_t) - sizeof (extent_t);
2063e7da395aSOwen Roberts 	max_extents = ((fs->fs_bsize)-header) / sizeof (extent_t);
2064e7da395aSOwen Roberts 	cwidth = ctotal = 0;
2065e7da395aSOwen Roberts 	btotal = -1;
2066e7da395aSOwen Roberts 	bwidth = ncgs;
2067e7da395aSOwen Roberts 	s = e = 0;
2068e7da395aSOwen Roberts 	while (e < ncgs) {
2069e7da395aSOwen Roberts 	/* Advance the end of the window until it accommodates the target. */
2070e7da395aSOwen Roberts 		while (ctotal < target && e < ncgs) {
2071e7da395aSOwen Roberts 			ctotal += fs->fs_cs(fs, e).cs_nbfree;
2072e7da395aSOwen Roberts 			e++;
2073e7da395aSOwen Roberts 		}
2074e7da395aSOwen Roberts 
2075e7da395aSOwen Roberts 		/*
2076e7da395aSOwen Roberts 		 * Advance the start of the window until it no longer
2077e7da395aSOwen Roberts 		 * accommodates the target.
2078e7da395aSOwen Roberts 		 */
2079e7da395aSOwen Roberts 		while (ctotal >= target && s < e) {
2080e7da395aSOwen Roberts 			/* See if this is the smallest window so far. */
2081e7da395aSOwen Roberts 			cwidth = e - s;
2082e7da395aSOwen Roberts 			if (cwidth <= bwidth) {
2083e7da395aSOwen Roberts 				if (cwidth == bwidth && ctotal <= btotal)
2084e7da395aSOwen Roberts 					goto more;
2085e7da395aSOwen Roberts 				bwidth = cwidth;
2086e7da395aSOwen Roberts 				btotal = ctotal;
2087e7da395aSOwen Roberts 				bs = s;
2088e7da395aSOwen Roberts 			}
2089e7da395aSOwen Roberts more:
2090e7da395aSOwen Roberts 			ctotal -= fs->fs_cs(fs, s).cs_nbfree;
2091e7da395aSOwen Roberts 			s++;
2092e7da395aSOwen Roberts 		}
2093e7da395aSOwen Roberts 	}
2094e7da395aSOwen Roberts 
2095e7da395aSOwen Roberts 	/*
2096e7da395aSOwen Roberts 	 * If we cannot allocate the minimum required or we use too many
2097e7da395aSOwen Roberts 	 * extents to do so, return -1.
2098e7da395aSOwen Roberts 	 */
2099e7da395aSOwen Roberts 	if (btotal < minblk || bwidth > max_extents)
2100e7da395aSOwen Roberts 		bs = -1;
2101e7da395aSOwen Roberts 
2102e7da395aSOwen Roberts 	return (bs);
2103e7da395aSOwen Roberts }
2104