xref: /illumos-gate/usr/src/uts/common/fs/ufs/ufs_alloc.c (revision 7c478bd95313f5f23a4c958a745db2134aa0324)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28*7c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
29*7c478bd9Sstevel@tonic-gate 
30*7c478bd9Sstevel@tonic-gate /*
31*7c478bd9Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
32*7c478bd9Sstevel@tonic-gate  * The Regents of the University of California
33*7c478bd9Sstevel@tonic-gate  * All Rights Reserved
34*7c478bd9Sstevel@tonic-gate  *
35*7c478bd9Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
36*7c478bd9Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
37*7c478bd9Sstevel@tonic-gate  * contributors.
38*7c478bd9Sstevel@tonic-gate  */
39*7c478bd9Sstevel@tonic-gate 
40*7c478bd9Sstevel@tonic-gate 
41*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
42*7c478bd9Sstevel@tonic-gate 
43*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
44*7c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
45*7c478bd9Sstevel@tonic-gate #include <sys/debug.h>
46*7c478bd9Sstevel@tonic-gate #include <sys/param.h>
47*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
48*7c478bd9Sstevel@tonic-gate #include <sys/signal.h>
49*7c478bd9Sstevel@tonic-gate #include <sys/cred.h>
50*7c478bd9Sstevel@tonic-gate #include <sys/proc.h>
51*7c478bd9Sstevel@tonic-gate #include <sys/disp.h>
52*7c478bd9Sstevel@tonic-gate #include <sys/user.h>
53*7c478bd9Sstevel@tonic-gate #include <sys/buf.h>
54*7c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
55*7c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
56*7c478bd9Sstevel@tonic-gate #include <sys/acl.h>
57*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
58*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
59*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_acl.h>
60*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
61*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
62*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
63*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
64*7c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_panic.h>
65*7c478bd9Sstevel@tonic-gate #include <sys/errno.h>
66*7c478bd9Sstevel@tonic-gate #include <sys/time.h>
67*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
68*7c478bd9Sstevel@tonic-gate #include <sys/file.h>
69*7c478bd9Sstevel@tonic-gate #include <sys/fcntl.h>
70*7c478bd9Sstevel@tonic-gate #include <sys/flock.h>
71*7c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h>
72*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
73*7c478bd9Sstevel@tonic-gate #include <sys/policy.h>
74*7c478bd9Sstevel@tonic-gate 
75*7c478bd9Sstevel@tonic-gate static ino_t	hashalloc();
76*7c478bd9Sstevel@tonic-gate static daddr_t	fragextend();
77*7c478bd9Sstevel@tonic-gate static daddr_t	alloccg();
78*7c478bd9Sstevel@tonic-gate static daddr_t	alloccgblk();
79*7c478bd9Sstevel@tonic-gate static ino_t	ialloccg();
80*7c478bd9Sstevel@tonic-gate static daddr_t	mapsearch();
81*7c478bd9Sstevel@tonic-gate 
82*7c478bd9Sstevel@tonic-gate extern int	inside[], around[];
83*7c478bd9Sstevel@tonic-gate extern uchar_t	*fragtbl[];
84*7c478bd9Sstevel@tonic-gate void delay();
85*7c478bd9Sstevel@tonic-gate 
86*7c478bd9Sstevel@tonic-gate /*
87*7c478bd9Sstevel@tonic-gate  * Allocate a block in the file system.
88*7c478bd9Sstevel@tonic-gate  *
89*7c478bd9Sstevel@tonic-gate  * The size of the requested block is given, which must be some
90*7c478bd9Sstevel@tonic-gate  * multiple of fs_fsize and <= fs_bsize.
91*7c478bd9Sstevel@tonic-gate  * A preference may be optionally specified. If a preference is given
92*7c478bd9Sstevel@tonic-gate  * the following hierarchy is used to allocate a block:
93*7c478bd9Sstevel@tonic-gate  *   1) allocate the requested block.
94*7c478bd9Sstevel@tonic-gate  *   2) allocate a rotationally optimal block in the same cylinder.
95*7c478bd9Sstevel@tonic-gate  *   3) allocate a block in the same cylinder group.
96*7c478bd9Sstevel@tonic-gate  *   4) quadratically rehash into other cylinder groups, until an
97*7c478bd9Sstevel@tonic-gate  *	available block is located.
98*7c478bd9Sstevel@tonic-gate  * If no block preference is given the following hierarchy is used
99*7c478bd9Sstevel@tonic-gate  * to allocate a block:
100*7c478bd9Sstevel@tonic-gate  *   1) allocate a block in the cylinder group that contains the
101*7c478bd9Sstevel@tonic-gate  *	inode for the file.
102*7c478bd9Sstevel@tonic-gate  *   2) quadratically rehash into other cylinder groups, until an
103*7c478bd9Sstevel@tonic-gate  *	available block is located.
104*7c478bd9Sstevel@tonic-gate  */
105*7c478bd9Sstevel@tonic-gate int
106*7c478bd9Sstevel@tonic-gate alloc(struct inode *ip, daddr_t bpref, int size, daddr_t *bnp, cred_t *cr)
107*7c478bd9Sstevel@tonic-gate {
108*7c478bd9Sstevel@tonic-gate 	struct fs *fs;
109*7c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
110*7c478bd9Sstevel@tonic-gate 	daddr_t bno;
111*7c478bd9Sstevel@tonic-gate 	int cg;
112*7c478bd9Sstevel@tonic-gate 	int err;
113*7c478bd9Sstevel@tonic-gate 	char *errmsg = NULL;
114*7c478bd9Sstevel@tonic-gate 	size_t len;
115*7c478bd9Sstevel@tonic-gate 
116*7c478bd9Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
117*7c478bd9Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
118*7c478bd9Sstevel@tonic-gate 	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
119*7c478bd9Sstevel@tonic-gate 		err = ufs_fault(ITOV(ip),
120*7c478bd9Sstevel@tonic-gate 	    "alloc: bad size, dev = 0x%lx, bsize = %d, size = %d, fs = %s\n",
121*7c478bd9Sstevel@tonic-gate 	    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
122*7c478bd9Sstevel@tonic-gate 		return (err);
123*7c478bd9Sstevel@tonic-gate 	}
124*7c478bd9Sstevel@tonic-gate 	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
125*7c478bd9Sstevel@tonic-gate 		goto nospace;
126*7c478bd9Sstevel@tonic-gate 	if (freespace(fs, ufsvfsp) <= 0 &&
127*7c478bd9Sstevel@tonic-gate 	    secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0)
128*7c478bd9Sstevel@tonic-gate 		goto nospace;
129*7c478bd9Sstevel@tonic-gate 	err = chkdq(ip, (long)btodb(size), 0, cr, &errmsg, &len);
130*7c478bd9Sstevel@tonic-gate 	/* Note that may not have err, but may have errmsg */
131*7c478bd9Sstevel@tonic-gate 	if (errmsg != NULL) {
132*7c478bd9Sstevel@tonic-gate 		uprintf(errmsg);
133*7c478bd9Sstevel@tonic-gate 		kmem_free(errmsg, len);
134*7c478bd9Sstevel@tonic-gate 		errmsg = NULL;
135*7c478bd9Sstevel@tonic-gate 	}
136*7c478bd9Sstevel@tonic-gate 	if (err)
137*7c478bd9Sstevel@tonic-gate 		return (err);
138*7c478bd9Sstevel@tonic-gate 	if (bpref >= fs->fs_size)
139*7c478bd9Sstevel@tonic-gate 		bpref = 0;
140*7c478bd9Sstevel@tonic-gate 	if (bpref == 0)
141*7c478bd9Sstevel@tonic-gate 		cg = (int)itog(fs, ip->i_number);
142*7c478bd9Sstevel@tonic-gate 	else
143*7c478bd9Sstevel@tonic-gate 		cg = dtog(fs, bpref);
144*7c478bd9Sstevel@tonic-gate 
145*7c478bd9Sstevel@tonic-gate 	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
146*7c478bd9Sstevel@tonic-gate 	    (ulong_t (*)())alloccg);
147*7c478bd9Sstevel@tonic-gate 	if (bno > 0) {
148*7c478bd9Sstevel@tonic-gate 		*bnp = bno;
149*7c478bd9Sstevel@tonic-gate 		return (0);
150*7c478bd9Sstevel@tonic-gate 	}
151*7c478bd9Sstevel@tonic-gate 
152*7c478bd9Sstevel@tonic-gate 	/*
153*7c478bd9Sstevel@tonic-gate 	 * hashalloc() failed because some other thread grabbed
154*7c478bd9Sstevel@tonic-gate 	 * the last block so unwind the quota operation.  We can
155*7c478bd9Sstevel@tonic-gate 	 * ignore the return because subtractions don't fail and
156*7c478bd9Sstevel@tonic-gate 	 * size is guaranteed to be >= zero by our caller.
157*7c478bd9Sstevel@tonic-gate 	 */
158*7c478bd9Sstevel@tonic-gate 	(void) chkdq(ip, -(long)btodb(size), 0, cr, (char **)NULL,
159*7c478bd9Sstevel@tonic-gate 		(size_t *)NULL);
160*7c478bd9Sstevel@tonic-gate 
161*7c478bd9Sstevel@tonic-gate nospace:
162*7c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
163*7c478bd9Sstevel@tonic-gate 	if ((lbolt - ufsvfsp->vfs_lastwhinetime) > (hz << 2) &&
164*7c478bd9Sstevel@tonic-gate 		(!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) {
165*7c478bd9Sstevel@tonic-gate 		ufsvfsp->vfs_lastwhinetime = lbolt;
166*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "alloc: %s: file system full", fs->fs_fsmnt);
167*7c478bd9Sstevel@tonic-gate 	}
168*7c478bd9Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
169*7c478bd9Sstevel@tonic-gate 	return (ENOSPC);
170*7c478bd9Sstevel@tonic-gate }
171*7c478bd9Sstevel@tonic-gate 
172*7c478bd9Sstevel@tonic-gate /*
173*7c478bd9Sstevel@tonic-gate  * Reallocate a fragment to a bigger size
174*7c478bd9Sstevel@tonic-gate  *
175*7c478bd9Sstevel@tonic-gate  * The number and size of the old block is given, and a preference
176*7c478bd9Sstevel@tonic-gate  * and new size is also specified.  The allocator attempts to extend
177*7c478bd9Sstevel@tonic-gate  * the original block.  Failing that, the regular block allocator is
178*7c478bd9Sstevel@tonic-gate  * invoked to get an appropriate block.
179*7c478bd9Sstevel@tonic-gate  */
180*7c478bd9Sstevel@tonic-gate int
181*7c478bd9Sstevel@tonic-gate realloccg(struct inode *ip, daddr_t bprev, daddr_t bpref, int osize,
182*7c478bd9Sstevel@tonic-gate     int nsize, daddr_t *bnp, cred_t *cr)
183*7c478bd9Sstevel@tonic-gate {
184*7c478bd9Sstevel@tonic-gate 	daddr_t bno;
185*7c478bd9Sstevel@tonic-gate 	struct fs *fs;
186*7c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
187*7c478bd9Sstevel@tonic-gate 	int cg, request;
188*7c478bd9Sstevel@tonic-gate 	int err;
189*7c478bd9Sstevel@tonic-gate 	char *errmsg = NULL;
190*7c478bd9Sstevel@tonic-gate 	size_t len;
191*7c478bd9Sstevel@tonic-gate 
192*7c478bd9Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
193*7c478bd9Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
194*7c478bd9Sstevel@tonic-gate 	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
195*7c478bd9Sstevel@tonic-gate 	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
196*7c478bd9Sstevel@tonic-gate 		err = ufs_fault(ITOV(ip),
197*7c478bd9Sstevel@tonic-gate 	"realloccg: bad size, dev=0x%lx, bsize=%d, osize=%d, nsize=%d, fs=%s\n",
198*7c478bd9Sstevel@tonic-gate 		    ip->i_dev, fs->fs_bsize, osize, nsize,
199*7c478bd9Sstevel@tonic-gate 		    fs->fs_fsmnt);
200*7c478bd9Sstevel@tonic-gate 		return (err);
201*7c478bd9Sstevel@tonic-gate 	}
202*7c478bd9Sstevel@tonic-gate 	if (freespace(fs, ufsvfsp) <= 0 &&
203*7c478bd9Sstevel@tonic-gate 	    secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0)
204*7c478bd9Sstevel@tonic-gate 		goto nospace;
205*7c478bd9Sstevel@tonic-gate 	if (bprev == 0) {
206*7c478bd9Sstevel@tonic-gate 		err = ufs_fault(ITOV(ip),
207*7c478bd9Sstevel@tonic-gate 	"realloccg: bad bprev, dev = 0x%lx, bsize = %d, bprev = %ld, fs = %s\n",
208*7c478bd9Sstevel@tonic-gate 		    ip->i_dev, fs->fs_bsize, bprev,
209*7c478bd9Sstevel@tonic-gate 		    fs->fs_fsmnt);
210*7c478bd9Sstevel@tonic-gate 		return (err);
211*7c478bd9Sstevel@tonic-gate 	}
212*7c478bd9Sstevel@tonic-gate 	err = chkdq(ip, (long)btodb(nsize - osize), 0, cr, &errmsg, &len);
213*7c478bd9Sstevel@tonic-gate 	/* Note that may not have err, but may have errmsg */
214*7c478bd9Sstevel@tonic-gate 	if (errmsg != NULL) {
215*7c478bd9Sstevel@tonic-gate 		uprintf(errmsg);
216*7c478bd9Sstevel@tonic-gate 		kmem_free(errmsg, len);
217*7c478bd9Sstevel@tonic-gate 		errmsg = NULL;
218*7c478bd9Sstevel@tonic-gate 	}
219*7c478bd9Sstevel@tonic-gate 	if (err)
220*7c478bd9Sstevel@tonic-gate 		return (err);
221*7c478bd9Sstevel@tonic-gate 	cg = dtog(fs, bprev);
222*7c478bd9Sstevel@tonic-gate 	bno = fragextend(ip, cg, (long)bprev, osize, nsize);
223*7c478bd9Sstevel@tonic-gate 	if (bno != 0) {
224*7c478bd9Sstevel@tonic-gate 		*bnp = bno;
225*7c478bd9Sstevel@tonic-gate 		return (0);
226*7c478bd9Sstevel@tonic-gate 	}
227*7c478bd9Sstevel@tonic-gate 	if (bpref >= fs->fs_size)
228*7c478bd9Sstevel@tonic-gate 		bpref = 0;
229*7c478bd9Sstevel@tonic-gate 
230*7c478bd9Sstevel@tonic-gate 	/*
231*7c478bd9Sstevel@tonic-gate 	 * When optimizing for time we allocate a full block and
232*7c478bd9Sstevel@tonic-gate 	 * then only use the upper portion for this request. When
233*7c478bd9Sstevel@tonic-gate 	 * this file grows again it will grow into the unused portion
234*7c478bd9Sstevel@tonic-gate 	 * of the block (See fragextend() above).  This saves time
235*7c478bd9Sstevel@tonic-gate 	 * because an extra disk write would be needed if the frags
236*7c478bd9Sstevel@tonic-gate 	 * following the current allocation were not free. The extra
237*7c478bd9Sstevel@tonic-gate 	 * disk write is needed to move the data from its current
238*7c478bd9Sstevel@tonic-gate 	 * location into the newly allocated position.
239*7c478bd9Sstevel@tonic-gate 	 *
240*7c478bd9Sstevel@tonic-gate 	 * When optimizing for space we allocate a run of frags
241*7c478bd9Sstevel@tonic-gate 	 * that is just the right size for this request.
242*7c478bd9Sstevel@tonic-gate 	 */
243*7c478bd9Sstevel@tonic-gate 	request = (fs->fs_optim == FS_OPTTIME) ? fs->fs_bsize : nsize;
244*7c478bd9Sstevel@tonic-gate 	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
245*7c478bd9Sstevel@tonic-gate 		(ulong_t (*)())alloccg);
246*7c478bd9Sstevel@tonic-gate 	if (bno > 0) {
247*7c478bd9Sstevel@tonic-gate 		*bnp = bno;
248*7c478bd9Sstevel@tonic-gate 		if (nsize < request)
249*7c478bd9Sstevel@tonic-gate 			(void) free(ip, bno + numfrags(fs, nsize),
250*7c478bd9Sstevel@tonic-gate 			    (off_t)(request - nsize), I_NOCANCEL);
251*7c478bd9Sstevel@tonic-gate 		return (0);
252*7c478bd9Sstevel@tonic-gate 	}
253*7c478bd9Sstevel@tonic-gate 
254*7c478bd9Sstevel@tonic-gate 	/*
255*7c478bd9Sstevel@tonic-gate 	 * hashalloc() failed because some other thread grabbed
256*7c478bd9Sstevel@tonic-gate 	 * the last block so unwind the quota operation.  We can
257*7c478bd9Sstevel@tonic-gate 	 * ignore the return because subtractions don't fail, and
258*7c478bd9Sstevel@tonic-gate 	 * our caller guarantees nsize >= osize.
259*7c478bd9Sstevel@tonic-gate 	 */
260*7c478bd9Sstevel@tonic-gate 	(void) chkdq(ip, -(long)btodb(nsize - osize), 0, cr, (char **)NULL,
261*7c478bd9Sstevel@tonic-gate 		(size_t *)NULL);
262*7c478bd9Sstevel@tonic-gate 
263*7c478bd9Sstevel@tonic-gate nospace:
264*7c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
265*7c478bd9Sstevel@tonic-gate 	if ((lbolt - ufsvfsp->vfs_lastwhinetime) > (hz << 2) &&
266*7c478bd9Sstevel@tonic-gate 		(!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) {
267*7c478bd9Sstevel@tonic-gate 		ufsvfsp->vfs_lastwhinetime = lbolt;
268*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE,
269*7c478bd9Sstevel@tonic-gate 			"realloccg %s: file system full", fs->fs_fsmnt);
270*7c478bd9Sstevel@tonic-gate 	}
271*7c478bd9Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
272*7c478bd9Sstevel@tonic-gate 	return (ENOSPC);
273*7c478bd9Sstevel@tonic-gate }
274*7c478bd9Sstevel@tonic-gate 
275*7c478bd9Sstevel@tonic-gate /*
276*7c478bd9Sstevel@tonic-gate  * Allocate an inode in the file system.
277*7c478bd9Sstevel@tonic-gate  *
278*7c478bd9Sstevel@tonic-gate  * A preference may be optionally specified. If a preference is given
279*7c478bd9Sstevel@tonic-gate  * the following hierarchy is used to allocate an inode:
280*7c478bd9Sstevel@tonic-gate  *   1) allocate the requested inode.
281*7c478bd9Sstevel@tonic-gate  *   2) allocate an inode in the same cylinder group.
282*7c478bd9Sstevel@tonic-gate  *   3) quadratically rehash into other cylinder groups, until an
283*7c478bd9Sstevel@tonic-gate  *	available inode is located.
284*7c478bd9Sstevel@tonic-gate  * If no inode preference is given the following hierarchy is used
285*7c478bd9Sstevel@tonic-gate  * to allocate an inode:
286*7c478bd9Sstevel@tonic-gate  *   1) allocate an inode in cylinder group 0.
287*7c478bd9Sstevel@tonic-gate  *   2) quadratically rehash into other cylinder groups, until an
288*7c478bd9Sstevel@tonic-gate  *	available inode is located.
289*7c478bd9Sstevel@tonic-gate  */
290*7c478bd9Sstevel@tonic-gate int
291*7c478bd9Sstevel@tonic-gate ufs_ialloc(struct inode *pip,
292*7c478bd9Sstevel@tonic-gate     ino_t ipref, mode_t mode, struct inode **ipp, cred_t *cr)
293*7c478bd9Sstevel@tonic-gate {
294*7c478bd9Sstevel@tonic-gate 	struct inode *ip;
295*7c478bd9Sstevel@tonic-gate 	struct fs *fs;
296*7c478bd9Sstevel@tonic-gate 	int cg;
297*7c478bd9Sstevel@tonic-gate 	ino_t ino;
298*7c478bd9Sstevel@tonic-gate 	int err;
299*7c478bd9Sstevel@tonic-gate 	int nifree;
300*7c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = pip->i_ufsvfs;
301*7c478bd9Sstevel@tonic-gate 	char *errmsg = NULL;
302*7c478bd9Sstevel@tonic-gate 	size_t len;
303*7c478bd9Sstevel@tonic-gate 
304*7c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&pip->i_rwlock));
305*7c478bd9Sstevel@tonic-gate 	fs = pip->i_fs;
306*7c478bd9Sstevel@tonic-gate loop:
307*7c478bd9Sstevel@tonic-gate 	nifree = fs->fs_cstotal.cs_nifree;
308*7c478bd9Sstevel@tonic-gate 
309*7c478bd9Sstevel@tonic-gate 	if (nifree == 0)
310*7c478bd9Sstevel@tonic-gate 		goto noinodes;
311*7c478bd9Sstevel@tonic-gate 	/*
312*7c478bd9Sstevel@tonic-gate 	 * Shadow inodes don't count against a user's inode allocation.
313*7c478bd9Sstevel@tonic-gate 	 * They are an implementation method and not a resource.
314*7c478bd9Sstevel@tonic-gate 	 */
315*7c478bd9Sstevel@tonic-gate 	if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
316*7c478bd9Sstevel@tonic-gate 		err = chkiq((struct ufsvfs *)ITOV(pip)->v_vfsp->vfs_data,
317*7c478bd9Sstevel@tonic-gate 			/* change */ 1, (struct inode *)NULL, crgetuid(cr), 0,
318*7c478bd9Sstevel@tonic-gate 			cr, &errmsg, &len);
319*7c478bd9Sstevel@tonic-gate 		/*
320*7c478bd9Sstevel@tonic-gate 		 * As we haven't acquired any locks yet, dump the message
321*7c478bd9Sstevel@tonic-gate 		 * now.
322*7c478bd9Sstevel@tonic-gate 		 */
323*7c478bd9Sstevel@tonic-gate 		if (errmsg != NULL) {
324*7c478bd9Sstevel@tonic-gate 			uprintf(errmsg);
325*7c478bd9Sstevel@tonic-gate 			kmem_free(errmsg, len);
326*7c478bd9Sstevel@tonic-gate 			errmsg = NULL;
327*7c478bd9Sstevel@tonic-gate 		}
328*7c478bd9Sstevel@tonic-gate 		if (err)
329*7c478bd9Sstevel@tonic-gate 			return (err);
330*7c478bd9Sstevel@tonic-gate 	}
331*7c478bd9Sstevel@tonic-gate 
332*7c478bd9Sstevel@tonic-gate 	if (ipref >= (ulong_t)(fs->fs_ncg * fs->fs_ipg))
333*7c478bd9Sstevel@tonic-gate 		ipref = 0;
334*7c478bd9Sstevel@tonic-gate 	cg = (int)itog(fs, ipref);
335*7c478bd9Sstevel@tonic-gate 	ino = (ino_t)hashalloc(pip, cg, (long)ipref, (int)mode,
336*7c478bd9Sstevel@tonic-gate 	    (ulong_t (*)())ialloccg);
337*7c478bd9Sstevel@tonic-gate 	if (ino == 0) {
338*7c478bd9Sstevel@tonic-gate 		if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
339*7c478bd9Sstevel@tonic-gate 			/*
340*7c478bd9Sstevel@tonic-gate 			 * We can safely ignore the return from chkiq()
341*7c478bd9Sstevel@tonic-gate 			 * because deallocations can only fail if we
342*7c478bd9Sstevel@tonic-gate 			 * can't get the user's quota info record off
343*7c478bd9Sstevel@tonic-gate 			 * the disk due to an I/O error.  In that case,
344*7c478bd9Sstevel@tonic-gate 			 * the quota subsystem is already messed up.
345*7c478bd9Sstevel@tonic-gate 			 */
346*7c478bd9Sstevel@tonic-gate 			(void) chkiq(ufsvfsp, /* change */ -1,
347*7c478bd9Sstevel@tonic-gate 				(struct inode *)NULL, crgetuid(cr), 0, cr,
348*7c478bd9Sstevel@tonic-gate 				(char **)NULL, (size_t *)NULL);
349*7c478bd9Sstevel@tonic-gate 		}
350*7c478bd9Sstevel@tonic-gate 		goto noinodes;
351*7c478bd9Sstevel@tonic-gate 	}
352*7c478bd9Sstevel@tonic-gate 	err = ufs_iget(pip->i_vfs, ino, ipp, cr);
353*7c478bd9Sstevel@tonic-gate 	if (err) {
354*7c478bd9Sstevel@tonic-gate 		if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
355*7c478bd9Sstevel@tonic-gate 			/*
356*7c478bd9Sstevel@tonic-gate 			 * See above comment about why it is safe to ignore an
357*7c478bd9Sstevel@tonic-gate 			 * error return here.
358*7c478bd9Sstevel@tonic-gate 			 */
359*7c478bd9Sstevel@tonic-gate 			(void) chkiq(ufsvfsp, /* change */ -1,
360*7c478bd9Sstevel@tonic-gate 				(struct inode *)NULL, crgetuid(cr), 0, cr,
361*7c478bd9Sstevel@tonic-gate 				(char **)NULL, (size_t *)NULL);
362*7c478bd9Sstevel@tonic-gate 		}
363*7c478bd9Sstevel@tonic-gate 		ufs_ifree(pip, ino, 0);
364*7c478bd9Sstevel@tonic-gate 		return (err);
365*7c478bd9Sstevel@tonic-gate 	}
366*7c478bd9Sstevel@tonic-gate 	ip = *ipp;
367*7c478bd9Sstevel@tonic-gate 	ASSERT(!ip->i_ufs_acl);
368*7c478bd9Sstevel@tonic-gate 	ASSERT(!ip->i_dquot);
369*7c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
370*7c478bd9Sstevel@tonic-gate 
371*7c478bd9Sstevel@tonic-gate 	/*
372*7c478bd9Sstevel@tonic-gate 	 * Check if we really got a free inode, if not then complain
373*7c478bd9Sstevel@tonic-gate 	 * and mark the inode ISTALE so that it will be freed by the
374*7c478bd9Sstevel@tonic-gate 	 * ufs idle thread eventually and will not be sent to ufs_delete().
375*7c478bd9Sstevel@tonic-gate 	 */
376*7c478bd9Sstevel@tonic-gate 	if (ip->i_mode || (ip->i_nlink > 0)) {
377*7c478bd9Sstevel@tonic-gate 		ip->i_flag |= ISTALE;
378*7c478bd9Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
379*7c478bd9Sstevel@tonic-gate 		VN_RELE(ITOV(ip));
380*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN,
381*7c478bd9Sstevel@tonic-gate 			"%s: unexpected allocated inode %d, run fsck(1M)%s",
382*7c478bd9Sstevel@tonic-gate 			fs->fs_fsmnt, (int)ino,
383*7c478bd9Sstevel@tonic-gate 			(TRANS_ISTRANS(ufsvfsp) ? " -o f" : ""));
384*7c478bd9Sstevel@tonic-gate 		goto loop;
385*7c478bd9Sstevel@tonic-gate 	}
386*7c478bd9Sstevel@tonic-gate 
387*7c478bd9Sstevel@tonic-gate 	/*
388*7c478bd9Sstevel@tonic-gate 	 * Check the inode has no size or data blocks.
389*7c478bd9Sstevel@tonic-gate 	 * This could have happened if the truncation failed when
390*7c478bd9Sstevel@tonic-gate 	 * deleting the inode. It used to be possible for this to occur
391*7c478bd9Sstevel@tonic-gate 	 * if a block allocation failed when iteratively truncating a
392*7c478bd9Sstevel@tonic-gate 	 * large file using logging and with a full file system.
393*7c478bd9Sstevel@tonic-gate 	 * This was fixed with bug fix 4348738. However, truncation may
394*7c478bd9Sstevel@tonic-gate 	 * still fail on an IO error. So in all cases for safety and
395*7c478bd9Sstevel@tonic-gate 	 * security we clear out the size; the blocks allocated; and
396*7c478bd9Sstevel@tonic-gate 	 * pointers to the blocks. This will ultimately cause a fsck
397*7c478bd9Sstevel@tonic-gate 	 * error of un-accounted for blocks, but its a fairly benign error,
398*7c478bd9Sstevel@tonic-gate 	 * and possibly the correct thing to do anyway as accesssing those
399*7c478bd9Sstevel@tonic-gate 	 * blocks agains may lead to more IO errors.
400*7c478bd9Sstevel@tonic-gate 	 */
401*7c478bd9Sstevel@tonic-gate 	if (ip->i_size || ip->i_blocks) {
402*7c478bd9Sstevel@tonic-gate 		int i;
403*7c478bd9Sstevel@tonic-gate 
404*7c478bd9Sstevel@tonic-gate 		if (ip->i_size) {
405*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN,
406*7c478bd9Sstevel@tonic-gate 			"%s: free inode %d had size 0x%llx, run fsck(1M)%s",
407*7c478bd9Sstevel@tonic-gate 			fs->fs_fsmnt, (int)ino, ip->i_size,
408*7c478bd9Sstevel@tonic-gate 			(TRANS_ISTRANS(ufsvfsp) ? " -o f" : ""));
409*7c478bd9Sstevel@tonic-gate 		}
410*7c478bd9Sstevel@tonic-gate 		/*
411*7c478bd9Sstevel@tonic-gate 		 * Clear any garbage left behind.
412*7c478bd9Sstevel@tonic-gate 		 */
413*7c478bd9Sstevel@tonic-gate 		ip->i_size = (u_offset_t)0;
414*7c478bd9Sstevel@tonic-gate 		ip->i_blocks = 0;
415*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < NDADDR; i++)
416*7c478bd9Sstevel@tonic-gate 			ip->i_db[i] = 0;
417*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < NIADDR; i++)
418*7c478bd9Sstevel@tonic-gate 			ip->i_ib[i] = 0;
419*7c478bd9Sstevel@tonic-gate 	}
420*7c478bd9Sstevel@tonic-gate 
421*7c478bd9Sstevel@tonic-gate 	/*
422*7c478bd9Sstevel@tonic-gate 	 * Initialize the link count
423*7c478bd9Sstevel@tonic-gate 	 */
424*7c478bd9Sstevel@tonic-gate 	ip->i_nlink = 0;
425*7c478bd9Sstevel@tonic-gate 
426*7c478bd9Sstevel@tonic-gate 	/*
427*7c478bd9Sstevel@tonic-gate 	 * Clear the old flags
428*7c478bd9Sstevel@tonic-gate 	 */
429*7c478bd9Sstevel@tonic-gate 	ip->i_flag &= IREF;
430*7c478bd9Sstevel@tonic-gate 
431*7c478bd9Sstevel@tonic-gate 	/*
432*7c478bd9Sstevel@tonic-gate 	 * Access times are not really defined if the fs is mounted
433*7c478bd9Sstevel@tonic-gate 	 * with 'noatime'. But it can cause nfs clients to fail
434*7c478bd9Sstevel@tonic-gate 	 * open() if the atime is not a legal value. Set a legal value
435*7c478bd9Sstevel@tonic-gate 	 * here when the inode is allocated.
436*7c478bd9Sstevel@tonic-gate 	 */
437*7c478bd9Sstevel@tonic-gate 	if (ufsvfsp->vfs_noatime) {
438*7c478bd9Sstevel@tonic-gate 		mutex_enter(&ufs_iuniqtime_lock);
439*7c478bd9Sstevel@tonic-gate 		ip->i_atime = iuniqtime;
440*7c478bd9Sstevel@tonic-gate 		mutex_exit(&ufs_iuniqtime_lock);
441*7c478bd9Sstevel@tonic-gate 	}
442*7c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
443*7c478bd9Sstevel@tonic-gate 	return (0);
444*7c478bd9Sstevel@tonic-gate noinodes:
445*7c478bd9Sstevel@tonic-gate 	if (!(TRANS_ISTRANS(ufsvfsp)) || !(pip->i_flag & IQUIET))
446*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_NOTE, "%s: out of inodes\n", fs->fs_fsmnt);
447*7c478bd9Sstevel@tonic-gate 	return (ENOSPC);
448*7c478bd9Sstevel@tonic-gate }
449*7c478bd9Sstevel@tonic-gate 
450*7c478bd9Sstevel@tonic-gate /*
451*7c478bd9Sstevel@tonic-gate  * Find a cylinder group to place a directory.
452*7c478bd9Sstevel@tonic-gate  * Returns an inumber within the selected cylinder group.
453*7c478bd9Sstevel@tonic-gate  * Note, the vfs_lock is not needed as we don't require exact cg summary info.
454*7c478bd9Sstevel@tonic-gate  *
455*7c478bd9Sstevel@tonic-gate  * If the switch ufs_close_dirs is set, then the policy is to use
456*7c478bd9Sstevel@tonic-gate  * the current cg if it has more than 25% free inodes and more
457*7c478bd9Sstevel@tonic-gate  * than 25% free blocks. Otherwise the cgs are searched from
458*7c478bd9Sstevel@tonic-gate  * the beginning and the first cg with the same criteria is
459*7c478bd9Sstevel@tonic-gate  * used. If that is also null then we revert to the old algorithm.
460*7c478bd9Sstevel@tonic-gate  * This tends to cluster files at the beginning of the disk
461*7c478bd9Sstevel@tonic-gate  * until the disk gets full.
462*7c478bd9Sstevel@tonic-gate  *
463*7c478bd9Sstevel@tonic-gate  * Otherwise if ufs_close_dirs is not set then the original policy is
464*7c478bd9Sstevel@tonic-gate  * used which is to select from among those cylinder groups with
465*7c478bd9Sstevel@tonic-gate  * above the average number of free inodes, the one with the smallest
466*7c478bd9Sstevel@tonic-gate  * number of directories.
467*7c478bd9Sstevel@tonic-gate  */
468*7c478bd9Sstevel@tonic-gate 
469*7c478bd9Sstevel@tonic-gate int ufs_close_dirs = 1;	/* allocate directories close as possible */
470*7c478bd9Sstevel@tonic-gate 
471*7c478bd9Sstevel@tonic-gate ino_t
472*7c478bd9Sstevel@tonic-gate dirpref(inode_t *dp)
473*7c478bd9Sstevel@tonic-gate {
474*7c478bd9Sstevel@tonic-gate 	int cg, minndir, mincg, avgifree, mininode, minbpg, ifree;
475*7c478bd9Sstevel@tonic-gate 	struct fs *fs = dp->i_fs;
476*7c478bd9Sstevel@tonic-gate 
477*7c478bd9Sstevel@tonic-gate 	cg = itog(fs, dp->i_number);
478*7c478bd9Sstevel@tonic-gate 	mininode = fs->fs_ipg >> 2;
479*7c478bd9Sstevel@tonic-gate 	minbpg = fs->fs_maxbpg >> 2;
480*7c478bd9Sstevel@tonic-gate 	if (ufs_close_dirs &&
481*7c478bd9Sstevel@tonic-gate 	    (fs->fs_cs(fs, cg).cs_nifree > mininode) &&
482*7c478bd9Sstevel@tonic-gate 	    (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) {
483*7c478bd9Sstevel@tonic-gate 		return (dp->i_number);
484*7c478bd9Sstevel@tonic-gate 	}
485*7c478bd9Sstevel@tonic-gate 
486*7c478bd9Sstevel@tonic-gate 	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
487*7c478bd9Sstevel@tonic-gate 	minndir = fs->fs_ipg;
488*7c478bd9Sstevel@tonic-gate 	mincg = 0;
489*7c478bd9Sstevel@tonic-gate 	for (cg = 0; cg < fs->fs_ncg; cg++) {
490*7c478bd9Sstevel@tonic-gate 		ifree = fs->fs_cs(fs, cg).cs_nifree;
491*7c478bd9Sstevel@tonic-gate 		if (ufs_close_dirs &&
492*7c478bd9Sstevel@tonic-gate 		    (ifree > mininode) &&
493*7c478bd9Sstevel@tonic-gate 		    (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) {
494*7c478bd9Sstevel@tonic-gate 			return ((ino_t)(fs->fs_ipg * cg));
495*7c478bd9Sstevel@tonic-gate 		}
496*7c478bd9Sstevel@tonic-gate 		if ((fs->fs_cs(fs, cg).cs_ndir < minndir) &&
497*7c478bd9Sstevel@tonic-gate 		    (ifree >= avgifree)) {
498*7c478bd9Sstevel@tonic-gate 			mincg = cg;
499*7c478bd9Sstevel@tonic-gate 			minndir = fs->fs_cs(fs, cg).cs_ndir;
500*7c478bd9Sstevel@tonic-gate 		}
501*7c478bd9Sstevel@tonic-gate 	}
502*7c478bd9Sstevel@tonic-gate 	return ((ino_t)(fs->fs_ipg * mincg));
503*7c478bd9Sstevel@tonic-gate }
504*7c478bd9Sstevel@tonic-gate 
505*7c478bd9Sstevel@tonic-gate /*
506*7c478bd9Sstevel@tonic-gate  * Select the desired position for the next block in a file.  The file is
507*7c478bd9Sstevel@tonic-gate  * logically divided into sections. The first section is composed of the
508*7c478bd9Sstevel@tonic-gate  * direct blocks. Each additional section contains fs_maxbpg blocks.
509*7c478bd9Sstevel@tonic-gate  *
510*7c478bd9Sstevel@tonic-gate  * If no blocks have been allocated in the first section, the policy is to
511*7c478bd9Sstevel@tonic-gate  * request a block in the same cylinder group as the inode that describes
512*7c478bd9Sstevel@tonic-gate  * the file. If no blocks have been allocated in any other section, the
513*7c478bd9Sstevel@tonic-gate  * policy is to place the section in a cylinder group with a greater than
514*7c478bd9Sstevel@tonic-gate  * average number of free blocks.  An appropriate cylinder group is found
515*7c478bd9Sstevel@tonic-gate  * by using a rotor that sweeps the cylinder groups. When a new group of
516*7c478bd9Sstevel@tonic-gate  * blocks is needed, the sweep begins in the cylinder group following the
517*7c478bd9Sstevel@tonic-gate  * cylinder group from which the previous allocation was made. The sweep
518*7c478bd9Sstevel@tonic-gate  * continues until a cylinder group with greater than the average number
519*7c478bd9Sstevel@tonic-gate  * of free blocks is found. If the allocation is for the first block in an
520*7c478bd9Sstevel@tonic-gate  * indirect block, the information on the previous allocation is unavailable;
521*7c478bd9Sstevel@tonic-gate  * here a best guess is made based upon the logical block number being
522*7c478bd9Sstevel@tonic-gate  * allocated.
523*7c478bd9Sstevel@tonic-gate  *
524*7c478bd9Sstevel@tonic-gate  * If a section is already partially allocated, the policy is to
525*7c478bd9Sstevel@tonic-gate  * contiguously allocate fs_maxcontig blocks.  The end of one of these
526*7c478bd9Sstevel@tonic-gate  * contiguous blocks and the beginning of the next is physically separated
527*7c478bd9Sstevel@tonic-gate  * so that the disk head will be in transit between them for at least
528*7c478bd9Sstevel@tonic-gate  * fs_rotdelay milliseconds.  This is to allow time for the processor to
529*7c478bd9Sstevel@tonic-gate  * schedule another I/O transfer.
530*7c478bd9Sstevel@tonic-gate  */
531*7c478bd9Sstevel@tonic-gate daddr_t
532*7c478bd9Sstevel@tonic-gate blkpref(struct inode *ip, daddr_t lbn, int indx, daddr32_t *bap)
533*7c478bd9Sstevel@tonic-gate {
534*7c478bd9Sstevel@tonic-gate 	struct fs *fs;
535*7c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
536*7c478bd9Sstevel@tonic-gate 	int cg;
537*7c478bd9Sstevel@tonic-gate 	int avgbfree, startcg;
538*7c478bd9Sstevel@tonic-gate 	daddr_t nextblk;
539*7c478bd9Sstevel@tonic-gate 
540*7c478bd9Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
541*7c478bd9Sstevel@tonic-gate 	fs = ip->i_fs;
542*7c478bd9Sstevel@tonic-gate 	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
543*7c478bd9Sstevel@tonic-gate 		if (lbn < NDADDR) {
544*7c478bd9Sstevel@tonic-gate 			cg = itog(fs, ip->i_number);
545*7c478bd9Sstevel@tonic-gate 			return (fs->fs_fpg * cg + fs->fs_frag);
546*7c478bd9Sstevel@tonic-gate 		}
547*7c478bd9Sstevel@tonic-gate 		/*
548*7c478bd9Sstevel@tonic-gate 		 * Find a cylinder with greater than average
549*7c478bd9Sstevel@tonic-gate 		 * number of unused data blocks.
550*7c478bd9Sstevel@tonic-gate 		 */
551*7c478bd9Sstevel@tonic-gate 		if (indx == 0 || bap[indx - 1] == 0)
552*7c478bd9Sstevel@tonic-gate 			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
553*7c478bd9Sstevel@tonic-gate 		else
554*7c478bd9Sstevel@tonic-gate 			startcg = dtog(fs, bap[indx - 1]) + 1;
555*7c478bd9Sstevel@tonic-gate 		startcg %= fs->fs_ncg;
556*7c478bd9Sstevel@tonic-gate 
557*7c478bd9Sstevel@tonic-gate 		mutex_enter(&ufsvfsp->vfs_lock);
558*7c478bd9Sstevel@tonic-gate 		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
559*7c478bd9Sstevel@tonic-gate 		/*
560*7c478bd9Sstevel@tonic-gate 		 * used for computing log space for writes/truncs
561*7c478bd9Sstevel@tonic-gate 		 */
562*7c478bd9Sstevel@tonic-gate 		ufsvfsp->vfs_avgbfree = avgbfree;
563*7c478bd9Sstevel@tonic-gate 		for (cg = startcg; cg < fs->fs_ncg; cg++)
564*7c478bd9Sstevel@tonic-gate 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
565*7c478bd9Sstevel@tonic-gate 				fs->fs_cgrotor = cg;
566*7c478bd9Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
567*7c478bd9Sstevel@tonic-gate 				return (fs->fs_fpg * cg + fs->fs_frag);
568*7c478bd9Sstevel@tonic-gate 			}
569*7c478bd9Sstevel@tonic-gate 		for (cg = 0; cg <= startcg; cg++)
570*7c478bd9Sstevel@tonic-gate 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
571*7c478bd9Sstevel@tonic-gate 				fs->fs_cgrotor = cg;
572*7c478bd9Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
573*7c478bd9Sstevel@tonic-gate 				return (fs->fs_fpg * cg + fs->fs_frag);
574*7c478bd9Sstevel@tonic-gate 			}
575*7c478bd9Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
576*7c478bd9Sstevel@tonic-gate 		return (NULL);
577*7c478bd9Sstevel@tonic-gate 	}
578*7c478bd9Sstevel@tonic-gate 	/*
579*7c478bd9Sstevel@tonic-gate 	 * One or more previous blocks have been laid out. If less
580*7c478bd9Sstevel@tonic-gate 	 * than fs_maxcontig previous blocks are contiguous, the
581*7c478bd9Sstevel@tonic-gate 	 * next block is requested contiguously, otherwise it is
582*7c478bd9Sstevel@tonic-gate 	 * requested rotationally delayed by fs_rotdelay milliseconds.
583*7c478bd9Sstevel@tonic-gate 	 */
584*7c478bd9Sstevel@tonic-gate 	nextblk = bap[indx - 1] + fs->fs_frag;
585*7c478bd9Sstevel@tonic-gate 	if (indx > fs->fs_maxcontig &&
586*7c478bd9Sstevel@tonic-gate 	    bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
587*7c478bd9Sstevel@tonic-gate 	    != nextblk)
588*7c478bd9Sstevel@tonic-gate 		return (nextblk);
589*7c478bd9Sstevel@tonic-gate 	if (fs->fs_rotdelay != 0)
590*7c478bd9Sstevel@tonic-gate 		/*
591*7c478bd9Sstevel@tonic-gate 		 * Here we convert ms of delay to frags as:
592*7c478bd9Sstevel@tonic-gate 		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
593*7c478bd9Sstevel@tonic-gate 		 *	((sect/frag) * (ms/sec))
594*7c478bd9Sstevel@tonic-gate 		 * then round up to the next block.
595*7c478bd9Sstevel@tonic-gate 		 */
596*7c478bd9Sstevel@tonic-gate 		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
597*7c478bd9Sstevel@tonic-gate 		    (NSPF(fs) * 1000), fs->fs_frag);
598*7c478bd9Sstevel@tonic-gate 	return (nextblk);
599*7c478bd9Sstevel@tonic-gate }
600*7c478bd9Sstevel@tonic-gate 
601*7c478bd9Sstevel@tonic-gate /*
602*7c478bd9Sstevel@tonic-gate  * Free a block or fragment.
603*7c478bd9Sstevel@tonic-gate  *
604*7c478bd9Sstevel@tonic-gate  * The specified block or fragment is placed back in the
605*7c478bd9Sstevel@tonic-gate  * free map. If a fragment is deallocated, a possible
606*7c478bd9Sstevel@tonic-gate  * block reassembly is checked.
607*7c478bd9Sstevel@tonic-gate  */
608*7c478bd9Sstevel@tonic-gate void
609*7c478bd9Sstevel@tonic-gate free(struct inode *ip, daddr_t bno, off_t size, int flags)
610*7c478bd9Sstevel@tonic-gate {
611*7c478bd9Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
612*7c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
613*7c478bd9Sstevel@tonic-gate 	struct cg *cgp;
614*7c478bd9Sstevel@tonic-gate 	struct buf *bp;
615*7c478bd9Sstevel@tonic-gate 	int cg, bmap, bbase;
616*7c478bd9Sstevel@tonic-gate 	int i;
617*7c478bd9Sstevel@tonic-gate 	uchar_t *blksfree;
618*7c478bd9Sstevel@tonic-gate 	int *blktot;
619*7c478bd9Sstevel@tonic-gate 	short *blks;
620*7c478bd9Sstevel@tonic-gate 	daddr_t blkno, cylno, rpos;
621*7c478bd9Sstevel@tonic-gate 
622*7c478bd9Sstevel@tonic-gate 	if ((unsigned long)size > fs->fs_bsize || fragoff(fs, size) != 0) {
623*7c478bd9Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip),
624*7c478bd9Sstevel@tonic-gate 		"free: bad size, dev = 0x%lx, bsize = %d, size = %d, fs = %s\n",
625*7c478bd9Sstevel@tonic-gate 		    ip->i_dev, fs->fs_bsize, (int)size, fs->fs_fsmnt);
626*7c478bd9Sstevel@tonic-gate 		return;
627*7c478bd9Sstevel@tonic-gate 	}
628*7c478bd9Sstevel@tonic-gate 	cg = dtog(fs, bno);
629*7c478bd9Sstevel@tonic-gate 	ASSERT(!ufs_badblock(ip, bno));
630*7c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
631*7c478bd9Sstevel@tonic-gate 		    (int)fs->fs_cgsize);
632*7c478bd9Sstevel@tonic-gate 
633*7c478bd9Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
634*7c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
635*7c478bd9Sstevel@tonic-gate 		brelse(bp);
636*7c478bd9Sstevel@tonic-gate 		return;
637*7c478bd9Sstevel@tonic-gate 	}
638*7c478bd9Sstevel@tonic-gate 
639*7c478bd9Sstevel@tonic-gate 	if (!(flags & I_NOCANCEL))
640*7c478bd9Sstevel@tonic-gate 		TRANS_CANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size, flags);
641*7c478bd9Sstevel@tonic-gate 	if (flags & (I_DIR|I_IBLK|I_SHAD|I_QUOTA)) {
642*7c478bd9Sstevel@tonic-gate 		TRANS_MATA_FREE(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size);
643*7c478bd9Sstevel@tonic-gate 	}
644*7c478bd9Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
645*7c478bd9Sstevel@tonic-gate 	blktot = cg_blktot(cgp);
646*7c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
647*7c478bd9Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
648*7c478bd9Sstevel@tonic-gate 	bno = dtogd(fs, bno);
649*7c478bd9Sstevel@tonic-gate 	if (size == fs->fs_bsize) {
650*7c478bd9Sstevel@tonic-gate 		blkno = fragstoblks(fs, bno);
651*7c478bd9Sstevel@tonic-gate 		cylno = cbtocylno(fs, bno);
652*7c478bd9Sstevel@tonic-gate 		rpos = cbtorpos(ufsvfsp, bno);
653*7c478bd9Sstevel@tonic-gate 		blks = cg_blks(ufsvfsp, cgp, cylno);
654*7c478bd9Sstevel@tonic-gate 		if (!isclrblock(fs, blksfree, blkno)) {
655*7c478bd9Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
656*7c478bd9Sstevel@tonic-gate 			brelse(bp);
657*7c478bd9Sstevel@tonic-gate 			(void) ufs_fault(ITOV(ip), "free: freeing free block, "
658*7c478bd9Sstevel@tonic-gate 			    "dev:0x%lx, block:%ld, ino:%lu, fs:%s",
659*7c478bd9Sstevel@tonic-gate 			    ip->i_dev, bno, ip->i_number, fs->fs_fsmnt);
660*7c478bd9Sstevel@tonic-gate 			return;
661*7c478bd9Sstevel@tonic-gate 		}
662*7c478bd9Sstevel@tonic-gate 		setblock(fs, blksfree, blkno);
663*7c478bd9Sstevel@tonic-gate 		blks[rpos]++;
664*7c478bd9Sstevel@tonic-gate 		blktot[cylno]++;
665*7c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_nbfree++;		/* Log below */
666*7c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_nbfree++;
667*7c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nbfree++;
668*7c478bd9Sstevel@tonic-gate 	} else {
669*7c478bd9Sstevel@tonic-gate 		bbase = bno - fragnum(fs, bno);
670*7c478bd9Sstevel@tonic-gate 		/*
671*7c478bd9Sstevel@tonic-gate 		 * Decrement the counts associated with the old frags
672*7c478bd9Sstevel@tonic-gate 		 */
673*7c478bd9Sstevel@tonic-gate 		bmap = blkmap(fs, blksfree, bbase);
674*7c478bd9Sstevel@tonic-gate 		fragacct(fs, bmap, cgp->cg_frsum, -1);
675*7c478bd9Sstevel@tonic-gate 		/*
676*7c478bd9Sstevel@tonic-gate 		 * Deallocate the fragment
677*7c478bd9Sstevel@tonic-gate 		 */
678*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < numfrags(fs, size); i++) {
679*7c478bd9Sstevel@tonic-gate 			if (isset(blksfree, bno + i)) {
680*7c478bd9Sstevel@tonic-gate 				brelse(bp);
681*7c478bd9Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
682*7c478bd9Sstevel@tonic-gate 				(void) ufs_fault(ITOV(ip),
683*7c478bd9Sstevel@tonic-gate 				    "free: freeing free frag, "
684*7c478bd9Sstevel@tonic-gate 				    "dev:0x%lx, blk:%ld, cg:%d, "
685*7c478bd9Sstevel@tonic-gate 				    "ino:%lu, fs:%s",
686*7c478bd9Sstevel@tonic-gate 				    ip->i_dev,
687*7c478bd9Sstevel@tonic-gate 				    bno + i,
688*7c478bd9Sstevel@tonic-gate 				    cgp->cg_cgx,
689*7c478bd9Sstevel@tonic-gate 				    ip->i_number,
690*7c478bd9Sstevel@tonic-gate 				    fs->fs_fsmnt);
691*7c478bd9Sstevel@tonic-gate 				return;
692*7c478bd9Sstevel@tonic-gate 			}
693*7c478bd9Sstevel@tonic-gate 			setbit(blksfree, bno + i);
694*7c478bd9Sstevel@tonic-gate 		}
695*7c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_nffree += i;
696*7c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_nffree += i;
697*7c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nffree += i;
698*7c478bd9Sstevel@tonic-gate 		/*
699*7c478bd9Sstevel@tonic-gate 		 * Add back in counts associated with the new frags
700*7c478bd9Sstevel@tonic-gate 		 */
701*7c478bd9Sstevel@tonic-gate 		bmap = blkmap(fs, blksfree, bbase);
702*7c478bd9Sstevel@tonic-gate 		fragacct(fs, bmap, cgp->cg_frsum, 1);
703*7c478bd9Sstevel@tonic-gate 		/*
704*7c478bd9Sstevel@tonic-gate 		 * If a complete block has been reassembled, account for it
705*7c478bd9Sstevel@tonic-gate 		 */
706*7c478bd9Sstevel@tonic-gate 		blkno = fragstoblks(fs, bbase);
707*7c478bd9Sstevel@tonic-gate 		if (isblock(fs, blksfree, blkno)) {
708*7c478bd9Sstevel@tonic-gate 			cylno = cbtocylno(fs, bbase);
709*7c478bd9Sstevel@tonic-gate 			rpos = cbtorpos(ufsvfsp, bbase);
710*7c478bd9Sstevel@tonic-gate 			blks = cg_blks(ufsvfsp, cgp, cylno);
711*7c478bd9Sstevel@tonic-gate 			blks[rpos]++;
712*7c478bd9Sstevel@tonic-gate 			blktot[cylno]++;
713*7c478bd9Sstevel@tonic-gate 			cgp->cg_cs.cs_nffree -= fs->fs_frag;
714*7c478bd9Sstevel@tonic-gate 			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
715*7c478bd9Sstevel@tonic-gate 			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
716*7c478bd9Sstevel@tonic-gate 			cgp->cg_cs.cs_nbfree++;
717*7c478bd9Sstevel@tonic-gate 			fs->fs_cstotal.cs_nbfree++;
718*7c478bd9Sstevel@tonic-gate 			fs->fs_cs(fs, cg).cs_nbfree++;
719*7c478bd9Sstevel@tonic-gate 		}
720*7c478bd9Sstevel@tonic-gate 	}
721*7c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
722*7c478bd9Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
723*7c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
724*7c478bd9Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
725*7c478bd9Sstevel@tonic-gate 	bdrwrite(bp);
726*7c478bd9Sstevel@tonic-gate }
727*7c478bd9Sstevel@tonic-gate 
728*7c478bd9Sstevel@tonic-gate /*
729*7c478bd9Sstevel@tonic-gate  * Free an inode.
730*7c478bd9Sstevel@tonic-gate  *
731*7c478bd9Sstevel@tonic-gate  * The specified inode is placed back in the free map.
732*7c478bd9Sstevel@tonic-gate  */
733*7c478bd9Sstevel@tonic-gate void
734*7c478bd9Sstevel@tonic-gate ufs_ifree(struct inode *ip, ino_t ino, mode_t mode)
735*7c478bd9Sstevel@tonic-gate {
736*7c478bd9Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
737*7c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
738*7c478bd9Sstevel@tonic-gate 	struct cg *cgp;
739*7c478bd9Sstevel@tonic-gate 	struct buf *bp;
740*7c478bd9Sstevel@tonic-gate 	unsigned int inot;
741*7c478bd9Sstevel@tonic-gate 	int cg;
742*7c478bd9Sstevel@tonic-gate 	char *iused;
743*7c478bd9Sstevel@tonic-gate 
744*7c478bd9Sstevel@tonic-gate 	if (ip->i_number == ino && ip->i_mode != 0) {
745*7c478bd9Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip),
746*7c478bd9Sstevel@tonic-gate 		    "ufs_ifree: illegal mode: (imode) %o, (omode) %o, ino %d, "
747*7c478bd9Sstevel@tonic-gate 		    "fs = %s\n",
748*7c478bd9Sstevel@tonic-gate 		    ip->i_mode, mode, (int)ip->i_number, fs->fs_fsmnt);
749*7c478bd9Sstevel@tonic-gate 		return;
750*7c478bd9Sstevel@tonic-gate 	}
751*7c478bd9Sstevel@tonic-gate 	if (ino >= fs->fs_ipg * fs->fs_ncg) {
752*7c478bd9Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip),
753*7c478bd9Sstevel@tonic-gate 		    "ifree: range, dev = 0x%x, ino = %d, fs = %s\n",
754*7c478bd9Sstevel@tonic-gate 		    (int)ip->i_dev, (int)ino, fs->fs_fsmnt);
755*7c478bd9Sstevel@tonic-gate 		return;
756*7c478bd9Sstevel@tonic-gate 	}
757*7c478bd9Sstevel@tonic-gate 	cg = (int)itog(fs, ino);
758*7c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
759*7c478bd9Sstevel@tonic-gate 		    (int)fs->fs_cgsize);
760*7c478bd9Sstevel@tonic-gate 
761*7c478bd9Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
762*7c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
763*7c478bd9Sstevel@tonic-gate 		brelse(bp);
764*7c478bd9Sstevel@tonic-gate 		return;
765*7c478bd9Sstevel@tonic-gate 	}
766*7c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
767*7c478bd9Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
768*7c478bd9Sstevel@tonic-gate 	iused = cg_inosused(cgp);
769*7c478bd9Sstevel@tonic-gate 	inot = (unsigned int)(ino % (ulong_t)fs->fs_ipg);
770*7c478bd9Sstevel@tonic-gate 	if (isclr(iused, inot)) {
771*7c478bd9Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
772*7c478bd9Sstevel@tonic-gate 		brelse(bp);
773*7c478bd9Sstevel@tonic-gate 		(void) ufs_fault(ITOV(ip), "ufs_ifree: freeing free inode, "
774*7c478bd9Sstevel@tonic-gate 				    "mode: (imode) %o, (omode) %o, ino:%d, "
775*7c478bd9Sstevel@tonic-gate 				    "fs:%s",
776*7c478bd9Sstevel@tonic-gate 				    ip->i_mode, mode, (int)ino, fs->fs_fsmnt);
777*7c478bd9Sstevel@tonic-gate 		return;
778*7c478bd9Sstevel@tonic-gate 	}
779*7c478bd9Sstevel@tonic-gate 	clrbit(iused, inot);
780*7c478bd9Sstevel@tonic-gate 
781*7c478bd9Sstevel@tonic-gate 	if (inot < (ulong_t)cgp->cg_irotor)
782*7c478bd9Sstevel@tonic-gate 		cgp->cg_irotor = inot;
783*7c478bd9Sstevel@tonic-gate 	cgp->cg_cs.cs_nifree++;
784*7c478bd9Sstevel@tonic-gate 	fs->fs_cstotal.cs_nifree++;
785*7c478bd9Sstevel@tonic-gate 	fs->fs_cs(fs, cg).cs_nifree++;
786*7c478bd9Sstevel@tonic-gate 	if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) {
787*7c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_ndir--;
788*7c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_ndir--;
789*7c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_ndir--;
790*7c478bd9Sstevel@tonic-gate 	}
791*7c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
792*7c478bd9Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
793*7c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
794*7c478bd9Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
795*7c478bd9Sstevel@tonic-gate 	bdrwrite(bp);
796*7c478bd9Sstevel@tonic-gate }
797*7c478bd9Sstevel@tonic-gate 
798*7c478bd9Sstevel@tonic-gate /*
799*7c478bd9Sstevel@tonic-gate  * Implement the cylinder overflow algorithm.
800*7c478bd9Sstevel@tonic-gate  *
801*7c478bd9Sstevel@tonic-gate  * The policy implemented by this algorithm is:
802*7c478bd9Sstevel@tonic-gate  *   1) allocate the block in its requested cylinder group.
803*7c478bd9Sstevel@tonic-gate  *   2) quadratically rehash on the cylinder group number.
804*7c478bd9Sstevel@tonic-gate  *   3) brute force search for a free block.
805*7c478bd9Sstevel@tonic-gate  * The size parameter means size for data blocks, mode for inodes.
806*7c478bd9Sstevel@tonic-gate  */
807*7c478bd9Sstevel@tonic-gate static ino_t
808*7c478bd9Sstevel@tonic-gate hashalloc(struct inode *ip, int cg, long pref, int size, ulong_t (*allocator)())
809*7c478bd9Sstevel@tonic-gate {
810*7c478bd9Sstevel@tonic-gate 	struct fs *fs;
811*7c478bd9Sstevel@tonic-gate 	int i;
812*7c478bd9Sstevel@tonic-gate 	long result;
813*7c478bd9Sstevel@tonic-gate 	int icg = cg;
814*7c478bd9Sstevel@tonic-gate 
815*7c478bd9Sstevel@tonic-gate 	fs = ip->i_fs;
816*7c478bd9Sstevel@tonic-gate 	/*
817*7c478bd9Sstevel@tonic-gate 	 * 1: preferred cylinder group
818*7c478bd9Sstevel@tonic-gate 	 */
819*7c478bd9Sstevel@tonic-gate 	result = (*allocator)(ip, cg, pref, size);
820*7c478bd9Sstevel@tonic-gate 	if (result)
821*7c478bd9Sstevel@tonic-gate 		return (result);
822*7c478bd9Sstevel@tonic-gate 	/*
823*7c478bd9Sstevel@tonic-gate 	 * 2: quadratic rehash
824*7c478bd9Sstevel@tonic-gate 	 */
825*7c478bd9Sstevel@tonic-gate 	for (i = 1; i < fs->fs_ncg; i *= 2) {
826*7c478bd9Sstevel@tonic-gate 		cg += i;
827*7c478bd9Sstevel@tonic-gate 		if (cg >= fs->fs_ncg)
828*7c478bd9Sstevel@tonic-gate 			cg -= fs->fs_ncg;
829*7c478bd9Sstevel@tonic-gate 		result = (*allocator)(ip, cg, 0, size);
830*7c478bd9Sstevel@tonic-gate 		if (result)
831*7c478bd9Sstevel@tonic-gate 			return (result);
832*7c478bd9Sstevel@tonic-gate 	}
833*7c478bd9Sstevel@tonic-gate 	/*
834*7c478bd9Sstevel@tonic-gate 	 * 3: brute force search
835*7c478bd9Sstevel@tonic-gate 	 * Note that we start at i == 2, since 0 was checked initially,
836*7c478bd9Sstevel@tonic-gate 	 * and 1 is always checked in the quadratic rehash.
837*7c478bd9Sstevel@tonic-gate 	 */
838*7c478bd9Sstevel@tonic-gate 	cg = (icg + 2) % fs->fs_ncg;
839*7c478bd9Sstevel@tonic-gate 	for (i = 2; i < fs->fs_ncg; i++) {
840*7c478bd9Sstevel@tonic-gate 		result = (*allocator)(ip, cg, 0, size);
841*7c478bd9Sstevel@tonic-gate 		if (result)
842*7c478bd9Sstevel@tonic-gate 			return (result);
843*7c478bd9Sstevel@tonic-gate 		cg++;
844*7c478bd9Sstevel@tonic-gate 		if (cg == fs->fs_ncg)
845*7c478bd9Sstevel@tonic-gate 			cg = 0;
846*7c478bd9Sstevel@tonic-gate 	}
847*7c478bd9Sstevel@tonic-gate 	return (NULL);
848*7c478bd9Sstevel@tonic-gate }
849*7c478bd9Sstevel@tonic-gate 
850*7c478bd9Sstevel@tonic-gate /*
851*7c478bd9Sstevel@tonic-gate  * Determine whether a fragment can be extended.
852*7c478bd9Sstevel@tonic-gate  *
853*7c478bd9Sstevel@tonic-gate  * Check to see if the necessary fragments are available, and
854*7c478bd9Sstevel@tonic-gate  * if they are, allocate them.
855*7c478bd9Sstevel@tonic-gate  */
856*7c478bd9Sstevel@tonic-gate static daddr_t
857*7c478bd9Sstevel@tonic-gate fragextend(struct inode *ip, int cg, long bprev, int osize, int nsize)
858*7c478bd9Sstevel@tonic-gate {
859*7c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
860*7c478bd9Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
861*7c478bd9Sstevel@tonic-gate 	struct buf *bp;
862*7c478bd9Sstevel@tonic-gate 	struct cg *cgp;
863*7c478bd9Sstevel@tonic-gate 	uchar_t *blksfree;
864*7c478bd9Sstevel@tonic-gate 	long bno;
865*7c478bd9Sstevel@tonic-gate 	int frags, bbase;
866*7c478bd9Sstevel@tonic-gate 	int i, j;
867*7c478bd9Sstevel@tonic-gate 
868*7c478bd9Sstevel@tonic-gate 	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
869*7c478bd9Sstevel@tonic-gate 		return (NULL);
870*7c478bd9Sstevel@tonic-gate 	frags = numfrags(fs, nsize);
871*7c478bd9Sstevel@tonic-gate 	bbase = (int)fragnum(fs, bprev);
872*7c478bd9Sstevel@tonic-gate 	if (bbase > fragnum(fs, (bprev + frags - 1))) {
873*7c478bd9Sstevel@tonic-gate 		/* cannot extend across a block boundary */
874*7c478bd9Sstevel@tonic-gate 		return (NULL);
875*7c478bd9Sstevel@tonic-gate 	}
876*7c478bd9Sstevel@tonic-gate 
877*7c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
878*7c478bd9Sstevel@tonic-gate 		    (int)fs->fs_cgsize);
879*7c478bd9Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
880*7c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
881*7c478bd9Sstevel@tonic-gate 		brelse(bp);
882*7c478bd9Sstevel@tonic-gate 		return (NULL);
883*7c478bd9Sstevel@tonic-gate 	}
884*7c478bd9Sstevel@tonic-gate 
885*7c478bd9Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
886*7c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
887*7c478bd9Sstevel@tonic-gate 	bno = dtogd(fs, bprev);
888*7c478bd9Sstevel@tonic-gate 	for (i = numfrags(fs, osize); i < frags; i++) {
889*7c478bd9Sstevel@tonic-gate 		if (isclr(blksfree, bno + i)) {
890*7c478bd9Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
891*7c478bd9Sstevel@tonic-gate 			brelse(bp);
892*7c478bd9Sstevel@tonic-gate 			return (NULL);
893*7c478bd9Sstevel@tonic-gate 		}
894*7c478bd9Sstevel@tonic-gate 		if ((TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bprev + i)),
895*7c478bd9Sstevel@tonic-gate 			fs->fs_fsize))) {
896*7c478bd9Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
897*7c478bd9Sstevel@tonic-gate 			brelse(bp);
898*7c478bd9Sstevel@tonic-gate 			return (NULL);
899*7c478bd9Sstevel@tonic-gate 		}
900*7c478bd9Sstevel@tonic-gate 	}
901*7c478bd9Sstevel@tonic-gate 
902*7c478bd9Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
903*7c478bd9Sstevel@tonic-gate 	/*
904*7c478bd9Sstevel@tonic-gate 	 * The current fragment can be extended,
905*7c478bd9Sstevel@tonic-gate 	 * deduct the count on fragment being extended into
906*7c478bd9Sstevel@tonic-gate 	 * increase the count on the remaining fragment (if any)
907*7c478bd9Sstevel@tonic-gate 	 * allocate the extended piece.
908*7c478bd9Sstevel@tonic-gate 	 */
909*7c478bd9Sstevel@tonic-gate 	for (i = frags; i < fs->fs_frag - bbase; i++)
910*7c478bd9Sstevel@tonic-gate 		if (isclr(blksfree, bno + i))
911*7c478bd9Sstevel@tonic-gate 			break;
912*7c478bd9Sstevel@tonic-gate 	j = i - numfrags(fs, osize);
913*7c478bd9Sstevel@tonic-gate 	cgp->cg_frsum[j]--;
914*7c478bd9Sstevel@tonic-gate 	ASSERT(cgp->cg_frsum[j] >= 0);
915*7c478bd9Sstevel@tonic-gate 	if (i != frags)
916*7c478bd9Sstevel@tonic-gate 		cgp->cg_frsum[i - frags]++;
917*7c478bd9Sstevel@tonic-gate 	for (i = numfrags(fs, osize); i < frags; i++) {
918*7c478bd9Sstevel@tonic-gate 		clrbit(blksfree, bno + i);
919*7c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_nffree--;
920*7c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nffree--;
921*7c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_nffree--;
922*7c478bd9Sstevel@tonic-gate 	}
923*7c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
924*7c478bd9Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
925*7c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
926*7c478bd9Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
927*7c478bd9Sstevel@tonic-gate 	bdrwrite(bp);
928*7c478bd9Sstevel@tonic-gate 	return ((daddr_t)bprev);
929*7c478bd9Sstevel@tonic-gate }
930*7c478bd9Sstevel@tonic-gate 
931*7c478bd9Sstevel@tonic-gate /*
932*7c478bd9Sstevel@tonic-gate  * Determine whether a block can be allocated.
933*7c478bd9Sstevel@tonic-gate  *
934*7c478bd9Sstevel@tonic-gate  * Check to see if a block of the apprpriate size
935*7c478bd9Sstevel@tonic-gate  * is available, and if it is, allocate it.
936*7c478bd9Sstevel@tonic-gate  */
937*7c478bd9Sstevel@tonic-gate static daddr_t
938*7c478bd9Sstevel@tonic-gate alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
939*7c478bd9Sstevel@tonic-gate {
940*7c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
941*7c478bd9Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
942*7c478bd9Sstevel@tonic-gate 	struct buf *bp;
943*7c478bd9Sstevel@tonic-gate 	struct cg *cgp;
944*7c478bd9Sstevel@tonic-gate 	uchar_t *blksfree;
945*7c478bd9Sstevel@tonic-gate 	int bno, frags;
946*7c478bd9Sstevel@tonic-gate 	int allocsiz;
947*7c478bd9Sstevel@tonic-gate 	int i;
948*7c478bd9Sstevel@tonic-gate 
949*7c478bd9Sstevel@tonic-gate 	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
950*7c478bd9Sstevel@tonic-gate 		return (0);
951*7c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
952*7c478bd9Sstevel@tonic-gate 		    (int)fs->fs_cgsize);
953*7c478bd9Sstevel@tonic-gate 
954*7c478bd9Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
955*7c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
956*7c478bd9Sstevel@tonic-gate 	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
957*7c478bd9Sstevel@tonic-gate 		brelse(bp);
958*7c478bd9Sstevel@tonic-gate 		return (0);
959*7c478bd9Sstevel@tonic-gate 	}
960*7c478bd9Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
961*7c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
962*7c478bd9Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
963*7c478bd9Sstevel@tonic-gate 	if (size == fs->fs_bsize) {
964*7c478bd9Sstevel@tonic-gate 		if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0)
965*7c478bd9Sstevel@tonic-gate 			goto errout;
966*7c478bd9Sstevel@tonic-gate 		fs->fs_fmod = 1;
967*7c478bd9Sstevel@tonic-gate 		ufs_notclean(ufsvfsp);
968*7c478bd9Sstevel@tonic-gate 		TRANS_SI(ufsvfsp, fs, cg);
969*7c478bd9Sstevel@tonic-gate 		bdrwrite(bp);
970*7c478bd9Sstevel@tonic-gate 		return (bno);
971*7c478bd9Sstevel@tonic-gate 	}
972*7c478bd9Sstevel@tonic-gate 	/*
973*7c478bd9Sstevel@tonic-gate 	 * Check to see if any fragments are already available
974*7c478bd9Sstevel@tonic-gate 	 * allocsiz is the size which will be allocated, hacking
975*7c478bd9Sstevel@tonic-gate 	 * it down to a smaller size if necessary.
976*7c478bd9Sstevel@tonic-gate 	 */
977*7c478bd9Sstevel@tonic-gate 	frags = numfrags(fs, size);
978*7c478bd9Sstevel@tonic-gate 	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
979*7c478bd9Sstevel@tonic-gate 		if (cgp->cg_frsum[allocsiz] != 0)
980*7c478bd9Sstevel@tonic-gate 			break;
981*7c478bd9Sstevel@tonic-gate 
982*7c478bd9Sstevel@tonic-gate 	if (allocsiz != fs->fs_frag)
983*7c478bd9Sstevel@tonic-gate 		bno = mapsearch(ufsvfsp, cgp, bpref, allocsiz);
984*7c478bd9Sstevel@tonic-gate 
985*7c478bd9Sstevel@tonic-gate 	if (allocsiz == fs->fs_frag || bno < 0) {
986*7c478bd9Sstevel@tonic-gate 		/*
987*7c478bd9Sstevel@tonic-gate 		 * No fragments were available, so a block
988*7c478bd9Sstevel@tonic-gate 		 * will be allocated and hacked up.
989*7c478bd9Sstevel@tonic-gate 		 */
990*7c478bd9Sstevel@tonic-gate 		if (cgp->cg_cs.cs_nbfree == 0)
991*7c478bd9Sstevel@tonic-gate 			goto errout;
992*7c478bd9Sstevel@tonic-gate 		if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0)
993*7c478bd9Sstevel@tonic-gate 			goto errout;
994*7c478bd9Sstevel@tonic-gate 		bpref = dtogd(fs, bno);
995*7c478bd9Sstevel@tonic-gate 		for (i = frags; i < fs->fs_frag; i++)
996*7c478bd9Sstevel@tonic-gate 			setbit(blksfree, bpref + i);
997*7c478bd9Sstevel@tonic-gate 		i = fs->fs_frag - frags;
998*7c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_nffree += i;
999*7c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_nffree += i;
1000*7c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_nffree += i;
1001*7c478bd9Sstevel@tonic-gate 		cgp->cg_frsum[i]++;
1002*7c478bd9Sstevel@tonic-gate 		fs->fs_fmod = 1;
1003*7c478bd9Sstevel@tonic-gate 		ufs_notclean(ufsvfsp);
1004*7c478bd9Sstevel@tonic-gate 		TRANS_SI(ufsvfsp, fs, cg);
1005*7c478bd9Sstevel@tonic-gate 		bdrwrite(bp);
1006*7c478bd9Sstevel@tonic-gate 		return (bno);
1007*7c478bd9Sstevel@tonic-gate 	}
1008*7c478bd9Sstevel@tonic-gate 
1009*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < frags; i++)
1010*7c478bd9Sstevel@tonic-gate 		clrbit(blksfree, bno + i);
1011*7c478bd9Sstevel@tonic-gate 	cgp->cg_cs.cs_nffree -= frags;
1012*7c478bd9Sstevel@tonic-gate 	fs->fs_cstotal.cs_nffree -= frags;
1013*7c478bd9Sstevel@tonic-gate 	fs->fs_cs(fs, cg).cs_nffree -= frags;
1014*7c478bd9Sstevel@tonic-gate 	cgp->cg_frsum[allocsiz]--;
1015*7c478bd9Sstevel@tonic-gate 	ASSERT(cgp->cg_frsum[allocsiz] >= 0);
1016*7c478bd9Sstevel@tonic-gate 	if (frags != allocsiz) {
1017*7c478bd9Sstevel@tonic-gate 		cgp->cg_frsum[allocsiz - frags]++;
1018*7c478bd9Sstevel@tonic-gate 	}
1019*7c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
1020*7c478bd9Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
1021*7c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
1022*7c478bd9Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
1023*7c478bd9Sstevel@tonic-gate 	bdrwrite(bp);
1024*7c478bd9Sstevel@tonic-gate 	return (cg * fs->fs_fpg + bno);
1025*7c478bd9Sstevel@tonic-gate errout:
1026*7c478bd9Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
1027*7c478bd9Sstevel@tonic-gate 	brelse(bp);
1028*7c478bd9Sstevel@tonic-gate 	return (0);
1029*7c478bd9Sstevel@tonic-gate }
1030*7c478bd9Sstevel@tonic-gate 
1031*7c478bd9Sstevel@tonic-gate /*
1032*7c478bd9Sstevel@tonic-gate  * Allocate a block in a cylinder group.
1033*7c478bd9Sstevel@tonic-gate  *
1034*7c478bd9Sstevel@tonic-gate  * This algorithm implements the following policy:
1035*7c478bd9Sstevel@tonic-gate  *   1) allocate the requested block.
1036*7c478bd9Sstevel@tonic-gate  *   2) allocate a rotationally optimal block in the same cylinder.
1037*7c478bd9Sstevel@tonic-gate  *   3) allocate the next available block on the block rotor for the
1038*7c478bd9Sstevel@tonic-gate  *	specified cylinder group.
1039*7c478bd9Sstevel@tonic-gate  * Note that this routine only allocates fs_bsize blocks; these
1040*7c478bd9Sstevel@tonic-gate  * blocks may be fragmented by the routine that allocates them.
1041*7c478bd9Sstevel@tonic-gate  */
1042*7c478bd9Sstevel@tonic-gate static daddr_t
1043*7c478bd9Sstevel@tonic-gate alloccgblk(
1044*7c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp,
1045*7c478bd9Sstevel@tonic-gate 	struct cg *cgp,
1046*7c478bd9Sstevel@tonic-gate 	daddr_t bpref,
1047*7c478bd9Sstevel@tonic-gate 	struct buf *bp)
1048*7c478bd9Sstevel@tonic-gate {
1049*7c478bd9Sstevel@tonic-gate 	daddr_t bno;
1050*7c478bd9Sstevel@tonic-gate 	int cylno, pos, delta, rotbl_size;
1051*7c478bd9Sstevel@tonic-gate 	short *cylbp;
1052*7c478bd9Sstevel@tonic-gate 	int i;
1053*7c478bd9Sstevel@tonic-gate 	struct fs *fs;
1054*7c478bd9Sstevel@tonic-gate 	uchar_t *blksfree;
1055*7c478bd9Sstevel@tonic-gate 	daddr_t blkno, rpos, frag;
1056*7c478bd9Sstevel@tonic-gate 	short *blks;
1057*7c478bd9Sstevel@tonic-gate 	int32_t *blktot;
1058*7c478bd9Sstevel@tonic-gate 
1059*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ufsvfsp->vfs_lock));
1060*7c478bd9Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
1061*7c478bd9Sstevel@tonic-gate 	blksfree = cg_blksfree(cgp);
1062*7c478bd9Sstevel@tonic-gate 	if (bpref == 0) {
1063*7c478bd9Sstevel@tonic-gate 		bpref = cgp->cg_rotor;
1064*7c478bd9Sstevel@tonic-gate 		goto norot;
1065*7c478bd9Sstevel@tonic-gate 	}
1066*7c478bd9Sstevel@tonic-gate 	bpref = blknum(fs, bpref);
1067*7c478bd9Sstevel@tonic-gate 	bpref = dtogd(fs, bpref);
1068*7c478bd9Sstevel@tonic-gate 	/*
1069*7c478bd9Sstevel@tonic-gate 	 * If the requested block is available, use it.
1070*7c478bd9Sstevel@tonic-gate 	 */
1071*7c478bd9Sstevel@tonic-gate 	if (isblock(fs, blksfree, (daddr_t)fragstoblks(fs, bpref))) {
1072*7c478bd9Sstevel@tonic-gate 		bno = bpref;
1073*7c478bd9Sstevel@tonic-gate 		goto gotit;
1074*7c478bd9Sstevel@tonic-gate 	}
1075*7c478bd9Sstevel@tonic-gate 	/*
1076*7c478bd9Sstevel@tonic-gate 	 * Check for a block available on the same cylinder.
1077*7c478bd9Sstevel@tonic-gate 	 */
1078*7c478bd9Sstevel@tonic-gate 	cylno = cbtocylno(fs, bpref);
1079*7c478bd9Sstevel@tonic-gate 	if (cg_blktot(cgp)[cylno] == 0)
1080*7c478bd9Sstevel@tonic-gate 		goto norot;
1081*7c478bd9Sstevel@tonic-gate 	if (fs->fs_cpc == 0) {
1082*7c478bd9Sstevel@tonic-gate 		/*
1083*7c478bd9Sstevel@tonic-gate 		 * Block layout info is not available, so just
1084*7c478bd9Sstevel@tonic-gate 		 * have to take any block in this cylinder.
1085*7c478bd9Sstevel@tonic-gate 		 */
1086*7c478bd9Sstevel@tonic-gate 		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
1087*7c478bd9Sstevel@tonic-gate 		goto norot;
1088*7c478bd9Sstevel@tonic-gate 	}
1089*7c478bd9Sstevel@tonic-gate 	/*
1090*7c478bd9Sstevel@tonic-gate 	 * Check the summary information to see if a block is
1091*7c478bd9Sstevel@tonic-gate 	 * available in the requested cylinder starting at the
1092*7c478bd9Sstevel@tonic-gate 	 * requested rotational position and proceeding around.
1093*7c478bd9Sstevel@tonic-gate 	 */
1094*7c478bd9Sstevel@tonic-gate 	cylbp = cg_blks(ufsvfsp, cgp, cylno);
1095*7c478bd9Sstevel@tonic-gate 	pos = cbtorpos(ufsvfsp, bpref);
1096*7c478bd9Sstevel@tonic-gate 	for (i = pos; i < ufsvfsp->vfs_nrpos; i++)
1097*7c478bd9Sstevel@tonic-gate 		if (cylbp[i] > 0)
1098*7c478bd9Sstevel@tonic-gate 			break;
1099*7c478bd9Sstevel@tonic-gate 	if (i == ufsvfsp->vfs_nrpos)
1100*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < pos; i++)
1101*7c478bd9Sstevel@tonic-gate 			if (cylbp[i] > 0)
1102*7c478bd9Sstevel@tonic-gate 				break;
1103*7c478bd9Sstevel@tonic-gate 	if (cylbp[i] > 0) {
1104*7c478bd9Sstevel@tonic-gate 		/*
1105*7c478bd9Sstevel@tonic-gate 		 * Found a rotational position, now find the actual
1106*7c478bd9Sstevel@tonic-gate 		 * block.  A "panic" if none is actually there.
1107*7c478bd9Sstevel@tonic-gate 		 */
1108*7c478bd9Sstevel@tonic-gate 
1109*7c478bd9Sstevel@tonic-gate 		/*
1110*7c478bd9Sstevel@tonic-gate 		 * Up to this point, "pos" has referred to the rotational
1111*7c478bd9Sstevel@tonic-gate 		 * position of the desired block.  From now on, it holds
1112*7c478bd9Sstevel@tonic-gate 		 * the offset of the current cylinder within a cylinder
1113*7c478bd9Sstevel@tonic-gate 		 * cycle.  (A cylinder cycle refers to a set of cylinders
1114*7c478bd9Sstevel@tonic-gate 		 * which are described by a single rotational table; the
1115*7c478bd9Sstevel@tonic-gate 		 * size of the cycle is fs_cpc.)
1116*7c478bd9Sstevel@tonic-gate 		 *
1117*7c478bd9Sstevel@tonic-gate 		 * bno is set to the block number of the first block within
1118*7c478bd9Sstevel@tonic-gate 		 * the current cylinder cycle.
1119*7c478bd9Sstevel@tonic-gate 		 */
1120*7c478bd9Sstevel@tonic-gate 
1121*7c478bd9Sstevel@tonic-gate 		pos = cylno % fs->fs_cpc;
1122*7c478bd9Sstevel@tonic-gate 		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
1123*7c478bd9Sstevel@tonic-gate 
1124*7c478bd9Sstevel@tonic-gate 		/*
1125*7c478bd9Sstevel@tonic-gate 		 * The blocks within a cylinder are grouped into equivalence
1126*7c478bd9Sstevel@tonic-gate 		 * classes according to their "rotational position."  There
1127*7c478bd9Sstevel@tonic-gate 		 * are two tables used to determine these classes.
1128*7c478bd9Sstevel@tonic-gate 		 *
1129*7c478bd9Sstevel@tonic-gate 		 * The positional offset table (fs_postbl) has an entry for
1130*7c478bd9Sstevel@tonic-gate 		 * each rotational position of each cylinder in a cylinder
1131*7c478bd9Sstevel@tonic-gate 		 * cycle.  This entry contains the relative block number
1132*7c478bd9Sstevel@tonic-gate 		 * (counting from the start of the cylinder cycle) of the
1133*7c478bd9Sstevel@tonic-gate 		 * first block in the equivalence class for that position
1134*7c478bd9Sstevel@tonic-gate 		 * and that cylinder.  Positions for which no blocks exist
1135*7c478bd9Sstevel@tonic-gate 		 * are indicated by a -1.
1136*7c478bd9Sstevel@tonic-gate 		 *
1137*7c478bd9Sstevel@tonic-gate 		 * The rotational delta table (fs_rotbl) has an entry for
1138*7c478bd9Sstevel@tonic-gate 		 * each block in a cylinder cycle.  This entry contains
1139*7c478bd9Sstevel@tonic-gate 		 * the offset from that block to the next block in the
1140*7c478bd9Sstevel@tonic-gate 		 * same equivalence class.  The last block in the class
1141*7c478bd9Sstevel@tonic-gate 		 * is indicated by a zero in the table.
1142*7c478bd9Sstevel@tonic-gate 		 *
1143*7c478bd9Sstevel@tonic-gate 		 * The following code, then, walks through all of the blocks
1144*7c478bd9Sstevel@tonic-gate 		 * in the cylinder (cylno) which we're allocating within
1145*7c478bd9Sstevel@tonic-gate 		 * which are in the equivalence class for the rotational
1146*7c478bd9Sstevel@tonic-gate 		 * position (i) which we're allocating within.
1147*7c478bd9Sstevel@tonic-gate 		 */
1148*7c478bd9Sstevel@tonic-gate 
1149*7c478bd9Sstevel@tonic-gate 		if (fs_postbl(ufsvfsp, pos)[i] == -1) {
1150*7c478bd9Sstevel@tonic-gate 			(void) ufs_fault(ufsvfsp->vfs_root,
1151*7c478bd9Sstevel@tonic-gate 	    "alloccgblk: cyl groups corrupted, pos = %d, i = %d, fs = %s\n",
1152*7c478bd9Sstevel@tonic-gate 				    pos, i, fs->fs_fsmnt);
1153*7c478bd9Sstevel@tonic-gate 			return (0);
1154*7c478bd9Sstevel@tonic-gate 		}
1155*7c478bd9Sstevel@tonic-gate 
1156*7c478bd9Sstevel@tonic-gate 		/*
1157*7c478bd9Sstevel@tonic-gate 		 * There is one entry in the rotational table for each block
1158*7c478bd9Sstevel@tonic-gate 		 * in the cylinder cycle.  These are whole blocks, not frags.
1159*7c478bd9Sstevel@tonic-gate 		 */
1160*7c478bd9Sstevel@tonic-gate 
1161*7c478bd9Sstevel@tonic-gate 		rotbl_size = (fs->fs_cpc * fs->fs_spc) >>
1162*7c478bd9Sstevel@tonic-gate 		    (fs->fs_fragshift + fs->fs_fsbtodb);
1163*7c478bd9Sstevel@tonic-gate 
1164*7c478bd9Sstevel@tonic-gate 		/*
1165*7c478bd9Sstevel@tonic-gate 		 * As we start, "i" is the rotational position within which
1166*7c478bd9Sstevel@tonic-gate 		 * we're searching.  After the next line, it will be a block
1167*7c478bd9Sstevel@tonic-gate 		 * number (relative to the start of the cylinder cycle)
1168*7c478bd9Sstevel@tonic-gate 		 * within the equivalence class of that rotational position.
1169*7c478bd9Sstevel@tonic-gate 		 */
1170*7c478bd9Sstevel@tonic-gate 
1171*7c478bd9Sstevel@tonic-gate 		i = fs_postbl(ufsvfsp, pos)[i];
1172*7c478bd9Sstevel@tonic-gate 
1173*7c478bd9Sstevel@tonic-gate 		for (;;) {
1174*7c478bd9Sstevel@tonic-gate 			if (isblock(fs, blksfree, (daddr_t)(bno + i))) {
1175*7c478bd9Sstevel@tonic-gate 				bno = blkstofrags(fs, (bno + i));
1176*7c478bd9Sstevel@tonic-gate 				goto gotit;
1177*7c478bd9Sstevel@tonic-gate 			}
1178*7c478bd9Sstevel@tonic-gate 			delta = fs_rotbl(fs)[i];
1179*7c478bd9Sstevel@tonic-gate 			if (delta <= 0 ||		/* End of chain, or */
1180*7c478bd9Sstevel@tonic-gate 			    delta + i > rotbl_size)	/* end of table? */
1181*7c478bd9Sstevel@tonic-gate 				break;			/* If so, panic. */
1182*7c478bd9Sstevel@tonic-gate 			i += delta;
1183*7c478bd9Sstevel@tonic-gate 		}
1184*7c478bd9Sstevel@tonic-gate 		(void) ufs_fault(ufsvfsp->vfs_root,
1185*7c478bd9Sstevel@tonic-gate 	"alloccgblk: can't find blk in cyl, pos:%d, i:%d, fs:%s bno: %x\n",
1186*7c478bd9Sstevel@tonic-gate 		    pos, i, fs->fs_fsmnt, (int)bno);
1187*7c478bd9Sstevel@tonic-gate 		return (0);
1188*7c478bd9Sstevel@tonic-gate 	}
1189*7c478bd9Sstevel@tonic-gate norot:
1190*7c478bd9Sstevel@tonic-gate 	/*
1191*7c478bd9Sstevel@tonic-gate 	 * No blocks in the requested cylinder, so take
1192*7c478bd9Sstevel@tonic-gate 	 * next available one in this cylinder group.
1193*7c478bd9Sstevel@tonic-gate 	 */
1194*7c478bd9Sstevel@tonic-gate 	bno = mapsearch(ufsvfsp, cgp, bpref, (int)fs->fs_frag);
1195*7c478bd9Sstevel@tonic-gate 	if (bno < 0)
1196*7c478bd9Sstevel@tonic-gate 		return (0);
1197*7c478bd9Sstevel@tonic-gate 	cgp->cg_rotor = bno;
1198*7c478bd9Sstevel@tonic-gate gotit:
1199*7c478bd9Sstevel@tonic-gate 	blkno = fragstoblks(fs, bno);
1200*7c478bd9Sstevel@tonic-gate 	frag = (cgp->cg_cgx * fs->fs_fpg) + bno;
1201*7c478bd9Sstevel@tonic-gate 	if (TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, frag)), fs->fs_bsize))
1202*7c478bd9Sstevel@tonic-gate 		goto norot;
1203*7c478bd9Sstevel@tonic-gate 	clrblock(fs, blksfree, (long)blkno);
1204*7c478bd9Sstevel@tonic-gate 	/*
1205*7c478bd9Sstevel@tonic-gate 	 * the other cg/sb/si fields are TRANS'ed by the caller
1206*7c478bd9Sstevel@tonic-gate 	 */
1207*7c478bd9Sstevel@tonic-gate 	cgp->cg_cs.cs_nbfree--;
1208*7c478bd9Sstevel@tonic-gate 	fs->fs_cstotal.cs_nbfree--;
1209*7c478bd9Sstevel@tonic-gate 	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
1210*7c478bd9Sstevel@tonic-gate 	cylno = cbtocylno(fs, bno);
1211*7c478bd9Sstevel@tonic-gate 	blks = cg_blks(ufsvfsp, cgp, cylno);
1212*7c478bd9Sstevel@tonic-gate 	rpos = cbtorpos(ufsvfsp, bno);
1213*7c478bd9Sstevel@tonic-gate 	blktot = cg_blktot(cgp);
1214*7c478bd9Sstevel@tonic-gate 	blks[rpos]--;
1215*7c478bd9Sstevel@tonic-gate 	blktot[cylno]--;
1216*7c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
1217*7c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
1218*7c478bd9Sstevel@tonic-gate 	return (frag);
1219*7c478bd9Sstevel@tonic-gate }
1220*7c478bd9Sstevel@tonic-gate 
1221*7c478bd9Sstevel@tonic-gate /*
1222*7c478bd9Sstevel@tonic-gate  * Determine whether an inode can be allocated.
1223*7c478bd9Sstevel@tonic-gate  *
1224*7c478bd9Sstevel@tonic-gate  * Check to see if an inode is available, and if it is,
1225*7c478bd9Sstevel@tonic-gate  * allocate it using the following policy:
1226*7c478bd9Sstevel@tonic-gate  *   1) allocate the requested inode.
1227*7c478bd9Sstevel@tonic-gate  *   2) allocate the next available inode after the requested
1228*7c478bd9Sstevel@tonic-gate  *	inode in the specified cylinder group.
1229*7c478bd9Sstevel@tonic-gate  */
1230*7c478bd9Sstevel@tonic-gate static ino_t
1231*7c478bd9Sstevel@tonic-gate ialloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
1232*7c478bd9Sstevel@tonic-gate {
1233*7c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
1234*7c478bd9Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
1235*7c478bd9Sstevel@tonic-gate 	struct cg *cgp;
1236*7c478bd9Sstevel@tonic-gate 	struct buf *bp;
1237*7c478bd9Sstevel@tonic-gate 	int start, len, loc, map, i;
1238*7c478bd9Sstevel@tonic-gate 	char *iused;
1239*7c478bd9Sstevel@tonic-gate 
1240*7c478bd9Sstevel@tonic-gate 	if (fs->fs_cs(fs, cg).cs_nifree == 0)
1241*7c478bd9Sstevel@tonic-gate 		return (0);
1242*7c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
1243*7c478bd9Sstevel@tonic-gate 		    (int)fs->fs_cgsize);
1244*7c478bd9Sstevel@tonic-gate 
1245*7c478bd9Sstevel@tonic-gate 	cgp = bp->b_un.b_cg;
1246*7c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
1247*7c478bd9Sstevel@tonic-gate 	    cgp->cg_cs.cs_nifree == 0) {
1248*7c478bd9Sstevel@tonic-gate 		brelse(bp);
1249*7c478bd9Sstevel@tonic-gate 		return (0);
1250*7c478bd9Sstevel@tonic-gate 	}
1251*7c478bd9Sstevel@tonic-gate 	iused = cg_inosused(cgp);
1252*7c478bd9Sstevel@tonic-gate 	mutex_enter(&ufsvfsp->vfs_lock);
1253*7c478bd9Sstevel@tonic-gate 	/*
1254*7c478bd9Sstevel@tonic-gate 	 * While we are waiting for the mutex, someone may have taken
1255*7c478bd9Sstevel@tonic-gate 	 * the last available inode.  Need to recheck.
1256*7c478bd9Sstevel@tonic-gate 	 */
1257*7c478bd9Sstevel@tonic-gate 	if (cgp->cg_cs.cs_nifree == 0) {
1258*7c478bd9Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
1259*7c478bd9Sstevel@tonic-gate 		brelse(bp);
1260*7c478bd9Sstevel@tonic-gate 		return (0);
1261*7c478bd9Sstevel@tonic-gate 	}
1262*7c478bd9Sstevel@tonic-gate 
1263*7c478bd9Sstevel@tonic-gate 	cgp->cg_time = gethrestime_sec();
1264*7c478bd9Sstevel@tonic-gate 	if (ipref) {
1265*7c478bd9Sstevel@tonic-gate 		ipref %= fs->fs_ipg;
1266*7c478bd9Sstevel@tonic-gate 		if (isclr(iused, ipref))
1267*7c478bd9Sstevel@tonic-gate 			goto gotit;
1268*7c478bd9Sstevel@tonic-gate 	}
1269*7c478bd9Sstevel@tonic-gate 	start = cgp->cg_irotor / NBBY;
1270*7c478bd9Sstevel@tonic-gate 	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
1271*7c478bd9Sstevel@tonic-gate 	loc = skpc(0xff, (uint_t)len, &iused[start]);
1272*7c478bd9Sstevel@tonic-gate 	if (loc == 0) {
1273*7c478bd9Sstevel@tonic-gate 		len = start + 1;
1274*7c478bd9Sstevel@tonic-gate 		start = 0;
1275*7c478bd9Sstevel@tonic-gate 		loc = skpc(0xff, (uint_t)len, &iused[0]);
1276*7c478bd9Sstevel@tonic-gate 		if (loc == 0) {
1277*7c478bd9Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
1278*7c478bd9Sstevel@tonic-gate 			(void) ufs_fault(ITOV(ip),
1279*7c478bd9Sstevel@tonic-gate 		    "ialloccg: map corrupted, cg = %d, irotor = %d, fs = %s\n",
1280*7c478bd9Sstevel@tonic-gate 				    cg, (int)cgp->cg_irotor, fs->fs_fsmnt);
1281*7c478bd9Sstevel@tonic-gate 			return (0);
1282*7c478bd9Sstevel@tonic-gate 		}
1283*7c478bd9Sstevel@tonic-gate 	}
1284*7c478bd9Sstevel@tonic-gate 	i = start + len - loc;
1285*7c478bd9Sstevel@tonic-gate 	map = iused[i];
1286*7c478bd9Sstevel@tonic-gate 	ipref = i * NBBY;
1287*7c478bd9Sstevel@tonic-gate 	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1288*7c478bd9Sstevel@tonic-gate 		if ((map & i) == 0) {
1289*7c478bd9Sstevel@tonic-gate 			cgp->cg_irotor = ipref;
1290*7c478bd9Sstevel@tonic-gate 			goto gotit;
1291*7c478bd9Sstevel@tonic-gate 		}
1292*7c478bd9Sstevel@tonic-gate 	}
1293*7c478bd9Sstevel@tonic-gate 
1294*7c478bd9Sstevel@tonic-gate 	mutex_exit(&ufsvfsp->vfs_lock);
1295*7c478bd9Sstevel@tonic-gate 	(void) ufs_fault(ITOV(ip), "ialloccg: block not in mapfs = %s",
1296*7c478bd9Sstevel@tonic-gate 							    fs->fs_fsmnt);
1297*7c478bd9Sstevel@tonic-gate 	return (0);
1298*7c478bd9Sstevel@tonic-gate gotit:
1299*7c478bd9Sstevel@tonic-gate 	setbit(iused, ipref);
1300*7c478bd9Sstevel@tonic-gate 	cgp->cg_cs.cs_nifree--;
1301*7c478bd9Sstevel@tonic-gate 	fs->fs_cstotal.cs_nifree--;
1302*7c478bd9Sstevel@tonic-gate 	fs->fs_cs(fs, cg).cs_nifree--;
1303*7c478bd9Sstevel@tonic-gate 	if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) {
1304*7c478bd9Sstevel@tonic-gate 		cgp->cg_cs.cs_ndir++;
1305*7c478bd9Sstevel@tonic-gate 		fs->fs_cstotal.cs_ndir++;
1306*7c478bd9Sstevel@tonic-gate 		fs->fs_cs(fs, cg).cs_ndir++;
1307*7c478bd9Sstevel@tonic-gate 	}
1308*7c478bd9Sstevel@tonic-gate 	fs->fs_fmod = 1;
1309*7c478bd9Sstevel@tonic-gate 	ufs_notclean(ufsvfsp);
1310*7c478bd9Sstevel@tonic-gate 	TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
1311*7c478bd9Sstevel@tonic-gate 	TRANS_SI(ufsvfsp, fs, cg);
1312*7c478bd9Sstevel@tonic-gate 	bdrwrite(bp);
1313*7c478bd9Sstevel@tonic-gate 	return (cg * fs->fs_ipg + ipref);
1314*7c478bd9Sstevel@tonic-gate }
1315*7c478bd9Sstevel@tonic-gate 
1316*7c478bd9Sstevel@tonic-gate /*
1317*7c478bd9Sstevel@tonic-gate  * Find a block of the specified size in the specified cylinder group.
1318*7c478bd9Sstevel@tonic-gate  *
1319*7c478bd9Sstevel@tonic-gate  * It is a panic if a request is made to find a block if none are
1320*7c478bd9Sstevel@tonic-gate  * available.
1321*7c478bd9Sstevel@tonic-gate  */
1322*7c478bd9Sstevel@tonic-gate static daddr_t
1323*7c478bd9Sstevel@tonic-gate mapsearch(struct ufsvfs *ufsvfsp, struct cg *cgp, daddr_t bpref,
1324*7c478bd9Sstevel@tonic-gate 	int allocsiz)
1325*7c478bd9Sstevel@tonic-gate {
1326*7c478bd9Sstevel@tonic-gate 	struct fs *fs	= ufsvfsp->vfs_fs;
1327*7c478bd9Sstevel@tonic-gate 	daddr_t bno, cfrag;
1328*7c478bd9Sstevel@tonic-gate 	int start, len, loc, i, last, first, secondtime;
1329*7c478bd9Sstevel@tonic-gate 	int blk, field, subfield, pos;
1330*7c478bd9Sstevel@tonic-gate 	int gotit;
1331*7c478bd9Sstevel@tonic-gate 
1332*7c478bd9Sstevel@tonic-gate 	/*
1333*7c478bd9Sstevel@tonic-gate 	 * ufsvfs->vfs_lock is held when calling this.
1334*7c478bd9Sstevel@tonic-gate 	 */
1335*7c478bd9Sstevel@tonic-gate 	/*
1336*7c478bd9Sstevel@tonic-gate 	 * Find the fragment by searching through the
1337*7c478bd9Sstevel@tonic-gate 	 * free block map for an appropriate bit pattern.
1338*7c478bd9Sstevel@tonic-gate 	 */
1339*7c478bd9Sstevel@tonic-gate 	if (bpref)
1340*7c478bd9Sstevel@tonic-gate 		start = dtogd(fs, bpref) / NBBY;
1341*7c478bd9Sstevel@tonic-gate 	else
1342*7c478bd9Sstevel@tonic-gate 		start = cgp->cg_frotor / NBBY;
1343*7c478bd9Sstevel@tonic-gate 	/*
1344*7c478bd9Sstevel@tonic-gate 	 * the following loop performs two scans -- the first scan
1345*7c478bd9Sstevel@tonic-gate 	 * searches the bottom half of the array for a match and the
1346*7c478bd9Sstevel@tonic-gate 	 * second scan searches the top half of the array.  The loops
1347*7c478bd9Sstevel@tonic-gate 	 * have been merged just to make things difficult.
1348*7c478bd9Sstevel@tonic-gate 	 */
1349*7c478bd9Sstevel@tonic-gate 	first = start;
1350*7c478bd9Sstevel@tonic-gate 	last = howmany(fs->fs_fpg, NBBY);
1351*7c478bd9Sstevel@tonic-gate 	secondtime = 0;
1352*7c478bd9Sstevel@tonic-gate 	cfrag = cgp->cg_cgx * fs->fs_fpg;
1353*7c478bd9Sstevel@tonic-gate 	while (first < last) {
1354*7c478bd9Sstevel@tonic-gate 		len = last - first;
1355*7c478bd9Sstevel@tonic-gate 		/*
1356*7c478bd9Sstevel@tonic-gate 		 * search the array for a match
1357*7c478bd9Sstevel@tonic-gate 		 */
1358*7c478bd9Sstevel@tonic-gate 		loc = scanc((unsigned)len, (uchar_t *)&cg_blksfree(cgp)[first],
1359*7c478bd9Sstevel@tonic-gate 			(uchar_t *)fragtbl[fs->fs_frag],
1360*7c478bd9Sstevel@tonic-gate 			(int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1361*7c478bd9Sstevel@tonic-gate 		/*
1362*7c478bd9Sstevel@tonic-gate 		 * match found
1363*7c478bd9Sstevel@tonic-gate 		 */
1364*7c478bd9Sstevel@tonic-gate 		if (loc) {
1365*7c478bd9Sstevel@tonic-gate 			bno = (last - loc) * NBBY;
1366*7c478bd9Sstevel@tonic-gate 
1367*7c478bd9Sstevel@tonic-gate 			/*
1368*7c478bd9Sstevel@tonic-gate 			 * Found the byte in the map, sift
1369*7c478bd9Sstevel@tonic-gate 			 * through the bits to find the selected frag
1370*7c478bd9Sstevel@tonic-gate 			 */
1371*7c478bd9Sstevel@tonic-gate 			cgp->cg_frotor = bno;
1372*7c478bd9Sstevel@tonic-gate 			gotit = 0;
1373*7c478bd9Sstevel@tonic-gate 			for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1374*7c478bd9Sstevel@tonic-gate 				blk = blkmap(fs, cg_blksfree(cgp), bno);
1375*7c478bd9Sstevel@tonic-gate 				blk <<= 1;
1376*7c478bd9Sstevel@tonic-gate 				field = around[allocsiz];
1377*7c478bd9Sstevel@tonic-gate 				subfield = inside[allocsiz];
1378*7c478bd9Sstevel@tonic-gate 				for (pos = 0;
1379*7c478bd9Sstevel@tonic-gate 				    pos <= fs->fs_frag - allocsiz;
1380*7c478bd9Sstevel@tonic-gate 				    pos++) {
1381*7c478bd9Sstevel@tonic-gate 					if ((blk & field) == subfield) {
1382*7c478bd9Sstevel@tonic-gate 						gotit++;
1383*7c478bd9Sstevel@tonic-gate 						break;
1384*7c478bd9Sstevel@tonic-gate 					}
1385*7c478bd9Sstevel@tonic-gate 					field <<= 1;
1386*7c478bd9Sstevel@tonic-gate 					subfield <<= 1;
1387*7c478bd9Sstevel@tonic-gate 				}
1388*7c478bd9Sstevel@tonic-gate 				if (gotit)
1389*7c478bd9Sstevel@tonic-gate 					break;
1390*7c478bd9Sstevel@tonic-gate 			}
1391*7c478bd9Sstevel@tonic-gate 			bno += pos;
1392*7c478bd9Sstevel@tonic-gate 
1393*7c478bd9Sstevel@tonic-gate 			/*
1394*7c478bd9Sstevel@tonic-gate 			 * success if block is *not* being converted from
1395*7c478bd9Sstevel@tonic-gate 			 * metadata into userdata (harpy).  If so, ignore.
1396*7c478bd9Sstevel@tonic-gate 			 */
1397*7c478bd9Sstevel@tonic-gate 			if (!TRANS_ISCANCEL(ufsvfsp,
1398*7c478bd9Sstevel@tonic-gate 				ldbtob(fsbtodb(fs, (cfrag+bno))),
1399*7c478bd9Sstevel@tonic-gate 				allocsiz * fs->fs_fsize))
1400*7c478bd9Sstevel@tonic-gate 				return (bno);
1401*7c478bd9Sstevel@tonic-gate 			/*
1402*7c478bd9Sstevel@tonic-gate 			 * keep looking -- this block is being converted
1403*7c478bd9Sstevel@tonic-gate 			 */
1404*7c478bd9Sstevel@tonic-gate 			first = (last - loc) + 1;
1405*7c478bd9Sstevel@tonic-gate 			loc = 0;
1406*7c478bd9Sstevel@tonic-gate 			if (first < last)
1407*7c478bd9Sstevel@tonic-gate 				continue;
1408*7c478bd9Sstevel@tonic-gate 		}
1409*7c478bd9Sstevel@tonic-gate 		/*
1410*7c478bd9Sstevel@tonic-gate 		 * no usable matches in bottom half -- now search the top half
1411*7c478bd9Sstevel@tonic-gate 		 */
1412*7c478bd9Sstevel@tonic-gate 		if (secondtime)
1413*7c478bd9Sstevel@tonic-gate 			/*
1414*7c478bd9Sstevel@tonic-gate 			 * no usable matches in top half -- all done
1415*7c478bd9Sstevel@tonic-gate 			 */
1416*7c478bd9Sstevel@tonic-gate 			break;
1417*7c478bd9Sstevel@tonic-gate 		secondtime = 1;
1418*7c478bd9Sstevel@tonic-gate 		last = start + 1;
1419*7c478bd9Sstevel@tonic-gate 		first = 0;
1420*7c478bd9Sstevel@tonic-gate 	}
1421*7c478bd9Sstevel@tonic-gate 	/*
1422*7c478bd9Sstevel@tonic-gate 	 * no usable matches
1423*7c478bd9Sstevel@tonic-gate 	 */
1424*7c478bd9Sstevel@tonic-gate 	return ((daddr_t)-1);
1425*7c478bd9Sstevel@tonic-gate }
1426*7c478bd9Sstevel@tonic-gate 
1427*7c478bd9Sstevel@tonic-gate #define	UFSNADDR (NDADDR + NIADDR)	/* NADDR applies to (obsolete) S5FS */
1428*7c478bd9Sstevel@tonic-gate #define	IB(i)	(NDADDR + (i))	/* index of i'th indirect block ptr */
1429*7c478bd9Sstevel@tonic-gate #define	SINGLE	0		/* single indirect block ptr */
1430*7c478bd9Sstevel@tonic-gate #define	DOUBLE	1		/* double indirect block ptr */
1431*7c478bd9Sstevel@tonic-gate #define	TRIPLE	2		/* triple indirect block ptr */
1432*7c478bd9Sstevel@tonic-gate 
1433*7c478bd9Sstevel@tonic-gate /*
1434*7c478bd9Sstevel@tonic-gate  * Free storage space associated with the specified inode.  The portion
1435*7c478bd9Sstevel@tonic-gate  * to be freed is specified by lp->l_start and lp->l_len (already
1436*7c478bd9Sstevel@tonic-gate  * normalized to a "whence" of 0).
1437*7c478bd9Sstevel@tonic-gate  *
1438*7c478bd9Sstevel@tonic-gate  * This is an experimental facility whose continued existence is not
1439*7c478bd9Sstevel@tonic-gate  * guaranteed.  Currently, we only support the special case
1440*7c478bd9Sstevel@tonic-gate  * of l_len == 0, meaning free to end of file.
1441*7c478bd9Sstevel@tonic-gate  *
1442*7c478bd9Sstevel@tonic-gate  * Blocks are freed in reverse order.  This FILO algorithm will tend to
1443*7c478bd9Sstevel@tonic-gate  * maintain a contiguous free list much longer than FIFO.
1444*7c478bd9Sstevel@tonic-gate  * See also ufs_itrunc() in ufs_inode.c.
1445*7c478bd9Sstevel@tonic-gate  *
1446*7c478bd9Sstevel@tonic-gate  * Bug: unused bytes in the last retained block are not cleared.
1447*7c478bd9Sstevel@tonic-gate  * This may result in a "hole" in the file that does not read as zeroes.
1448*7c478bd9Sstevel@tonic-gate  */
1449*7c478bd9Sstevel@tonic-gate /* ARGSUSED */
1450*7c478bd9Sstevel@tonic-gate int
1451*7c478bd9Sstevel@tonic-gate ufs_freesp(struct vnode *vp, struct flock64 *lp, int flag, cred_t *cr)
1452*7c478bd9Sstevel@tonic-gate {
1453*7c478bd9Sstevel@tonic-gate 	int i;
1454*7c478bd9Sstevel@tonic-gate 	struct inode *ip = VTOI(vp);
1455*7c478bd9Sstevel@tonic-gate 	int error;
1456*7c478bd9Sstevel@tonic-gate 
1457*7c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type == VREG);
1458*7c478bd9Sstevel@tonic-gate 	ASSERT(lp->l_start >= 0);	/* checked by convoff */
1459*7c478bd9Sstevel@tonic-gate 
1460*7c478bd9Sstevel@tonic-gate 	if (lp->l_len != 0)
1461*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
1462*7c478bd9Sstevel@tonic-gate 
1463*7c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_READER);
1464*7c478bd9Sstevel@tonic-gate 	if (ip->i_size == (u_offset_t)lp->l_start) {
1465*7c478bd9Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
1466*7c478bd9Sstevel@tonic-gate 		return (0);
1467*7c478bd9Sstevel@tonic-gate 	}
1468*7c478bd9Sstevel@tonic-gate 
1469*7c478bd9Sstevel@tonic-gate 	/*
1470*7c478bd9Sstevel@tonic-gate 	 * Check if there is any active mandatory lock on the
1471*7c478bd9Sstevel@tonic-gate 	 * range that will be truncated/expanded.
1472*7c478bd9Sstevel@tonic-gate 	 */
1473*7c478bd9Sstevel@tonic-gate 	if (MANDLOCK(vp, ip->i_mode)) {
1474*7c478bd9Sstevel@tonic-gate 		offset_t save_start;
1475*7c478bd9Sstevel@tonic-gate 
1476*7c478bd9Sstevel@tonic-gate 		save_start = lp->l_start;
1477*7c478bd9Sstevel@tonic-gate 
1478*7c478bd9Sstevel@tonic-gate 		if (ip->i_size < lp->l_start) {
1479*7c478bd9Sstevel@tonic-gate 			/*
1480*7c478bd9Sstevel@tonic-gate 			 * "Truncate up" case: need to make sure there
1481*7c478bd9Sstevel@tonic-gate 			 * is no lock beyond current end-of-file. To
1482*7c478bd9Sstevel@tonic-gate 			 * do so, we need to set l_start to the size
1483*7c478bd9Sstevel@tonic-gate 			 * of the file temporarily.
1484*7c478bd9Sstevel@tonic-gate 			 */
1485*7c478bd9Sstevel@tonic-gate 			lp->l_start = ip->i_size;
1486*7c478bd9Sstevel@tonic-gate 		}
1487*7c478bd9Sstevel@tonic-gate 		lp->l_type = F_WRLCK;
1488*7c478bd9Sstevel@tonic-gate 		lp->l_sysid = 0;
1489*7c478bd9Sstevel@tonic-gate 		lp->l_pid = ttoproc(curthread)->p_pid;
1490*7c478bd9Sstevel@tonic-gate 		i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK;
1491*7c478bd9Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
1492*7c478bd9Sstevel@tonic-gate 		if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 ||
1493*7c478bd9Sstevel@tonic-gate 		    lp->l_type != F_UNLCK) {
1494*7c478bd9Sstevel@tonic-gate 			return (i ? i : EAGAIN);
1495*7c478bd9Sstevel@tonic-gate 		}
1496*7c478bd9Sstevel@tonic-gate 		rw_enter(&ip->i_contents, RW_READER);
1497*7c478bd9Sstevel@tonic-gate 
1498*7c478bd9Sstevel@tonic-gate 		lp->l_start = save_start;
1499*7c478bd9Sstevel@tonic-gate 	}
1500*7c478bd9Sstevel@tonic-gate 
1501*7c478bd9Sstevel@tonic-gate 	/*
1502*7c478bd9Sstevel@tonic-gate 	 * Make sure a write isn't in progress (allocating blocks)
1503*7c478bd9Sstevel@tonic-gate 	 * by acquiring i_rwlock (we promised ufs_bmap we wouldn't
1504*7c478bd9Sstevel@tonic-gate 	 * truncate while it was allocating blocks).
1505*7c478bd9Sstevel@tonic-gate 	 * Grab the locks in the right order.
1506*7c478bd9Sstevel@tonic-gate 	 */
1507*7c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
1508*7c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_rwlock, RW_WRITER);
1509*7c478bd9Sstevel@tonic-gate 	error = TRANS_ITRUNC(ip, (u_offset_t)lp->l_start, 0, cr);
1510*7c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_rwlock);
1511*7c478bd9Sstevel@tonic-gate 	return (error);
1512*7c478bd9Sstevel@tonic-gate }
1513*7c478bd9Sstevel@tonic-gate 
1514*7c478bd9Sstevel@tonic-gate /*
1515*7c478bd9Sstevel@tonic-gate  * Find a cg with as close to nb contiguous bytes as possible
1516*7c478bd9Sstevel@tonic-gate  *	THIS MAY TAKE MANY DISK READS!
1517*7c478bd9Sstevel@tonic-gate  *
1518*7c478bd9Sstevel@tonic-gate  * Implemented in an attempt to allocate contiguous blocks for
1519*7c478bd9Sstevel@tonic-gate  * writing the ufs log file to, minimizing future disk head seeking
1520*7c478bd9Sstevel@tonic-gate  */
1521*7c478bd9Sstevel@tonic-gate daddr_t
1522*7c478bd9Sstevel@tonic-gate contigpref(ufsvfs_t *ufsvfsp, size_t nb)
1523*7c478bd9Sstevel@tonic-gate {
1524*7c478bd9Sstevel@tonic-gate 	struct fs	*fs	= ufsvfsp->vfs_fs;
1525*7c478bd9Sstevel@tonic-gate 	daddr_t		nblk	= lblkno(fs, blkroundup(fs, nb));
1526*7c478bd9Sstevel@tonic-gate 	daddr_t		savebno, curbno, cgbno;
1527*7c478bd9Sstevel@tonic-gate 	int		cg, cgblks, savecg, savenblk, curnblk;
1528*7c478bd9Sstevel@tonic-gate 	uchar_t		*blksfree;
1529*7c478bd9Sstevel@tonic-gate 	buf_t		*bp;
1530*7c478bd9Sstevel@tonic-gate 	struct cg	*cgp;
1531*7c478bd9Sstevel@tonic-gate 
1532*7c478bd9Sstevel@tonic-gate 	savenblk = 0;
1533*7c478bd9Sstevel@tonic-gate 	savecg = 0;
1534*7c478bd9Sstevel@tonic-gate 	savebno = 0;
1535*7c478bd9Sstevel@tonic-gate 	for (cg = 0; cg < fs->fs_ncg; ++cg) {
1536*7c478bd9Sstevel@tonic-gate 
1537*7c478bd9Sstevel@tonic-gate 		/* not enough free blks for a contig check */
1538*7c478bd9Sstevel@tonic-gate 		if (fs->fs_cs(fs, cg).cs_nbfree < nblk)
1539*7c478bd9Sstevel@tonic-gate 			continue;
1540*7c478bd9Sstevel@tonic-gate 
1541*7c478bd9Sstevel@tonic-gate 		/*
1542*7c478bd9Sstevel@tonic-gate 		 * find the largest contiguous range in this cg
1543*7c478bd9Sstevel@tonic-gate 		 */
1544*7c478bd9Sstevel@tonic-gate 		bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev,
1545*7c478bd9Sstevel@tonic-gate 			(daddr_t)fsbtodb(fs, cgtod(fs, cg)),
1546*7c478bd9Sstevel@tonic-gate 			(int)fs->fs_cgsize);
1547*7c478bd9Sstevel@tonic-gate 		cgp = bp->b_un.b_cg;
1548*7c478bd9Sstevel@tonic-gate 		if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
1549*7c478bd9Sstevel@tonic-gate 			brelse(bp);
1550*7c478bd9Sstevel@tonic-gate 			continue;
1551*7c478bd9Sstevel@tonic-gate 		}
1552*7c478bd9Sstevel@tonic-gate 		blksfree = cg_blksfree(cgp);	    /* free array */
1553*7c478bd9Sstevel@tonic-gate 		cgblks = fragstoblks(fs, fs->fs_fpg); /* blks in free array */
1554*7c478bd9Sstevel@tonic-gate 		cgbno = 0;
1555*7c478bd9Sstevel@tonic-gate 		while (cgbno < cgblks && savenblk < nblk) {
1556*7c478bd9Sstevel@tonic-gate 			/* find a free block */
1557*7c478bd9Sstevel@tonic-gate 			for (; cgbno < cgblks; ++cgbno)
1558*7c478bd9Sstevel@tonic-gate 				if (isblock(fs, blksfree, cgbno))
1559*7c478bd9Sstevel@tonic-gate 					break;
1560*7c478bd9Sstevel@tonic-gate 			curbno = cgbno;
1561*7c478bd9Sstevel@tonic-gate 			/* count the number of free blocks */
1562*7c478bd9Sstevel@tonic-gate 			for (curnblk = 0; cgbno < cgblks; ++cgbno) {
1563*7c478bd9Sstevel@tonic-gate 				if (!isblock(fs, blksfree, cgbno))
1564*7c478bd9Sstevel@tonic-gate 					break;
1565*7c478bd9Sstevel@tonic-gate 				if (++curnblk >= nblk)
1566*7c478bd9Sstevel@tonic-gate 					break;
1567*7c478bd9Sstevel@tonic-gate 			}
1568*7c478bd9Sstevel@tonic-gate 			if (curnblk > savenblk) {
1569*7c478bd9Sstevel@tonic-gate 				savecg = cg;
1570*7c478bd9Sstevel@tonic-gate 				savenblk = curnblk;
1571*7c478bd9Sstevel@tonic-gate 				savebno = curbno;
1572*7c478bd9Sstevel@tonic-gate 			}
1573*7c478bd9Sstevel@tonic-gate 		}
1574*7c478bd9Sstevel@tonic-gate 		brelse(bp);
1575*7c478bd9Sstevel@tonic-gate 		if (savenblk >= nblk)
1576*7c478bd9Sstevel@tonic-gate 			break;
1577*7c478bd9Sstevel@tonic-gate 	}
1578*7c478bd9Sstevel@tonic-gate 
1579*7c478bd9Sstevel@tonic-gate 	/* convert block offset in cg to frag offset in cg */
1580*7c478bd9Sstevel@tonic-gate 	savebno = blkstofrags(fs, savebno);
1581*7c478bd9Sstevel@tonic-gate 
1582*7c478bd9Sstevel@tonic-gate 	/* convert frag offset in cg to frag offset in fs */
1583*7c478bd9Sstevel@tonic-gate 	savebno += (savecg * fs->fs_fpg);
1584*7c478bd9Sstevel@tonic-gate 
1585*7c478bd9Sstevel@tonic-gate 	return (savebno);
1586*7c478bd9Sstevel@tonic-gate }
1587