xref: /illumos-gate/usr/src/uts/common/fs/ufs/ufs_bmap.c (revision 303bf60b)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate /*
237c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
287c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
297c478bd9Sstevel@tonic-gate 
307c478bd9Sstevel@tonic-gate /*
317c478bd9Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
327c478bd9Sstevel@tonic-gate  * The Regents of the University of California
337c478bd9Sstevel@tonic-gate  * All Rights Reserved
347c478bd9Sstevel@tonic-gate  *
357c478bd9Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
367c478bd9Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
377c478bd9Sstevel@tonic-gate  * contributors.
387c478bd9Sstevel@tonic-gate  */
397c478bd9Sstevel@tonic-gate 
407c478bd9Sstevel@tonic-gate 
417c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
427c478bd9Sstevel@tonic-gate 
437c478bd9Sstevel@tonic-gate #include <sys/types.h>
447c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
457c478bd9Sstevel@tonic-gate #include <sys/param.h>
467c478bd9Sstevel@tonic-gate #include <sys/systm.h>
477c478bd9Sstevel@tonic-gate #include <sys/signal.h>
487c478bd9Sstevel@tonic-gate #include <sys/user.h>
497c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
507c478bd9Sstevel@tonic-gate #include <sys/buf.h>
517c478bd9Sstevel@tonic-gate #include <sys/disp.h>
527c478bd9Sstevel@tonic-gate #include <sys/proc.h>
537c478bd9Sstevel@tonic-gate #include <sys/conf.h>
547c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
557c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
567c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
577c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
587c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
597c478bd9Sstevel@tonic-gate #include <vm/seg.h>
607c478bd9Sstevel@tonic-gate #include <sys/errno.h>
617c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
627c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
637c478bd9Sstevel@tonic-gate #include <sys/debug.h>
647c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
65*303bf60bSsdebnath #include <sys/cmn_err.h>
667c478bd9Sstevel@tonic-gate 
677c478bd9Sstevel@tonic-gate /*
687c478bd9Sstevel@tonic-gate  * This structure is used to track blocks as we allocate them, so that
697c478bd9Sstevel@tonic-gate  * we can free them if we encounter an error during allocation.  We
707c478bd9Sstevel@tonic-gate  * keep track of five pieces of information for each allocated block:
717c478bd9Sstevel@tonic-gate  *   - The number of the newly allocated block
727c478bd9Sstevel@tonic-gate  *   - The size of the block (lets us deal with fragments if we want)
737c478bd9Sstevel@tonic-gate  *   - The number of the block containing a pointer to it; or whether
747c478bd9Sstevel@tonic-gate  *     the pointer is in the inode
757c478bd9Sstevel@tonic-gate  *   - The offset within the block (or inode) containing a pointer to it.
767c478bd9Sstevel@tonic-gate  *   - A flag indicating the usage of the block.  (Logging needs to know
777c478bd9Sstevel@tonic-gate  *     this to avoid overwriting a data block if it was previously used
787c478bd9Sstevel@tonic-gate  *     for metadata.)
797c478bd9Sstevel@tonic-gate  */
807c478bd9Sstevel@tonic-gate 
/*
 * Identifies where the pointer to a newly allocated block is stored.
 * The enumerators keep their implicit values (0 .. 3) in declaration order.
 */
enum ufs_owner_type {
	ufs_no_owner,		/* Owner has not yet been updated */
	ufs_inode_direct,	/* Listed in inode's direct block table */
	ufs_inode_indirect,	/* Listed in inode's indirect block table */
	ufs_indirect_block	/* Listed in an indirect block */
};
877c478bd9Sstevel@tonic-gate 
887c478bd9Sstevel@tonic-gate struct ufs_allocated_block {
897c478bd9Sstevel@tonic-gate 	daddr_t this_block;	    /* Number of this block */
907c478bd9Sstevel@tonic-gate 	off_t block_size;	    /* Size of this block, in bytes */
917c478bd9Sstevel@tonic-gate 	enum ufs_owner_type owner;  /* Who points to this block? */
927c478bd9Sstevel@tonic-gate 	daddr_t owner_block;	    /* Number of the owning block */
937c478bd9Sstevel@tonic-gate 	uint_t owner_offset;	    /* Offset within that block or inode */
947c478bd9Sstevel@tonic-gate 	int usage_flags;	    /* Usage flags, as expected by free() */
957c478bd9Sstevel@tonic-gate };
967c478bd9Sstevel@tonic-gate 
977c478bd9Sstevel@tonic-gate 
987c478bd9Sstevel@tonic-gate static int findextent(struct fs *fs, daddr32_t *sbp, int n, int *lenp,
997c478bd9Sstevel@tonic-gate 		int maxtrans);
1007c478bd9Sstevel@tonic-gate 
1017c478bd9Sstevel@tonic-gate static void ufs_undo_allocation(inode_t *ip, int block_count,
1027c478bd9Sstevel@tonic-gate 	struct ufs_allocated_block table[], int inode_sector_adjust);
1037c478bd9Sstevel@tonic-gate 
/*
 * Find the extent and the matching block number.
 *
 * bsize > PAGESIZE
 *	boff indicates that we want a page in the middle
 *	min expression is supposed to make sure no extra page[s] after EOF
 * PAGESIZE >= bsize
 *	we assume that a page is a multiple of bsize, i.e.,
 *	boff always == 0
 *
 * We always return a length that is suitable for a disk transfer.
 *
 * Arguments:
 *	fs	  pointer to the file system's struct fs
 *	lbn	  logical block number being mapped
 *	boff	  byte offset within that block
 *	bnp	  out: receives the disk block number, or UFS_HOLE
 *	lenp	  out: receives the usable length in bytes; also handed to
 *		  findextent()
 *	size	  file size, used only when chkfrag is non-zero
 *	tblp	  pointer to the block-pointer table entry for lbn
 *	n	  number of table entries findextent() may examine
 *	chkfrag	  non-zero to clamp the length to the fragment-rounded size
 *	maxtrans  passed through to findextent()
 *
 * *bnp is set to UFS_HOLE when the table entry is zero or when nothing is
 * left of the extent after removing boff; the macro itself stores to *lenp
 * only when a block is found.
 *
 * Every argument is parenthesized at each use and the internal temporaries
 * carry a _dx_ prefix, so argument expressions cannot be mis-parsed or
 * captured.  fs, boff, bnp and lenp are still expanded more than once, so
 * callers must pass expressions without side effects.  The expansion is a
 * brace-enclosed block, so do not use it as the unbraced body of an if that
 * has an else.
 */
#define	DOEXTENT(fs, lbn, boff, bnp, lenp, size, tblp, n, chkfrag, maxtrans) {\
	register daddr32_t *_dx_dp = (tblp);				\
	register int _dx_chkfrag = (chkfrag); /* for lint. sigh */	\
									\
	if (*_dx_dp == 0) {						\
		*(bnp) = UFS_HOLE;					\
	} else {							\
		register int _dx_len;					\
									\
		_dx_len = findextent((fs), _dx_dp, (int)(n), (lenp),	\
		    (maxtrans)) << (fs)->fs_bshift;			\
		if (_dx_chkfrag) {					\
			register u_offset_t _dx_tmp;			\
									\
			_dx_tmp = fragroundup((fs), (size)) -		\
			    (((u_offset_t)(lbn)) << (fs)->fs_bshift);	\
			_dx_len = (int)MIN(_dx_tmp, _dx_len);		\
		}							\
		_dx_len -= (boff);					\
		if (_dx_len <= 0) {					\
			*(bnp) = UFS_HOLE;				\
		} else {						\
			*(bnp) = fsbtodb((fs), *_dx_dp) + btodb(boff);	\
			*(lenp) = _dx_len;				\
		}							\
	}								\
}
1437c478bd9Sstevel@tonic-gate 
/*
 * The maximum supported file size is actually somewhat less than 1
 * terabyte.  This is because the total number of blocks used for the
 * file and its metadata must fit into the ic_blocks field of the
 * inode, which is a signed 32-bit quantity.  The metadata allocated
 * for a file (that is, the single, double, and triple indirect blocks
 * used to reference the file blocks) is actually quite small,
 * but just to make sure, we check for overflow in the ic_blocks
 * field for all files whose total block count is within 1 GB of a
 * terabyte.  VERYLARGEFILESIZE below is the number of 512-byte blocks
 * in a terabyte (2^31), minus the number of 512-byte blocks in a
 * gigabyte (2^21).  We only check for overflow in the ic_blocks
 * field if the number of blocks currently allocated to the file is
 * greater than or equal to VERYLARGEFILESIZE.
 *
 * Note that file "size" is not the same as file "length".  A
 * file's "size" is the number of blocks allocated to it.  A file's
 * "length" is the maximum offset in the file.  A UFS file can have a
 * length of a terabyte, but the size is limited to somewhat less than
 * a terabyte, as described above.
 */
#define	VERYLARGEFILESIZE	0x7FE00000
1667c478bd9Sstevel@tonic-gate 
/*
 * bmap_{read,write} define the structure of file system storage by mapping
 * a logical offset in a file to a physical block number on the device.
 * They should be called with a locked inode when allocation is to be
 * done (bmap_write).  Note this strangeness: bmap_write is always called from
 * getpage(), not putpage(), since getpage() is where all the allocation
 * is done.
 *
 * S_READ, S_OTHER -> bmap_read; S_WRITE -> bmap_write.
 *
 * NOTICE: the block number returned is the disk block number, not the
 * file system block number.  All the worries about block offsets and
 * page/block sizes are hidden inside of bmap.  Well, not quite,
 * unfortunately.  It's impossible to find one place to hide all this
 * mess.  There are 3 cases:
 *
 * PAGESIZE < bsize
 *	In this case, the {get,put}page routines will attempt to align to
 *	a file system block boundary (XXX - maybe this is a mistake?).  Since
 *	the kluster routines may be out of memory, we don't always get all
 *	the pages we wanted.  If we called bmap first, to find out how much
 *	to kluster, we handed in the block aligned offset.  If we didn't get
 *	all the pages, we have to chop off the amount we didn't get from the
 *	amount handed back by bmap.
 *
 * PAGESIZE == bsize
 *	Life is quite pleasant here, no extra work needed, mainly because we
 *	(probably?) won't kluster backwards, just forwards.
 *
 * PAGESIZE > bsize
 *	This one has a different set of problems, specifically, we may have to
 *	do N reads to fill one page.  Let us hope that Sun will stay with small
 *	pages.
 *
 * Returns 0 on success, or a non-zero errno if an error occurs.
 *
 * TODO
 *	LMXXX - add a bmap cache.  This could be a couple of extents in the
 *	inode.  Two is nice for PAGESIZE > bsize.
 */
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate int
2097c478bd9Sstevel@tonic-gate bmap_read(struct inode *ip, u_offset_t off, daddr_t *bnp, int *lenp)
2107c478bd9Sstevel@tonic-gate {
2117c478bd9Sstevel@tonic-gate 	daddr_t lbn;
2127c478bd9Sstevel@tonic-gate 	ufsvfs_t *ufsvfsp = ip->i_ufsvfs;
2137c478bd9Sstevel@tonic-gate 	struct	fs *fs = ufsvfsp->vfs_fs;
2147c478bd9Sstevel@tonic-gate 	struct	buf *bp;
2157c478bd9Sstevel@tonic-gate 	int	i, j, boff;
2167c478bd9Sstevel@tonic-gate 	int	shft;			/* we maintain sh = 1 << shft */
2177c478bd9Sstevel@tonic-gate 	daddr_t	ob, nb, tbn;
2187c478bd9Sstevel@tonic-gate 	daddr32_t *bap;
2197c478bd9Sstevel@tonic-gate 	int	nindirshift, nindiroffset;
2207c478bd9Sstevel@tonic-gate 
2217c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
2227c478bd9Sstevel@tonic-gate 	lbn = (daddr_t)lblkno(fs, off);
2237c478bd9Sstevel@tonic-gate 	boff = (int)blkoff(fs, off);
2247c478bd9Sstevel@tonic-gate 	if (lbn < 0)
2257c478bd9Sstevel@tonic-gate 		return (EFBIG);
2267c478bd9Sstevel@tonic-gate 
2277c478bd9Sstevel@tonic-gate 	/*
2287c478bd9Sstevel@tonic-gate 	 * The first NDADDR blocks are direct blocks.
2297c478bd9Sstevel@tonic-gate 	 */
2307c478bd9Sstevel@tonic-gate 	if (lbn < NDADDR) {
2317c478bd9Sstevel@tonic-gate 		DOEXTENT(fs, lbn, boff, bnp, lenp,
2327c478bd9Sstevel@tonic-gate 		    ip->i_size, &ip->i_db[lbn], NDADDR - lbn, 1,
2337c478bd9Sstevel@tonic-gate 			ufsvfsp->vfs_iotransz);
2347c478bd9Sstevel@tonic-gate 		return (0);
2357c478bd9Sstevel@tonic-gate 	}
2367c478bd9Sstevel@tonic-gate 
2377c478bd9Sstevel@tonic-gate 	nindirshift = ufsvfsp->vfs_nindirshift;
2387c478bd9Sstevel@tonic-gate 	nindiroffset = ufsvfsp->vfs_nindiroffset;
2397c478bd9Sstevel@tonic-gate 	/*
2407c478bd9Sstevel@tonic-gate 	 * Determine how many levels of indirection.
2417c478bd9Sstevel@tonic-gate 	 */
2427c478bd9Sstevel@tonic-gate 	shft = 0;				/* sh = 1 */
2437c478bd9Sstevel@tonic-gate 	tbn = lbn - NDADDR;
2447c478bd9Sstevel@tonic-gate 	for (j = NIADDR; j > 0; j--) {
2457c478bd9Sstevel@tonic-gate 		longlong_t	sh;
2467c478bd9Sstevel@tonic-gate 
2477c478bd9Sstevel@tonic-gate 		shft += nindirshift;		/* sh *= nindir */
2487c478bd9Sstevel@tonic-gate 		sh = 1LL << shft;
2497c478bd9Sstevel@tonic-gate 		if (tbn < sh)
2507c478bd9Sstevel@tonic-gate 			break;
2517c478bd9Sstevel@tonic-gate 		tbn -= sh;
2527c478bd9Sstevel@tonic-gate 	}
2537c478bd9Sstevel@tonic-gate 	if (j == 0)
2547c478bd9Sstevel@tonic-gate 		return (EFBIG);
2557c478bd9Sstevel@tonic-gate 
2567c478bd9Sstevel@tonic-gate 	/*
2577c478bd9Sstevel@tonic-gate 	 * Fetch the first indirect block.
2587c478bd9Sstevel@tonic-gate 	 */
2597c478bd9Sstevel@tonic-gate 	nb = ip->i_ib[NIADDR - j];
2607c478bd9Sstevel@tonic-gate 	if (nb == 0) {
2617c478bd9Sstevel@tonic-gate 		*bnp = UFS_HOLE;
2627c478bd9Sstevel@tonic-gate 		return (0);
2637c478bd9Sstevel@tonic-gate 	}
2647c478bd9Sstevel@tonic-gate 
2657c478bd9Sstevel@tonic-gate 	/*
2667c478bd9Sstevel@tonic-gate 	 * Fetch through the indirect blocks.
2677c478bd9Sstevel@tonic-gate 	 */
2687c478bd9Sstevel@tonic-gate 	for (; j <= NIADDR; j++) {
2697c478bd9Sstevel@tonic-gate 		ob = nb;
2707c478bd9Sstevel@tonic-gate 		bp = UFS_BREAD(ufsvfsp,
2717c478bd9Sstevel@tonic-gate 				ip->i_dev, fsbtodb(fs, ob), fs->fs_bsize);
2727c478bd9Sstevel@tonic-gate 		if (bp->b_flags & B_ERROR) {
2737c478bd9Sstevel@tonic-gate 			brelse(bp);
2747c478bd9Sstevel@tonic-gate 			return (EIO);
2757c478bd9Sstevel@tonic-gate 		}
2767c478bd9Sstevel@tonic-gate 		bap = bp->b_un.b_daddr;
2777c478bd9Sstevel@tonic-gate 
2787c478bd9Sstevel@tonic-gate 		ASSERT(!ufs_indir_badblock(ip, bap));
2797c478bd9Sstevel@tonic-gate 
2807c478bd9Sstevel@tonic-gate 		shft -= nindirshift;		/* sh / nindir */
2817c478bd9Sstevel@tonic-gate 		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */
2827c478bd9Sstevel@tonic-gate 		nb = bap[i];
2837c478bd9Sstevel@tonic-gate 		if (nb == 0) {
2847c478bd9Sstevel@tonic-gate 			*bnp = UFS_HOLE;
2857c478bd9Sstevel@tonic-gate 			brelse(bp);
2867c478bd9Sstevel@tonic-gate 			return (0);
2877c478bd9Sstevel@tonic-gate 		}
2887c478bd9Sstevel@tonic-gate 		if (j != NIADDR)
2897c478bd9Sstevel@tonic-gate 			brelse(bp);
2907c478bd9Sstevel@tonic-gate 	}
2917c478bd9Sstevel@tonic-gate 	DOEXTENT(fs, lbn, boff, bnp, lenp, ip->i_size, &bap[i],
2927c478bd9Sstevel@tonic-gate 	    MIN(NINDIR(fs) - i, (daddr_t)lblkno(fs, ip->i_size - 1) - lbn + 1),
2937c478bd9Sstevel@tonic-gate 		0, ufsvfsp->vfs_iotransz);
2947c478bd9Sstevel@tonic-gate 	brelse(bp);
2957c478bd9Sstevel@tonic-gate 	return (0);
2967c478bd9Sstevel@tonic-gate }
2977c478bd9Sstevel@tonic-gate 
2987c478bd9Sstevel@tonic-gate /*
299*303bf60bSsdebnath  * See bmap_read for general notes.
3007c478bd9Sstevel@tonic-gate  *
3017c478bd9Sstevel@tonic-gate  * The block must be at least size bytes and will be extended or
302*303bf60bSsdebnath  * allocated as needed.  If alloc_type is of type BI_ALLOC_ONLY, then bmap
303*303bf60bSsdebnath  * will not create any in-core pages that correspond to the new disk allocation.
304*303bf60bSsdebnath  * If alloc_type is of BI_FALLOCATE, blocks will be stored as (-1) * block addr
305*303bf60bSsdebnath  * and security is maintained b/c upon reading a negative block number pages
306*303bf60bSsdebnath  * are zeroed. For all other allocation types (BI_NORMAL) the in-core pages will
307*303bf60bSsdebnath  * be created and initialized as needed.
3087c478bd9Sstevel@tonic-gate  *
3097c478bd9Sstevel@tonic-gate  * Returns 0 on success, or a non-zero errno if an error occurs.
3107c478bd9Sstevel@tonic-gate  */
3117c478bd9Sstevel@tonic-gate int
312*303bf60bSsdebnath bmap_write(struct inode	*ip, u_offset_t	off, int size,
313*303bf60bSsdebnath     enum bi_type alloc_type, daddr_t *allocblk, struct cred *cr)
3147c478bd9Sstevel@tonic-gate {
3157c478bd9Sstevel@tonic-gate 	struct	fs *fs;
3167c478bd9Sstevel@tonic-gate 	struct	buf *bp;
3177c478bd9Sstevel@tonic-gate 	int	i;
3187c478bd9Sstevel@tonic-gate 	struct	buf *nbp;
3197c478bd9Sstevel@tonic-gate 	int	j;
3207c478bd9Sstevel@tonic-gate 	int	shft;				/* we maintain sh = 1 << shft */
3217c478bd9Sstevel@tonic-gate 	daddr_t	ob, nb, pref, lbn, llbn, tbn;
3227c478bd9Sstevel@tonic-gate 	daddr32_t *bap;
3237c478bd9Sstevel@tonic-gate 	struct	vnode *vp = ITOV(ip);
3247c478bd9Sstevel@tonic-gate 	long	bsize = VBSIZE(vp);
3257c478bd9Sstevel@tonic-gate 	long	osize, nsize;
3267c478bd9Sstevel@tonic-gate 	int	issync, metaflag, isdirquota;
3277c478bd9Sstevel@tonic-gate 	int	err;
3287c478bd9Sstevel@tonic-gate 	dev_t	dev;
3297c478bd9Sstevel@tonic-gate 	struct	fbuf *fbp;
3307c478bd9Sstevel@tonic-gate 	int	nindirshift;
3317c478bd9Sstevel@tonic-gate 	int	nindiroffset;
3327c478bd9Sstevel@tonic-gate 	struct	ufsvfs	*ufsvfsp;
3337c478bd9Sstevel@tonic-gate 	int	added_sectors;		/* sectors added to this inode */
3347c478bd9Sstevel@tonic-gate 	int	alloced_blocks;		/* fs blocks newly allocated */
3357c478bd9Sstevel@tonic-gate 	struct  ufs_allocated_block undo_table[NIADDR+1];
3367c478bd9Sstevel@tonic-gate 	int	verylargefile = 0;
3377c478bd9Sstevel@tonic-gate 
3387c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
3397c478bd9Sstevel@tonic-gate 
340*303bf60bSsdebnath 	if (allocblk)
341*303bf60bSsdebnath 		*allocblk = 0;
342*303bf60bSsdebnath 
3437c478bd9Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
3447c478bd9Sstevel@tonic-gate 	fs = ufsvfsp->vfs_bufp->b_un.b_fs;
3457c478bd9Sstevel@tonic-gate 	lbn = (daddr_t)lblkno(fs, off);
3467c478bd9Sstevel@tonic-gate 	if (lbn < 0)
3477c478bd9Sstevel@tonic-gate 		return (EFBIG);
3487c478bd9Sstevel@tonic-gate 	if (ip->i_blocks >= VERYLARGEFILESIZE)
3497c478bd9Sstevel@tonic-gate 		verylargefile = 1;
3507c478bd9Sstevel@tonic-gate 	llbn = (daddr_t)((ip->i_size) ? lblkno(fs, ip->i_size - 1) : 0);
3517c478bd9Sstevel@tonic-gate 	metaflag = isdirquota = 0;
3527c478bd9Sstevel@tonic-gate 	if (((ip->i_mode & IFMT) == IFDIR) ||
3537c478bd9Sstevel@tonic-gate 	    ((ip->i_mode & IFMT) == IFATTRDIR))
3547c478bd9Sstevel@tonic-gate 		isdirquota = metaflag = I_DIR;
3557c478bd9Sstevel@tonic-gate 	else if ((ip->i_mode & IFMT) == IFSHAD)
3567c478bd9Sstevel@tonic-gate 		metaflag = I_SHAD;
3577c478bd9Sstevel@tonic-gate 	else if (ip->i_ufsvfs->vfs_qinod == ip)
3587c478bd9Sstevel@tonic-gate 		isdirquota = metaflag = I_QUOTA;
3597c478bd9Sstevel@tonic-gate 
3607c478bd9Sstevel@tonic-gate 	issync = ((ip->i_flag & ISYNC) != 0);
3617c478bd9Sstevel@tonic-gate 
3627c478bd9Sstevel@tonic-gate 	if (isdirquota || issync) {
363*303bf60bSsdebnath 		alloc_type = BI_NORMAL;	/* make sure */
3647c478bd9Sstevel@tonic-gate 	}
3657c478bd9Sstevel@tonic-gate 
3667c478bd9Sstevel@tonic-gate 	/*
3677c478bd9Sstevel@tonic-gate 	 * If the next write will extend the file into a new block,
3687c478bd9Sstevel@tonic-gate 	 * and the file is currently composed of a fragment
3697c478bd9Sstevel@tonic-gate 	 * this fragment has to be extended to be a full block.
3707c478bd9Sstevel@tonic-gate 	 */
3717c478bd9Sstevel@tonic-gate 	if (llbn < NDADDR && llbn < lbn && (ob = ip->i_db[llbn]) != 0) {
3727c478bd9Sstevel@tonic-gate 		osize = blksize(fs, ip, llbn);
3737c478bd9Sstevel@tonic-gate 		if (osize < bsize && osize > 0) {
3747c478bd9Sstevel@tonic-gate 			/*
3757c478bd9Sstevel@tonic-gate 			 * Check to see if doing this will make the file too
3767c478bd9Sstevel@tonic-gate 			 * big.  Only check if we are dealing with a very
3777c478bd9Sstevel@tonic-gate 			 * large file.
3787c478bd9Sstevel@tonic-gate 			 */
3797c478bd9Sstevel@tonic-gate 			if (verylargefile == 1) {
3807c478bd9Sstevel@tonic-gate 				if (((unsigned)ip->i_blocks +
3817c478bd9Sstevel@tonic-gate 				    btodb(bsize - osize)) > INT_MAX) {
3827c478bd9Sstevel@tonic-gate 					return (EFBIG);
3837c478bd9Sstevel@tonic-gate 				}
3847c478bd9Sstevel@tonic-gate 			}
3857c478bd9Sstevel@tonic-gate 			/*
3867c478bd9Sstevel@tonic-gate 			 * Make sure we have all needed pages setup correctly.
3877c478bd9Sstevel@tonic-gate 			 *
3887c478bd9Sstevel@tonic-gate 			 * We pass S_OTHER to fbread here because we want
3897c478bd9Sstevel@tonic-gate 			 * an exclusive lock on the page in question
3907c478bd9Sstevel@tonic-gate 			 * (see ufs_getpage). I/O to the old block location
3917c478bd9Sstevel@tonic-gate 			 * may still be in progress and we are about to free
3927c478bd9Sstevel@tonic-gate 			 * the old block. We don't want anyone else to get
3937c478bd9Sstevel@tonic-gate 			 * a hold of the old block once we free it until
3947c478bd9Sstevel@tonic-gate 			 * the I/O is complete.
3957c478bd9Sstevel@tonic-gate 			 */
3967c478bd9Sstevel@tonic-gate 			err = fbread(ITOV(ip),
3977c478bd9Sstevel@tonic-gate 				    ((offset_t)llbn << fs->fs_bshift),
3987c478bd9Sstevel@tonic-gate 					(uint_t)bsize, S_OTHER, &fbp);
3997c478bd9Sstevel@tonic-gate 			if (err)
4007c478bd9Sstevel@tonic-gate 				return (err);
4017c478bd9Sstevel@tonic-gate 			pref = blkpref(ip, llbn, (int)llbn, &ip->i_db[0]);
4027c478bd9Sstevel@tonic-gate 			err = realloccg(ip, ob, pref, (int)osize, (int)bsize,
4037c478bd9Sstevel@tonic-gate 					&nb, cr);
4047c478bd9Sstevel@tonic-gate 			if (err) {
4057c478bd9Sstevel@tonic-gate 				if (fbp)
4067c478bd9Sstevel@tonic-gate 					fbrelse(fbp, S_OTHER);
4077c478bd9Sstevel@tonic-gate 				return (err);
4087c478bd9Sstevel@tonic-gate 			}
4097c478bd9Sstevel@tonic-gate 			ASSERT(!ufs_badblock(ip, nb));
4107c478bd9Sstevel@tonic-gate 
4117c478bd9Sstevel@tonic-gate 			/*
4127c478bd9Sstevel@tonic-gate 			 * Update the inode before releasing the
4137c478bd9Sstevel@tonic-gate 			 * lock on the page. If we released the page
4147c478bd9Sstevel@tonic-gate 			 * lock first, the data could be written to it's
4157c478bd9Sstevel@tonic-gate 			 * old address and then destroyed.
4167c478bd9Sstevel@tonic-gate 			 */
4177c478bd9Sstevel@tonic-gate 			TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 0);
4187c478bd9Sstevel@tonic-gate 			ip->i_db[llbn] = nb;
4197c478bd9Sstevel@tonic-gate 			UFS_SET_ISIZE(((u_offset_t)(llbn + 1)) << fs->fs_bshift,
4207c478bd9Sstevel@tonic-gate 			    ip);
4217c478bd9Sstevel@tonic-gate 			ip->i_blocks += btodb(bsize - osize);
4227c478bd9Sstevel@tonic-gate 			ASSERT((unsigned)ip->i_blocks <= INT_MAX);
4237c478bd9Sstevel@tonic-gate 			TRANS_INODE(ufsvfsp, ip);
4247c478bd9Sstevel@tonic-gate 			ip->i_flag |= IUPD | ICHG | IATTCHG;
425*303bf60bSsdebnath 
4267c478bd9Sstevel@tonic-gate 			/* Caller is responsible for updating i_seq */
4277c478bd9Sstevel@tonic-gate 			/*
4287c478bd9Sstevel@tonic-gate 			 * Don't check metaflag here, directories won't do this
4297c478bd9Sstevel@tonic-gate 			 *
4307c478bd9Sstevel@tonic-gate 			 */
4317c478bd9Sstevel@tonic-gate 			if (issync) {
4327c478bd9Sstevel@tonic-gate 				(void) ufs_fbiwrite(fbp, ip, nb, fs->fs_fsize);
4337c478bd9Sstevel@tonic-gate 			} else {
4347c478bd9Sstevel@tonic-gate 				ASSERT(fbp);
4357c478bd9Sstevel@tonic-gate 				fbrelse(fbp, S_WRITE);
4367c478bd9Sstevel@tonic-gate 			}
4377c478bd9Sstevel@tonic-gate 
4387c478bd9Sstevel@tonic-gate 			if (nb != ob) {
4397c478bd9Sstevel@tonic-gate 				(void) free(ip, ob, (off_t)osize, metaflag);
4407c478bd9Sstevel@tonic-gate 			}
4417c478bd9Sstevel@tonic-gate 		}
4427c478bd9Sstevel@tonic-gate 	}
4437c478bd9Sstevel@tonic-gate 
4447c478bd9Sstevel@tonic-gate 	/*
4457c478bd9Sstevel@tonic-gate 	 * The first NDADDR blocks are direct blocks.
4467c478bd9Sstevel@tonic-gate 	 */
4477c478bd9Sstevel@tonic-gate 	if (lbn < NDADDR) {
4487c478bd9Sstevel@tonic-gate 		nb = ip->i_db[lbn];
4497c478bd9Sstevel@tonic-gate 		if (nb == 0 ||
4507c478bd9Sstevel@tonic-gate 		    ip->i_size < ((u_offset_t)(lbn + 1)) << fs->fs_bshift) {
4517c478bd9Sstevel@tonic-gate 			if (nb != 0) {
4527c478bd9Sstevel@tonic-gate 				/* consider need to reallocate a frag */
4537c478bd9Sstevel@tonic-gate 				osize = fragroundup(fs, blkoff(fs, ip->i_size));
4547c478bd9Sstevel@tonic-gate 				nsize = fragroundup(fs, size);
4557c478bd9Sstevel@tonic-gate 				if (nsize <= osize)
4567c478bd9Sstevel@tonic-gate 					goto gotit;
4577c478bd9Sstevel@tonic-gate 				/*
4587c478bd9Sstevel@tonic-gate 				 * Check to see if doing this will make the
4597c478bd9Sstevel@tonic-gate 				 * file too big.  Only check if we are dealing
4607c478bd9Sstevel@tonic-gate 				 * with a very large file.
4617c478bd9Sstevel@tonic-gate 				 */
4627c478bd9Sstevel@tonic-gate 				if (verylargefile == 1) {
4637c478bd9Sstevel@tonic-gate 					if (((unsigned)ip->i_blocks +
4647c478bd9Sstevel@tonic-gate 					    btodb(nsize - osize)) > INT_MAX) {
4657c478bd9Sstevel@tonic-gate 						return (EFBIG);
4667c478bd9Sstevel@tonic-gate 					}
4677c478bd9Sstevel@tonic-gate 				}
4687c478bd9Sstevel@tonic-gate 				/*
469*303bf60bSsdebnath 				 * need to re-allocate a block or frag
4707c478bd9Sstevel@tonic-gate 				 */
4717c478bd9Sstevel@tonic-gate 				ob = nb;
4727c478bd9Sstevel@tonic-gate 				pref = blkpref(ip, lbn, (int)lbn,
4737c478bd9Sstevel@tonic-gate 								&ip->i_db[0]);
4747c478bd9Sstevel@tonic-gate 				err = realloccg(ip, ob, pref, (int)osize,
4757c478bd9Sstevel@tonic-gate 						(int)nsize, &nb, cr);
4767c478bd9Sstevel@tonic-gate 				if (err)
4777c478bd9Sstevel@tonic-gate 					return (err);
478*303bf60bSsdebnath 				if (allocblk)
479*303bf60bSsdebnath 					*allocblk = nb;
4807c478bd9Sstevel@tonic-gate 				ASSERT(!ufs_badblock(ip, nb));
4817c478bd9Sstevel@tonic-gate 
4827c478bd9Sstevel@tonic-gate 			} else {
4837c478bd9Sstevel@tonic-gate 				/*
4847c478bd9Sstevel@tonic-gate 				 * need to allocate a block or frag
4857c478bd9Sstevel@tonic-gate 				 */
4867c478bd9Sstevel@tonic-gate 				osize = 0;
4877c478bd9Sstevel@tonic-gate 				if (ip->i_size <
4887c478bd9Sstevel@tonic-gate 				    ((u_offset_t)(lbn + 1)) << fs->fs_bshift)
4897c478bd9Sstevel@tonic-gate 					nsize = fragroundup(fs, size);
4907c478bd9Sstevel@tonic-gate 				else
4917c478bd9Sstevel@tonic-gate 					nsize = bsize;
4927c478bd9Sstevel@tonic-gate 				/*
4937c478bd9Sstevel@tonic-gate 				 * Check to see if doing this will make the
4947c478bd9Sstevel@tonic-gate 				 * file too big.  Only check if we are dealing
4957c478bd9Sstevel@tonic-gate 				 * with a very large file.
4967c478bd9Sstevel@tonic-gate 				 */
4977c478bd9Sstevel@tonic-gate 				if (verylargefile == 1) {
4987c478bd9Sstevel@tonic-gate 					if (((unsigned)ip->i_blocks +
4997c478bd9Sstevel@tonic-gate 					    btodb(nsize - osize)) > INT_MAX) {
5007c478bd9Sstevel@tonic-gate 						return (EFBIG);
5017c478bd9Sstevel@tonic-gate 					}
5027c478bd9Sstevel@tonic-gate 				}
5037c478bd9Sstevel@tonic-gate 				pref = blkpref(ip, lbn, (int)lbn, &ip->i_db[0]);
5047c478bd9Sstevel@tonic-gate 				err = alloc(ip, pref, (int)nsize, &nb, cr);
5057c478bd9Sstevel@tonic-gate 				if (err)
5067c478bd9Sstevel@tonic-gate 					return (err);
507*303bf60bSsdebnath 				if (allocblk)
508*303bf60bSsdebnath 					*allocblk = nb;
5097c478bd9Sstevel@tonic-gate 				ASSERT(!ufs_badblock(ip, nb));
5107c478bd9Sstevel@tonic-gate 				ob = nb;
5117c478bd9Sstevel@tonic-gate 			}
5127c478bd9Sstevel@tonic-gate 
5137c478bd9Sstevel@tonic-gate 			/*
5147c478bd9Sstevel@tonic-gate 			 * Read old/create new zero pages
5157c478bd9Sstevel@tonic-gate 			 */
5167c478bd9Sstevel@tonic-gate 			fbp = NULL;
5177c478bd9Sstevel@tonic-gate 			if (osize == 0) {
5187c478bd9Sstevel@tonic-gate 				/*
5197c478bd9Sstevel@tonic-gate 				 * mmap S_WRITE faults always enter here
5207c478bd9Sstevel@tonic-gate 				 */
521*303bf60bSsdebnath 				/*
522*303bf60bSsdebnath 				 * We zero it if its also BI_FALLOCATE, but
523*303bf60bSsdebnath 				 * only for direct blocks!
524*303bf60bSsdebnath 				 */
525*303bf60bSsdebnath 				if (alloc_type == BI_NORMAL ||
526*303bf60bSsdebnath 				    alloc_type == BI_FALLOCATE ||
527*303bf60bSsdebnath 				    P2ROUNDUP_TYPED(size,
5287c478bd9Sstevel@tonic-gate 				    PAGESIZE, u_offset_t) < nsize) {
5297c478bd9Sstevel@tonic-gate 					/* fbzero doesn't cause a pagefault */
5307c478bd9Sstevel@tonic-gate 					fbzero(ITOV(ip),
5317c478bd9Sstevel@tonic-gate 					    ((offset_t)lbn << fs->fs_bshift),
5327c478bd9Sstevel@tonic-gate 					    (uint_t)nsize, &fbp);
5337c478bd9Sstevel@tonic-gate 				}
5347c478bd9Sstevel@tonic-gate 			} else {
5357c478bd9Sstevel@tonic-gate 				err = fbread(vp,
5367c478bd9Sstevel@tonic-gate 				    ((offset_t)lbn << fs->fs_bshift),
5377c478bd9Sstevel@tonic-gate 				    (uint_t)nsize, S_OTHER, &fbp);
5387c478bd9Sstevel@tonic-gate 				if (err) {
5397c478bd9Sstevel@tonic-gate 					if (nb != ob) {
5407c478bd9Sstevel@tonic-gate 						(void) free(ip, nb,
5417c478bd9Sstevel@tonic-gate 						    (off_t)nsize, metaflag);
5427c478bd9Sstevel@tonic-gate 					} else {
5437c478bd9Sstevel@tonic-gate 						(void) free(ip,
5447c478bd9Sstevel@tonic-gate 						    ob + numfrags(fs, osize),
5457c478bd9Sstevel@tonic-gate 						    (off_t)(nsize - osize),
5467c478bd9Sstevel@tonic-gate 						    metaflag);
5477c478bd9Sstevel@tonic-gate 					}
5487c478bd9Sstevel@tonic-gate 					ASSERT(nsize >= osize);
5497c478bd9Sstevel@tonic-gate 					(void) chkdq(ip,
5507c478bd9Sstevel@tonic-gate 						-(long)btodb(nsize - osize),
5517c478bd9Sstevel@tonic-gate 						0, cr, (char **)NULL,
5527c478bd9Sstevel@tonic-gate 						(size_t *)NULL);
5537c478bd9Sstevel@tonic-gate 					return (err);
5547c478bd9Sstevel@tonic-gate 				}
5557c478bd9Sstevel@tonic-gate 			}
5567c478bd9Sstevel@tonic-gate 			TRANS_MATA_ALLOC(ufsvfsp, ip, nb, nsize, 0);
5577c478bd9Sstevel@tonic-gate 			ip->i_db[lbn] = nb;
5587c478bd9Sstevel@tonic-gate 			ip->i_blocks += btodb(nsize - osize);
5597c478bd9Sstevel@tonic-gate 			ASSERT((unsigned)ip->i_blocks <= INT_MAX);
5607c478bd9Sstevel@tonic-gate 			TRANS_INODE(ufsvfsp, ip);
5617c478bd9Sstevel@tonic-gate 			ip->i_flag |= IUPD | ICHG | IATTCHG;
562*303bf60bSsdebnath 
5637c478bd9Sstevel@tonic-gate 			/* Caller is responsible for updating i_seq */
5647c478bd9Sstevel@tonic-gate 
5657c478bd9Sstevel@tonic-gate 			/*
5667c478bd9Sstevel@tonic-gate 			 * Write directory and shadow blocks synchronously so
5677c478bd9Sstevel@tonic-gate 			 * that they never appear with garbage in them on the
5687c478bd9Sstevel@tonic-gate 			 * disk.
5697c478bd9Sstevel@tonic-gate 			 *
5707c478bd9Sstevel@tonic-gate 			 */
5717c478bd9Sstevel@tonic-gate 			if (isdirquota && (ip->i_size ||
5727c478bd9Sstevel@tonic-gate 			    TRANS_ISTRANS(ufsvfsp))) {
5737c478bd9Sstevel@tonic-gate 			/*
5747c478bd9Sstevel@tonic-gate 			 * XXX may not be necessary with harpy trans
5757c478bd9Sstevel@tonic-gate 			 * bug id 1130055
5767c478bd9Sstevel@tonic-gate 			 */
5777c478bd9Sstevel@tonic-gate 				(void) ufs_fbiwrite(fbp, ip, nb, fs->fs_fsize);
5787c478bd9Sstevel@tonic-gate 			} else if (fbp) {
5797c478bd9Sstevel@tonic-gate 				fbrelse(fbp, S_WRITE);
5807c478bd9Sstevel@tonic-gate 			}
5817c478bd9Sstevel@tonic-gate 
5827c478bd9Sstevel@tonic-gate 			if (nb != ob)
5837c478bd9Sstevel@tonic-gate 				(void) free(ip, ob, (off_t)osize, metaflag);
5847c478bd9Sstevel@tonic-gate 		}
5857c478bd9Sstevel@tonic-gate gotit:
5867c478bd9Sstevel@tonic-gate 		return (0);
5877c478bd9Sstevel@tonic-gate 	}
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate 	added_sectors = alloced_blocks = 0;	/* No blocks alloced yet */
5907c478bd9Sstevel@tonic-gate 
5917c478bd9Sstevel@tonic-gate 	/*
5927c478bd9Sstevel@tonic-gate 	 * Determine how many levels of indirection.
5937c478bd9Sstevel@tonic-gate 	 */
5947c478bd9Sstevel@tonic-gate 	nindirshift = ip->i_ufsvfs->vfs_nindirshift;
5957c478bd9Sstevel@tonic-gate 	nindiroffset = ip->i_ufsvfs->vfs_nindiroffset;
5967c478bd9Sstevel@tonic-gate 	pref = 0;
5977c478bd9Sstevel@tonic-gate 	shft = 0;				/* sh = 1 */
5987c478bd9Sstevel@tonic-gate 	tbn = lbn - NDADDR;
5997c478bd9Sstevel@tonic-gate 	for (j = NIADDR; j > 0; j--) {
6007c478bd9Sstevel@tonic-gate 		longlong_t	sh;
6017c478bd9Sstevel@tonic-gate 
6027c478bd9Sstevel@tonic-gate 		shft += nindirshift;		/* sh *= nindir */
6037c478bd9Sstevel@tonic-gate 		sh = 1LL << shft;
6047c478bd9Sstevel@tonic-gate 		if (tbn < sh)
6057c478bd9Sstevel@tonic-gate 			break;
6067c478bd9Sstevel@tonic-gate 		tbn -= sh;
6077c478bd9Sstevel@tonic-gate 	}
6087c478bd9Sstevel@tonic-gate 
6097c478bd9Sstevel@tonic-gate 	if (j == 0)
6107c478bd9Sstevel@tonic-gate 		return (EFBIG);
6117c478bd9Sstevel@tonic-gate 
6127c478bd9Sstevel@tonic-gate 	/*
6137c478bd9Sstevel@tonic-gate 	 * Fetch the first indirect block.
6147c478bd9Sstevel@tonic-gate 	 */
6157c478bd9Sstevel@tonic-gate 	dev = ip->i_dev;
6167c478bd9Sstevel@tonic-gate 	nb = ip->i_ib[NIADDR - j];
6177c478bd9Sstevel@tonic-gate 	if (nb == 0) {
6187c478bd9Sstevel@tonic-gate 		/*
6197c478bd9Sstevel@tonic-gate 		 * Check to see if doing this will make the
6207c478bd9Sstevel@tonic-gate 		 * file too big.  Only check if we are dealing
6217c478bd9Sstevel@tonic-gate 		 * with a very large file.
6227c478bd9Sstevel@tonic-gate 		 */
6237c478bd9Sstevel@tonic-gate 		if (verylargefile == 1) {
6247c478bd9Sstevel@tonic-gate 			if (((unsigned)ip->i_blocks + btodb(bsize))
6257c478bd9Sstevel@tonic-gate 			    > INT_MAX) {
6267c478bd9Sstevel@tonic-gate 				return (EFBIG);
6277c478bd9Sstevel@tonic-gate 			}
6287c478bd9Sstevel@tonic-gate 		}
6297c478bd9Sstevel@tonic-gate 		/*
6307c478bd9Sstevel@tonic-gate 		 * Need to allocate an indirect block.
6317c478bd9Sstevel@tonic-gate 		 */
6327c478bd9Sstevel@tonic-gate 		pref = blkpref(ip, lbn, 0, (daddr32_t *)0);
6337c478bd9Sstevel@tonic-gate 		err = alloc(ip, pref, (int)bsize, &nb, cr);
6347c478bd9Sstevel@tonic-gate 		if (err)
6357c478bd9Sstevel@tonic-gate 			return (err);
6367c478bd9Sstevel@tonic-gate 		TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 1);
6377c478bd9Sstevel@tonic-gate 		ASSERT(!ufs_badblock(ip, nb));
6387c478bd9Sstevel@tonic-gate 
6397c478bd9Sstevel@tonic-gate 		/*
6407c478bd9Sstevel@tonic-gate 		 * Keep track of this allocation so we can undo it if we
6417c478bd9Sstevel@tonic-gate 		 * get an error later.
6427c478bd9Sstevel@tonic-gate 		 */
6437c478bd9Sstevel@tonic-gate 
6447c478bd9Sstevel@tonic-gate 		ASSERT(alloced_blocks <= NIADDR);
6457c478bd9Sstevel@tonic-gate 
6467c478bd9Sstevel@tonic-gate 		undo_table[alloced_blocks].this_block = nb;
6477c478bd9Sstevel@tonic-gate 		undo_table[alloced_blocks].block_size = bsize;
6487c478bd9Sstevel@tonic-gate 		undo_table[alloced_blocks].owner = ufs_no_owner;
6497c478bd9Sstevel@tonic-gate 		undo_table[alloced_blocks].usage_flags = metaflag | I_IBLK;
6507c478bd9Sstevel@tonic-gate 
6517c478bd9Sstevel@tonic-gate 		alloced_blocks++;
6527c478bd9Sstevel@tonic-gate 
6537c478bd9Sstevel@tonic-gate 		/*
6547c478bd9Sstevel@tonic-gate 		 * Write zero block synchronously so that
6557c478bd9Sstevel@tonic-gate 		 * indirect blocks never point at garbage.
6567c478bd9Sstevel@tonic-gate 		 */
6577c478bd9Sstevel@tonic-gate 		bp = UFS_GETBLK(ufsvfsp, dev, fsbtodb(fs, nb), bsize);
6587c478bd9Sstevel@tonic-gate 
6597c478bd9Sstevel@tonic-gate 		clrbuf(bp);
6607c478bd9Sstevel@tonic-gate 		/* XXX Maybe special-case this? */
6617c478bd9Sstevel@tonic-gate 		TRANS_BUF(ufsvfsp, 0, bsize, bp, DT_ABZERO);
6627c478bd9Sstevel@tonic-gate 		UFS_BWRITE2(ufsvfsp, bp);
6637c478bd9Sstevel@tonic-gate 		if (bp->b_flags & B_ERROR) {
6647c478bd9Sstevel@tonic-gate 			err = geterror(bp);
6657c478bd9Sstevel@tonic-gate 			brelse(bp);
6667c478bd9Sstevel@tonic-gate 			ufs_undo_allocation(ip, alloced_blocks,
6677c478bd9Sstevel@tonic-gate 			    undo_table, added_sectors);
6687c478bd9Sstevel@tonic-gate 			return (err);
6697c478bd9Sstevel@tonic-gate 		}
6707c478bd9Sstevel@tonic-gate 		brelse(bp);
6717c478bd9Sstevel@tonic-gate 
6727c478bd9Sstevel@tonic-gate 		ip->i_ib[NIADDR - j] = nb;
6737c478bd9Sstevel@tonic-gate 		added_sectors += btodb(bsize);
6747c478bd9Sstevel@tonic-gate 		ip->i_blocks += btodb(bsize);
6757c478bd9Sstevel@tonic-gate 		ASSERT((unsigned)ip->i_blocks <= INT_MAX);
6767c478bd9Sstevel@tonic-gate 		TRANS_INODE(ufsvfsp, ip);
6777c478bd9Sstevel@tonic-gate 		ip->i_flag |= IUPD | ICHG | IATTCHG;
6787c478bd9Sstevel@tonic-gate 		/* Caller is responsible for updating i_seq */
6797c478bd9Sstevel@tonic-gate 
6807c478bd9Sstevel@tonic-gate 		/*
6817c478bd9Sstevel@tonic-gate 		 * Update the 'undo table' now that we've linked this block
6827c478bd9Sstevel@tonic-gate 		 * to an inode.
6837c478bd9Sstevel@tonic-gate 		 */
6847c478bd9Sstevel@tonic-gate 
6857c478bd9Sstevel@tonic-gate 		undo_table[alloced_blocks-1].owner = ufs_inode_indirect;
6867c478bd9Sstevel@tonic-gate 		undo_table[alloced_blocks-1].owner_offset = NIADDR - j;
6877c478bd9Sstevel@tonic-gate 
6887c478bd9Sstevel@tonic-gate 		/*
6897c478bd9Sstevel@tonic-gate 		 * In the ISYNC case, wrip will notice that the block
6907c478bd9Sstevel@tonic-gate 		 * count on the inode has changed and will be sure to
6917c478bd9Sstevel@tonic-gate 		 * ufs_iupdat the inode at the end of wrip.
6927c478bd9Sstevel@tonic-gate 		 */
6937c478bd9Sstevel@tonic-gate 	}
6947c478bd9Sstevel@tonic-gate 
6957c478bd9Sstevel@tonic-gate 	/*
6967c478bd9Sstevel@tonic-gate 	 * Fetch through the indirect blocks.
6977c478bd9Sstevel@tonic-gate 	 */
6987c478bd9Sstevel@tonic-gate 	for (; j <= NIADDR; j++) {
6997c478bd9Sstevel@tonic-gate 		ob = nb;
7007c478bd9Sstevel@tonic-gate 		bp = UFS_BREAD(ufsvfsp, ip->i_dev, fsbtodb(fs, ob), bsize);
7017c478bd9Sstevel@tonic-gate 
7027c478bd9Sstevel@tonic-gate 		if (bp->b_flags & B_ERROR) {
7037c478bd9Sstevel@tonic-gate 			err = geterror(bp);
7047c478bd9Sstevel@tonic-gate 			brelse(bp);
7057c478bd9Sstevel@tonic-gate 			/*
7067c478bd9Sstevel@tonic-gate 			 * Return any partial allocations.
7077c478bd9Sstevel@tonic-gate 			 *
7087c478bd9Sstevel@tonic-gate 			 * It is possible that we have not yet made any
7097c478bd9Sstevel@tonic-gate 			 * allocations at this point (if this is the first
7107c478bd9Sstevel@tonic-gate 			 * pass through the loop and we didn't have to
7117c478bd9Sstevel@tonic-gate 			 * allocate the first indirect block, above).
7127c478bd9Sstevel@tonic-gate 			 * In this case, alloced_blocks and added_sectors will
7137c478bd9Sstevel@tonic-gate 			 * be zero, and ufs_undo_allocation will do nothing.
7147c478bd9Sstevel@tonic-gate 			 */
7157c478bd9Sstevel@tonic-gate 			ufs_undo_allocation(ip, alloced_blocks,
7167c478bd9Sstevel@tonic-gate 			    undo_table, added_sectors);
7177c478bd9Sstevel@tonic-gate 			return (err);
7187c478bd9Sstevel@tonic-gate 		}
7197c478bd9Sstevel@tonic-gate 		bap = bp->b_un.b_daddr;
7207c478bd9Sstevel@tonic-gate 		shft -= nindirshift;		/* sh /= nindir */
7217c478bd9Sstevel@tonic-gate 		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */
7227c478bd9Sstevel@tonic-gate 		nb = bap[i];
723*303bf60bSsdebnath 
7247c478bd9Sstevel@tonic-gate 		if (nb == 0) {
7257c478bd9Sstevel@tonic-gate 			/*
7267c478bd9Sstevel@tonic-gate 			 * Check to see if doing this will make the
7277c478bd9Sstevel@tonic-gate 			 * file too big.  Only check if we are dealing
7287c478bd9Sstevel@tonic-gate 			 * with a very large file.
7297c478bd9Sstevel@tonic-gate 			 */
7307c478bd9Sstevel@tonic-gate 			if (verylargefile == 1) {
7317c478bd9Sstevel@tonic-gate 				if (((unsigned)ip->i_blocks + btodb(bsize))
7327c478bd9Sstevel@tonic-gate 				    > INT_MAX) {
7337c478bd9Sstevel@tonic-gate 					brelse(bp);
7347c478bd9Sstevel@tonic-gate 					ufs_undo_allocation(ip, alloced_blocks,
7357c478bd9Sstevel@tonic-gate 					    undo_table, added_sectors);
7367c478bd9Sstevel@tonic-gate 					return (EFBIG);
7377c478bd9Sstevel@tonic-gate 				}
7387c478bd9Sstevel@tonic-gate 			}
7397c478bd9Sstevel@tonic-gate 			if (pref == 0) {
7407c478bd9Sstevel@tonic-gate 				if (j < NIADDR) {
7417c478bd9Sstevel@tonic-gate 					/* Indirect block */
7427c478bd9Sstevel@tonic-gate 					pref = blkpref(ip, lbn, 0,
7437c478bd9Sstevel@tonic-gate 						(daddr32_t *)0);
7447c478bd9Sstevel@tonic-gate 				} else {
7457c478bd9Sstevel@tonic-gate 					/* Data block */
7467c478bd9Sstevel@tonic-gate 					pref = blkpref(ip, lbn, i, &bap[0]);
7477c478bd9Sstevel@tonic-gate 				}
7487c478bd9Sstevel@tonic-gate 			}
7497c478bd9Sstevel@tonic-gate 
7507c478bd9Sstevel@tonic-gate 			/*
7517c478bd9Sstevel@tonic-gate 			 * release "bp" buf to avoid deadlock (re-bread later)
7527c478bd9Sstevel@tonic-gate 			 */
7537c478bd9Sstevel@tonic-gate 			brelse(bp);
7547c478bd9Sstevel@tonic-gate 
7557c478bd9Sstevel@tonic-gate 			err = alloc(ip, pref, (int)bsize, &nb, cr);
7567c478bd9Sstevel@tonic-gate 			if (err) {
7577c478bd9Sstevel@tonic-gate 				/*
7587c478bd9Sstevel@tonic-gate 				 * Return any partial allocations.
7597c478bd9Sstevel@tonic-gate 				 */
7607c478bd9Sstevel@tonic-gate 				ufs_undo_allocation(ip, alloced_blocks,
7617c478bd9Sstevel@tonic-gate 				    undo_table, added_sectors);
7627c478bd9Sstevel@tonic-gate 				return (err);
7637c478bd9Sstevel@tonic-gate 			}
7647c478bd9Sstevel@tonic-gate 
7657c478bd9Sstevel@tonic-gate 			ASSERT(!ufs_badblock(ip, nb));
7667c478bd9Sstevel@tonic-gate 			ASSERT(alloced_blocks <= NIADDR);
7677c478bd9Sstevel@tonic-gate 
768*303bf60bSsdebnath 			if (allocblk)
769*303bf60bSsdebnath 				*allocblk = nb;
770*303bf60bSsdebnath 
7717c478bd9Sstevel@tonic-gate 			undo_table[alloced_blocks].this_block = nb;
7727c478bd9Sstevel@tonic-gate 			undo_table[alloced_blocks].block_size = bsize;
7737c478bd9Sstevel@tonic-gate 			undo_table[alloced_blocks].owner = ufs_no_owner;
7747c478bd9Sstevel@tonic-gate 			undo_table[alloced_blocks].usage_flags = metaflag |
7757c478bd9Sstevel@tonic-gate 			    ((j < NIADDR) ? I_IBLK : 0);
7767c478bd9Sstevel@tonic-gate 
7777c478bd9Sstevel@tonic-gate 			alloced_blocks++;
7787c478bd9Sstevel@tonic-gate 
7797c478bd9Sstevel@tonic-gate 			if (j < NIADDR) {
7807c478bd9Sstevel@tonic-gate 				TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 1);
7817c478bd9Sstevel@tonic-gate 				/*
7827c478bd9Sstevel@tonic-gate 				 * Write synchronously so indirect
7837c478bd9Sstevel@tonic-gate 				 * blocks never point at garbage.
7847c478bd9Sstevel@tonic-gate 				 */
7857c478bd9Sstevel@tonic-gate 				nbp = UFS_GETBLK(
7867c478bd9Sstevel@tonic-gate 					ufsvfsp, dev, fsbtodb(fs, nb), bsize);
7877c478bd9Sstevel@tonic-gate 
7887c478bd9Sstevel@tonic-gate 				clrbuf(nbp);
7897c478bd9Sstevel@tonic-gate 				/* XXX Maybe special-case this? */
7907c478bd9Sstevel@tonic-gate 				TRANS_BUF(ufsvfsp, 0, bsize, nbp, DT_ABZERO);
7917c478bd9Sstevel@tonic-gate 				UFS_BWRITE2(ufsvfsp, nbp);
7927c478bd9Sstevel@tonic-gate 				if (nbp->b_flags & B_ERROR) {
7937c478bd9Sstevel@tonic-gate 					err = geterror(nbp);
7947c478bd9Sstevel@tonic-gate 					brelse(nbp);
7957c478bd9Sstevel@tonic-gate 					/*
7967c478bd9Sstevel@tonic-gate 					 * Return any partial
7977c478bd9Sstevel@tonic-gate 					 * allocations.
7987c478bd9Sstevel@tonic-gate 					 */
7997c478bd9Sstevel@tonic-gate 					ufs_undo_allocation(ip,
8007c478bd9Sstevel@tonic-gate 					    alloced_blocks,
8017c478bd9Sstevel@tonic-gate 					    undo_table, added_sectors);
8027c478bd9Sstevel@tonic-gate 					return (err);
8037c478bd9Sstevel@tonic-gate 				}
8047c478bd9Sstevel@tonic-gate 				brelse(nbp);
805*303bf60bSsdebnath 			} else if (alloc_type == BI_NORMAL ||
806*303bf60bSsdebnath 			    P2ROUNDUP_TYPED(size,
8077c478bd9Sstevel@tonic-gate 			    PAGESIZE, u_offset_t) < bsize) {
8087c478bd9Sstevel@tonic-gate 				TRANS_MATA_ALLOC(ufsvfsp, ip, nb, bsize, 0);
8097c478bd9Sstevel@tonic-gate 				fbzero(ITOV(ip),
8107c478bd9Sstevel@tonic-gate 				    ((offset_t)lbn << fs->fs_bshift),
8117c478bd9Sstevel@tonic-gate 				    (uint_t)bsize, &fbp);
8127c478bd9Sstevel@tonic-gate 
8137c478bd9Sstevel@tonic-gate 				/*
8147c478bd9Sstevel@tonic-gate 				 * Cases which we need to do a synchronous
8157c478bd9Sstevel@tonic-gate 				 * write of the zeroed data pages:
8167c478bd9Sstevel@tonic-gate 				 *
8177c478bd9Sstevel@tonic-gate 				 * 1) If we are writing a directory then we
8187c478bd9Sstevel@tonic-gate 				 * want to write synchronously so blocks in
8197c478bd9Sstevel@tonic-gate 				 * directories never contain garbage.
8207c478bd9Sstevel@tonic-gate 				 *
8217c478bd9Sstevel@tonic-gate 				 * 2) If we are filling in a hole and the
8227c478bd9Sstevel@tonic-gate 				 * indirect block is going to be synchronously
8237c478bd9Sstevel@tonic-gate 				 * written back below we need to make sure
8247c478bd9Sstevel@tonic-gate 				 * that the zeroes are written here before
8257c478bd9Sstevel@tonic-gate 				 * the indirect block is updated so that if
8267c478bd9Sstevel@tonic-gate 				 * we crash before the real data is pushed
8277c478bd9Sstevel@tonic-gate 				 * we will not end up with random data in
8287c478bd9Sstevel@tonic-gate 				 * the middle of the file.
8297c478bd9Sstevel@tonic-gate 				 *
8307c478bd9Sstevel@tonic-gate 				 * 3) If the size of the request rounded up
8317c478bd9Sstevel@tonic-gate 				 * to the system page size is smaller than
8327c478bd9Sstevel@tonic-gate 				 * the file system block size, we want to
8337c478bd9Sstevel@tonic-gate 				 * write out all the pages now so that
8347c478bd9Sstevel@tonic-gate 				 * they are not aborted before they actually
8357c478bd9Sstevel@tonic-gate 				 * make it to ufs_putpage since the length
8367c478bd9Sstevel@tonic-gate 				 * of the inode will not include the pages.
8377c478bd9Sstevel@tonic-gate 				 */
8387c478bd9Sstevel@tonic-gate 
8397c478bd9Sstevel@tonic-gate 				if (isdirquota || (issync &&
8407c478bd9Sstevel@tonic-gate 				    lbn < llbn))
8417c478bd9Sstevel@tonic-gate 					(void) ufs_fbiwrite(fbp, ip, nb,
8427c478bd9Sstevel@tonic-gate 						fs->fs_fsize);
8437c478bd9Sstevel@tonic-gate 				else
8447c478bd9Sstevel@tonic-gate 					fbrelse(fbp, S_WRITE);
8457c478bd9Sstevel@tonic-gate 			}
8467c478bd9Sstevel@tonic-gate 
8477c478bd9Sstevel@tonic-gate 			/*
8487c478bd9Sstevel@tonic-gate 			 * re-acquire "bp" buf
8497c478bd9Sstevel@tonic-gate 			 */
8507c478bd9Sstevel@tonic-gate 			bp = UFS_BREAD(ufsvfsp,
8517c478bd9Sstevel@tonic-gate 					ip->i_dev, fsbtodb(fs, ob), bsize);
8527c478bd9Sstevel@tonic-gate 			if (bp->b_flags & B_ERROR) {
8537c478bd9Sstevel@tonic-gate 				err = geterror(bp);
8547c478bd9Sstevel@tonic-gate 				brelse(bp);
8557c478bd9Sstevel@tonic-gate 				/*
8567c478bd9Sstevel@tonic-gate 				 * Return any partial allocations.
8577c478bd9Sstevel@tonic-gate 				 */
8587c478bd9Sstevel@tonic-gate 				ufs_undo_allocation(ip,
8597c478bd9Sstevel@tonic-gate 				    alloced_blocks,
8607c478bd9Sstevel@tonic-gate 				    undo_table, added_sectors);
8617c478bd9Sstevel@tonic-gate 				return (err);
8627c478bd9Sstevel@tonic-gate 			}
8637c478bd9Sstevel@tonic-gate 			bap = bp->b_un.b_daddr;
8647c478bd9Sstevel@tonic-gate 			bap[i] = nb;
865*303bf60bSsdebnath 
866*303bf60bSsdebnath 			/*
867*303bf60bSsdebnath 			 * The magic explained: j will be equal to NIADDR
868*303bf60bSsdebnath 			 * when we are at the lowest level, this is where the
869*303bf60bSsdebnath 			 * array entries point directly to data blocks. Since
870*303bf60bSsdebnath 			 * we will be 'fallocate'ing we will go ahead and negate
871*303bf60bSsdebnath 			 * the addresses.
872*303bf60bSsdebnath 			 */
873*303bf60bSsdebnath 			if (alloc_type == BI_FALLOCATE && j == NIADDR)
874*303bf60bSsdebnath 				bap[i] = -bap[i];
875*303bf60bSsdebnath 
8767c478bd9Sstevel@tonic-gate 			TRANS_BUF_ITEM_128(ufsvfsp, bap[i], bap, bp, DT_AB);
8777c478bd9Sstevel@tonic-gate 			added_sectors += btodb(bsize);
8787c478bd9Sstevel@tonic-gate 			ip->i_blocks += btodb(bsize);
8797c478bd9Sstevel@tonic-gate 			ASSERT((unsigned)ip->i_blocks <= INT_MAX);
8807c478bd9Sstevel@tonic-gate 			TRANS_INODE(ufsvfsp, ip);
8817c478bd9Sstevel@tonic-gate 			ip->i_flag |= IUPD | ICHG | IATTCHG;
882*303bf60bSsdebnath 
8837c478bd9Sstevel@tonic-gate 			/* Caller is responsible for updating i_seq */
8847c478bd9Sstevel@tonic-gate 
8857c478bd9Sstevel@tonic-gate 			undo_table[alloced_blocks-1].owner =
8867c478bd9Sstevel@tonic-gate 			    ufs_indirect_block;
8877c478bd9Sstevel@tonic-gate 			undo_table[alloced_blocks-1].owner_block = ob;
8887c478bd9Sstevel@tonic-gate 			undo_table[alloced_blocks-1].owner_offset = i;
8897c478bd9Sstevel@tonic-gate 
8907c478bd9Sstevel@tonic-gate 			if (issync) {
8917c478bd9Sstevel@tonic-gate 				UFS_BWRITE2(ufsvfsp, bp);
8927c478bd9Sstevel@tonic-gate 				if (bp->b_flags & B_ERROR) {
8937c478bd9Sstevel@tonic-gate 					err = geterror(bp);
8947c478bd9Sstevel@tonic-gate 					brelse(bp);
8957c478bd9Sstevel@tonic-gate 					/*
8967c478bd9Sstevel@tonic-gate 					 * Return any partial
8977c478bd9Sstevel@tonic-gate 					 * allocations.
8987c478bd9Sstevel@tonic-gate 					 */
8997c478bd9Sstevel@tonic-gate 					ufs_undo_allocation(ip,
9007c478bd9Sstevel@tonic-gate 					    alloced_blocks,
9017c478bd9Sstevel@tonic-gate 					    undo_table, added_sectors);
9027c478bd9Sstevel@tonic-gate 					return (err);
9037c478bd9Sstevel@tonic-gate 				}
9047c478bd9Sstevel@tonic-gate 				brelse(bp);
9057c478bd9Sstevel@tonic-gate 			} else {
9067c478bd9Sstevel@tonic-gate 				bdrwrite(bp);
9077c478bd9Sstevel@tonic-gate 			}
9087c478bd9Sstevel@tonic-gate 		} else {
9097c478bd9Sstevel@tonic-gate 			brelse(bp);
9107c478bd9Sstevel@tonic-gate 		}
9117c478bd9Sstevel@tonic-gate 	}
9127c478bd9Sstevel@tonic-gate 	return (0);
9137c478bd9Sstevel@tonic-gate }
9147c478bd9Sstevel@tonic-gate 
9157c478bd9Sstevel@tonic-gate /*
9167c478bd9Sstevel@tonic-gate  * Return 1 if inode has unmapped blocks (UFS holes).
9177c478bd9Sstevel@tonic-gate  */
9187c478bd9Sstevel@tonic-gate int
9197c478bd9Sstevel@tonic-gate bmap_has_holes(struct inode *ip)
9207c478bd9Sstevel@tonic-gate {
9217c478bd9Sstevel@tonic-gate 	struct fs *fs = ip->i_fs;
9227c478bd9Sstevel@tonic-gate 	uint_t	dblks; 			/* # of data blocks */
9237c478bd9Sstevel@tonic-gate 	uint_t	mblks;			/* # of data + metadata blocks */
9247c478bd9Sstevel@tonic-gate 	int	nindirshift;
9257c478bd9Sstevel@tonic-gate 	int	nindiroffset;
9267c478bd9Sstevel@tonic-gate 	uint_t	cnt;
9277c478bd9Sstevel@tonic-gate 	int	n, j, shft;
9287c478bd9Sstevel@tonic-gate 	uint_t nindirblks;
9297c478bd9Sstevel@tonic-gate 
9307c478bd9Sstevel@tonic-gate 	int	fsbshift = fs->fs_bshift;
9317c478bd9Sstevel@tonic-gate 	int	fsboffset = (1 << fsbshift) - 1;
9327c478bd9Sstevel@tonic-gate 
9337c478bd9Sstevel@tonic-gate 	dblks = (ip->i_size + fsboffset) >> fsbshift;
9347c478bd9Sstevel@tonic-gate 	mblks = (ldbtob((u_offset_t)ip->i_blocks) + fsboffset) >> fsbshift;
9357c478bd9Sstevel@tonic-gate 
9367c478bd9Sstevel@tonic-gate 	/*
9377c478bd9Sstevel@tonic-gate 	 * File has only direct blocks.
9387c478bd9Sstevel@tonic-gate 	 */
9397c478bd9Sstevel@tonic-gate 	if (dblks <= NDADDR)
9407c478bd9Sstevel@tonic-gate 		return (mblks < dblks);
9417c478bd9Sstevel@tonic-gate 	nindirshift = ip->i_ufsvfs->vfs_nindirshift;
942*303bf60bSsdebnath 
9437c478bd9Sstevel@tonic-gate 	nindiroffset = ip->i_ufsvfs->vfs_nindiroffset;
9447c478bd9Sstevel@tonic-gate 	nindirblks = nindiroffset + 1;
9457c478bd9Sstevel@tonic-gate 
9467c478bd9Sstevel@tonic-gate 	dblks -= NDADDR;
9477c478bd9Sstevel@tonic-gate 	shft = 0;
9487c478bd9Sstevel@tonic-gate 	/*
9497c478bd9Sstevel@tonic-gate 	 * Determine how many levels of indirection.
9507c478bd9Sstevel@tonic-gate 	 */
9517c478bd9Sstevel@tonic-gate 	for (j = NIADDR; j > 0; j--) {
9527c478bd9Sstevel@tonic-gate 		longlong_t	sh;
9537c478bd9Sstevel@tonic-gate 
9547c478bd9Sstevel@tonic-gate 		shft += nindirshift;	/* sh *= nindir */
9557c478bd9Sstevel@tonic-gate 		sh = 1LL << shft;
9567c478bd9Sstevel@tonic-gate 		if (dblks <= sh)
9577c478bd9Sstevel@tonic-gate 			break;
9587c478bd9Sstevel@tonic-gate 		dblks -= sh;
9597c478bd9Sstevel@tonic-gate 	}
9607c478bd9Sstevel@tonic-gate 	/* LINTED: warning: logical expression always true: op "||" */
9617c478bd9Sstevel@tonic-gate 	ASSERT(NIADDR <= 3);
9627c478bd9Sstevel@tonic-gate 	ASSERT(j <= NIADDR);
9637c478bd9Sstevel@tonic-gate 	if (j == NIADDR)	/* single level indirection */
9647c478bd9Sstevel@tonic-gate 		cnt = NDADDR + 1 + dblks;
9657c478bd9Sstevel@tonic-gate 	else if (j == NIADDR-1) /* double indirection */
9667c478bd9Sstevel@tonic-gate 		cnt = NDADDR + 1 + nindirblks +
9677c478bd9Sstevel@tonic-gate 			1 + (dblks + nindiroffset)/nindirblks + dblks;
9687c478bd9Sstevel@tonic-gate 	else if (j == NIADDR-2) { /* triple indirection */
9697c478bd9Sstevel@tonic-gate 		n = (dblks + nindiroffset)/nindirblks;
9707c478bd9Sstevel@tonic-gate 		cnt = NDADDR + 1 + nindirblks +
9717c478bd9Sstevel@tonic-gate 			1 + nindirblks + nindirblks*nindirblks +
9727c478bd9Sstevel@tonic-gate 			1 + (n + nindiroffset)/nindirblks + n + dblks;
9737c478bd9Sstevel@tonic-gate 	}
9747c478bd9Sstevel@tonic-gate 
9757c478bd9Sstevel@tonic-gate 	return (mblks < cnt);
9767c478bd9Sstevel@tonic-gate }
9777c478bd9Sstevel@tonic-gate 
9787c478bd9Sstevel@tonic-gate /*
9797c478bd9Sstevel@tonic-gate  * find some contig blocks starting at *sbp and going for min(n, max_contig)
9807c478bd9Sstevel@tonic-gate  * return the number of blocks (not frags) found.
9817c478bd9Sstevel@tonic-gate  * The array passed in must be at least [0..n-1].
9827c478bd9Sstevel@tonic-gate  */
9837c478bd9Sstevel@tonic-gate static int
9847c478bd9Sstevel@tonic-gate findextent(struct fs *fs, daddr32_t *sbp, int n, int *lenp, int maxtransfer)
9857c478bd9Sstevel@tonic-gate {
9867c478bd9Sstevel@tonic-gate 	register daddr_t bn, nextbn;
9877c478bd9Sstevel@tonic-gate 	register daddr32_t *bp;
9887c478bd9Sstevel@tonic-gate 	register int diff;
9897c478bd9Sstevel@tonic-gate 	int maxtransblk;
9907c478bd9Sstevel@tonic-gate 
9917c478bd9Sstevel@tonic-gate 	if (n <= 0)
9927c478bd9Sstevel@tonic-gate 		return (0);
9937c478bd9Sstevel@tonic-gate 	bn = *sbp;
9947c478bd9Sstevel@tonic-gate 	if (bn == 0)
9957c478bd9Sstevel@tonic-gate 		return (0);
996*303bf60bSsdebnath 
9977c478bd9Sstevel@tonic-gate 	diff = fs->fs_frag;
9987c478bd9Sstevel@tonic-gate 	if (*lenp) {
9997c478bd9Sstevel@tonic-gate 		n = MIN(n, lblkno(fs, *lenp));
10007c478bd9Sstevel@tonic-gate 	} else {
10017c478bd9Sstevel@tonic-gate 		/*
10027c478bd9Sstevel@tonic-gate 		 * If the user has set the value for maxcontig lower than
10037c478bd9Sstevel@tonic-gate 		 * the drive transfer size, then assume they want this
10047c478bd9Sstevel@tonic-gate 		 * to be the maximum value for the size of the data transfer.
10057c478bd9Sstevel@tonic-gate 		 */
10067c478bd9Sstevel@tonic-gate 		maxtransblk = maxtransfer >> DEV_BSHIFT;
10077c478bd9Sstevel@tonic-gate 		if (fs->fs_maxcontig < maxtransblk) {
10087c478bd9Sstevel@tonic-gate 			n = MIN(n, fs->fs_maxcontig);
10097c478bd9Sstevel@tonic-gate 		} else {
10107c478bd9Sstevel@tonic-gate 			n = MIN(n, maxtransblk);
10117c478bd9Sstevel@tonic-gate 		}
10127c478bd9Sstevel@tonic-gate 	}
10137c478bd9Sstevel@tonic-gate 	bp = sbp;
10147c478bd9Sstevel@tonic-gate 	while (--n > 0) {
10157c478bd9Sstevel@tonic-gate 		nextbn = *(bp + 1);
10167c478bd9Sstevel@tonic-gate 		if (nextbn == 0 || bn + diff != nextbn)
10177c478bd9Sstevel@tonic-gate 			break;
10187c478bd9Sstevel@tonic-gate 		bn = nextbn;
10197c478bd9Sstevel@tonic-gate 		bp++;
10207c478bd9Sstevel@tonic-gate 	}
10217c478bd9Sstevel@tonic-gate 	return ((int)(bp - sbp) + 1);
10227c478bd9Sstevel@tonic-gate }
10237c478bd9Sstevel@tonic-gate 
10247c478bd9Sstevel@tonic-gate /*
10257c478bd9Sstevel@tonic-gate  * Free any blocks which had been successfully allocated.  Always called
10267c478bd9Sstevel@tonic-gate  * as a result of an error, so we don't bother returning an error code
10277c478bd9Sstevel@tonic-gate  * from here.
10287c478bd9Sstevel@tonic-gate  *
10297c478bd9Sstevel@tonic-gate  * If block_count and inode_sector_adjust are both zero, we'll do nothing.
10307c478bd9Sstevel@tonic-gate  * Thus it is safe to call this as part of error handling, whether or not
10317c478bd9Sstevel@tonic-gate  * any blocks have been allocated.
10327c478bd9Sstevel@tonic-gate  *
10337c478bd9Sstevel@tonic-gate  * The ufs_inode_direct case is currently unused.
10347c478bd9Sstevel@tonic-gate  */
10357c478bd9Sstevel@tonic-gate 
10367c478bd9Sstevel@tonic-gate static void
10377c478bd9Sstevel@tonic-gate ufs_undo_allocation(
10387c478bd9Sstevel@tonic-gate 	inode_t *ip,
10397c478bd9Sstevel@tonic-gate 	int block_count,
10407c478bd9Sstevel@tonic-gate 	struct ufs_allocated_block table[],
10417c478bd9Sstevel@tonic-gate 	int inode_sector_adjust)
10427c478bd9Sstevel@tonic-gate {
10437c478bd9Sstevel@tonic-gate 	int i;
10447c478bd9Sstevel@tonic-gate 	int inode_changed;
10457c478bd9Sstevel@tonic-gate 	int error_updating_pointers;
10467c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
10477c478bd9Sstevel@tonic-gate 
10487c478bd9Sstevel@tonic-gate 	inode_changed = 0;
10497c478bd9Sstevel@tonic-gate 	error_updating_pointers = 0;
10507c478bd9Sstevel@tonic-gate 
10517c478bd9Sstevel@tonic-gate 	ufsvfsp = ip->i_ufsvfs;
10527c478bd9Sstevel@tonic-gate 
10537c478bd9Sstevel@tonic-gate 	/*
10547c478bd9Sstevel@tonic-gate 	 * Update pointers on disk before freeing blocks.  If we fail,
10557c478bd9Sstevel@tonic-gate 	 * some blocks may remain busy; but they will be reclaimed by
10567c478bd9Sstevel@tonic-gate 	 * an fsck.  (This is better than letting a block wind up with
10577c478bd9Sstevel@tonic-gate 	 * two owners if we successfully freed it but could not remove
10587c478bd9Sstevel@tonic-gate 	 * the pointer to it.)
10597c478bd9Sstevel@tonic-gate 	 */
10607c478bd9Sstevel@tonic-gate 
10617c478bd9Sstevel@tonic-gate 	for (i = 0; i < block_count; i++) {
10627c478bd9Sstevel@tonic-gate 		switch (table[i].owner) {
10637c478bd9Sstevel@tonic-gate 		case ufs_no_owner:
10647c478bd9Sstevel@tonic-gate 			/* Nothing to do here, nobody points to us */
10657c478bd9Sstevel@tonic-gate 			break;
10667c478bd9Sstevel@tonic-gate 		case ufs_inode_direct:
10677c478bd9Sstevel@tonic-gate 			ASSERT(table[i].owner_offset < NDADDR);
10687c478bd9Sstevel@tonic-gate 			ip->i_db[table[i].owner_offset] = 0;
10697c478bd9Sstevel@tonic-gate 			inode_changed = 1;
10707c478bd9Sstevel@tonic-gate 			break;
10717c478bd9Sstevel@tonic-gate 		case ufs_inode_indirect:
10727c478bd9Sstevel@tonic-gate 			ASSERT(table[i].owner_offset < NIADDR);
10737c478bd9Sstevel@tonic-gate 			ip->i_ib[table[i].owner_offset] = 0;
10747c478bd9Sstevel@tonic-gate 			inode_changed = 1;
10757c478bd9Sstevel@tonic-gate 			break;
10767c478bd9Sstevel@tonic-gate 		case ufs_indirect_block: {
10777c478bd9Sstevel@tonic-gate 			buf_t *bp;
10787c478bd9Sstevel@tonic-gate 			daddr32_t *block_data;
10797c478bd9Sstevel@tonic-gate 
10807c478bd9Sstevel@tonic-gate 			/* Read/modify/log/write. */
10817c478bd9Sstevel@tonic-gate 
10827c478bd9Sstevel@tonic-gate 			ASSERT(table[i].owner_offset <
10837c478bd9Sstevel@tonic-gate 			    (VBSIZE(ITOV(ip)) / sizeof (daddr32_t)));
10847c478bd9Sstevel@tonic-gate 
10857c478bd9Sstevel@tonic-gate 			bp = UFS_BREAD(ufsvfsp, ip->i_dev,
10867c478bd9Sstevel@tonic-gate 			    fsbtodb(ufsvfsp->vfs_fs, table[i].owner_block),
10877c478bd9Sstevel@tonic-gate 			    VBSIZE(ITOV(ip)));
10887c478bd9Sstevel@tonic-gate 
10897c478bd9Sstevel@tonic-gate 			if (bp->b_flags & B_ERROR) {
10907c478bd9Sstevel@tonic-gate 				/* Couldn't read this block; give up. */
10917c478bd9Sstevel@tonic-gate 				error_updating_pointers = 1;
10927c478bd9Sstevel@tonic-gate 				brelse(bp);
10937c478bd9Sstevel@tonic-gate 				break;		/* out of SWITCH */
10947c478bd9Sstevel@tonic-gate 			}
10957c478bd9Sstevel@tonic-gate 
10967c478bd9Sstevel@tonic-gate 			block_data = bp->b_un.b_daddr;
10977c478bd9Sstevel@tonic-gate 			block_data[table[i].owner_offset] = 0;
10987c478bd9Sstevel@tonic-gate 
10997c478bd9Sstevel@tonic-gate 			/* Write a log entry which includes the zero. */
11007c478bd9Sstevel@tonic-gate 			/* It might be possible to optimize this by using */
11017c478bd9Sstevel@tonic-gate 			/* TRANS_BUF directly and zeroing only the four */
11027c478bd9Sstevel@tonic-gate 			/* bytes involved, but an attempt to do that led */
11037c478bd9Sstevel@tonic-gate 			/* to panics in the logging code.  The attempt was */
11047c478bd9Sstevel@tonic-gate 			/* TRANS_BUF(ufsvfsp,				  */
11057c478bd9Sstevel@tonic-gate 			/*    table[i].owner_offset * sizeof (daddr32_t), */
11067c478bd9Sstevel@tonic-gate 			/*    sizeof (daddr32_t),			  */
11077c478bd9Sstevel@tonic-gate 			/*    bp,					  */
11087c478bd9Sstevel@tonic-gate 			/*    DT_ABZERO);				  */
11097c478bd9Sstevel@tonic-gate 
11107c478bd9Sstevel@tonic-gate 			TRANS_BUF_ITEM_128(ufsvfsp,
11117c478bd9Sstevel@tonic-gate 			    block_data[table[i].owner_offset],
11127c478bd9Sstevel@tonic-gate 			    block_data, bp, DT_AB);
11137c478bd9Sstevel@tonic-gate 
11147c478bd9Sstevel@tonic-gate 			/* Now we can write the buffer itself. */
11157c478bd9Sstevel@tonic-gate 
11167c478bd9Sstevel@tonic-gate 			UFS_BWRITE2(ufsvfsp, bp);
11177c478bd9Sstevel@tonic-gate 
11187c478bd9Sstevel@tonic-gate 			if (bp->b_flags & B_ERROR) {
11197c478bd9Sstevel@tonic-gate 				error_updating_pointers = 1;
11207c478bd9Sstevel@tonic-gate 			}
11217c478bd9Sstevel@tonic-gate 
11227c478bd9Sstevel@tonic-gate 			brelse(bp);
11237c478bd9Sstevel@tonic-gate 			break;
11247c478bd9Sstevel@tonic-gate 		}
11257c478bd9Sstevel@tonic-gate 		default:
11267c478bd9Sstevel@tonic-gate 			(void) ufs_fault(ITOV(ip),
11277c478bd9Sstevel@tonic-gate 			    "ufs_undo_allocation failure\n");
11287c478bd9Sstevel@tonic-gate 			break;
11297c478bd9Sstevel@tonic-gate 		}
11307c478bd9Sstevel@tonic-gate 	}
11317c478bd9Sstevel@tonic-gate 
11327c478bd9Sstevel@tonic-gate 	/*
11337c478bd9Sstevel@tonic-gate 	 * If the inode changed, or if we need to update its block count,
11347c478bd9Sstevel@tonic-gate 	 * then do that now.  We update the inode synchronously on disk
11357c478bd9Sstevel@tonic-gate 	 * to ensure that it won't transiently point at a block we've
11367c478bd9Sstevel@tonic-gate 	 * freed (only necessary if we're not logging).
11377c478bd9Sstevel@tonic-gate 	 *
11387c478bd9Sstevel@tonic-gate 	 * NOTE: Currently ufs_iupdat() does not check for errors.  When
11397c478bd9Sstevel@tonic-gate 	 * it is fixed, we should verify that we successfully updated the
11407c478bd9Sstevel@tonic-gate 	 * inode before freeing blocks below.
11417c478bd9Sstevel@tonic-gate 	 */
11427c478bd9Sstevel@tonic-gate 
11437c478bd9Sstevel@tonic-gate 	if (inode_changed || (inode_sector_adjust != 0)) {
11447c478bd9Sstevel@tonic-gate 		ip->i_blocks -= inode_sector_adjust;
11457c478bd9Sstevel@tonic-gate 		ASSERT((unsigned)ip->i_blocks <= INT_MAX);
11467c478bd9Sstevel@tonic-gate 		TRANS_INODE(ufsvfsp, ip);
11477c478bd9Sstevel@tonic-gate 		ip->i_flag |= IUPD | ICHG | IATTCHG;
11487c478bd9Sstevel@tonic-gate 		ip->i_seq++;
11497c478bd9Sstevel@tonic-gate 		if (!TRANS_ISTRANS(ufsvfsp))
11507c478bd9Sstevel@tonic-gate 			ufs_iupdat(ip, I_SYNC);
11517c478bd9Sstevel@tonic-gate 	}
11527c478bd9Sstevel@tonic-gate 
11537c478bd9Sstevel@tonic-gate 	/*
11547c478bd9Sstevel@tonic-gate 	 * Now we go through and actually free the blocks, but only if we
11557c478bd9Sstevel@tonic-gate 	 * successfully removed the pointers to them.
11567c478bd9Sstevel@tonic-gate 	 */
11577c478bd9Sstevel@tonic-gate 
11587c478bd9Sstevel@tonic-gate 	if (!error_updating_pointers) {
11597c478bd9Sstevel@tonic-gate 		for (i = 0; i < block_count; i++) {
11607c478bd9Sstevel@tonic-gate 			free(ip, table[i].this_block, table[i].block_size,
11617c478bd9Sstevel@tonic-gate 			    table[i].usage_flags);
11627c478bd9Sstevel@tonic-gate 		}
11637c478bd9Sstevel@tonic-gate 	}
11647c478bd9Sstevel@tonic-gate }
11657c478bd9Sstevel@tonic-gate 
11667c478bd9Sstevel@tonic-gate /*
11677c478bd9Sstevel@tonic-gate  * Find the next hole or data block in file starting at *off
1168a6595b40Sperrin  * Return found offset in *off, which can be less than the
1169a6595b40Sperrin  * starting offset if not block aligned.
11707c478bd9Sstevel@tonic-gate  * This code is based on bmap_read().
11717c478bd9Sstevel@tonic-gate  * Errors: ENXIO for end of file
11727c478bd9Sstevel@tonic-gate  *         EIO for block read error.
11737c478bd9Sstevel@tonic-gate  */
11747c478bd9Sstevel@tonic-gate int
11757c478bd9Sstevel@tonic-gate bmap_find(struct inode *ip, boolean_t hole, u_offset_t *off)
11767c478bd9Sstevel@tonic-gate {
11777c478bd9Sstevel@tonic-gate 	ufsvfs_t *ufsvfsp = ip->i_ufsvfs;
11787c478bd9Sstevel@tonic-gate 	struct fs *fs = ufsvfsp->vfs_fs;
11797c478bd9Sstevel@tonic-gate 	buf_t *bp[NIADDR];
11807c478bd9Sstevel@tonic-gate 	int i, j;
11817c478bd9Sstevel@tonic-gate 	int shft;			/* we maintain sh = 1 << shft */
11827c478bd9Sstevel@tonic-gate 	int nindirshift, nindiroffset;
11837c478bd9Sstevel@tonic-gate 	daddr_t	ob, nb, tbn, lbn, skip;
11847c478bd9Sstevel@tonic-gate 	daddr32_t *bap;
11857c478bd9Sstevel@tonic-gate 	u_offset_t isz = (offset_t)ip->i_size;
11867c478bd9Sstevel@tonic-gate 	int32_t bs = fs->fs_bsize; /* file system block size */
11877c478bd9Sstevel@tonic-gate 	int32_t nindir = fs->fs_nindir;
11887c478bd9Sstevel@tonic-gate 	dev_t dev;
11897c478bd9Sstevel@tonic-gate 	int error = 0;
11907c478bd9Sstevel@tonic-gate 	daddr_t limits[NIADDR];
11917c478bd9Sstevel@tonic-gate 
11927c478bd9Sstevel@tonic-gate 	ASSERT(*off < isz);
11937c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
11947c478bd9Sstevel@tonic-gate 	lbn = (daddr_t)lblkno(fs, *off);
11957c478bd9Sstevel@tonic-gate 	ASSERT(lbn >= 0);
11967c478bd9Sstevel@tonic-gate 
11977c478bd9Sstevel@tonic-gate 	for (i = 0; i < NIADDR; i++)
11987c478bd9Sstevel@tonic-gate 		bp[i] = NULL;
11997c478bd9Sstevel@tonic-gate 
12007c478bd9Sstevel@tonic-gate 	/*
12017c478bd9Sstevel@tonic-gate 	 * The first NDADDR blocks are direct blocks.
12027c478bd9Sstevel@tonic-gate 	 */
12037c478bd9Sstevel@tonic-gate 	if (lbn < NDADDR) {
12047c478bd9Sstevel@tonic-gate 		for (; lbn < NDADDR; lbn++) {
12057c478bd9Sstevel@tonic-gate 			if ((hole && (ip->i_db[lbn] == 0)) ||
12067c478bd9Sstevel@tonic-gate 			    (!hole && (ip->i_db[lbn] != 0))) {
12077c478bd9Sstevel@tonic-gate 				goto out;
12087c478bd9Sstevel@tonic-gate 			}
12097c478bd9Sstevel@tonic-gate 		}
12107c478bd9Sstevel@tonic-gate 		if ((u_offset_t)lbn << fs->fs_bshift >= isz)
12117c478bd9Sstevel@tonic-gate 			goto out;
12127c478bd9Sstevel@tonic-gate 	}
12137c478bd9Sstevel@tonic-gate 
12147c478bd9Sstevel@tonic-gate 	nindir = fs->fs_nindir;
12157c478bd9Sstevel@tonic-gate 	nindirshift = ufsvfsp->vfs_nindirshift;
12167c478bd9Sstevel@tonic-gate 	nindiroffset = ufsvfsp->vfs_nindiroffset;
12177c478bd9Sstevel@tonic-gate 	dev = ip->i_dev;
12187c478bd9Sstevel@tonic-gate 
12197c478bd9Sstevel@tonic-gate 	/* Set up limits array */
12207c478bd9Sstevel@tonic-gate 	for (limits[0] = NDADDR, j = 1; j  < NIADDR; j++)
12217c478bd9Sstevel@tonic-gate 		limits[j] = limits[j-1] + (1ULL << (nindirshift * j));
12227c478bd9Sstevel@tonic-gate 
12237c478bd9Sstevel@tonic-gate loop:
12247c478bd9Sstevel@tonic-gate 	/*
12257c478bd9Sstevel@tonic-gate 	 * Determine how many levels of indirection.
12267c478bd9Sstevel@tonic-gate 	 */
12277c478bd9Sstevel@tonic-gate 	shft = 0;				/* sh = 1 */
12287c478bd9Sstevel@tonic-gate 	tbn = lbn - NDADDR;
12297c478bd9Sstevel@tonic-gate 	for (j = NIADDR; j > 0; j--) {
12307c478bd9Sstevel@tonic-gate 		longlong_t sh;
12317c478bd9Sstevel@tonic-gate 
12327c478bd9Sstevel@tonic-gate 		shft += nindirshift;		/* sh *= nindir */
12337c478bd9Sstevel@tonic-gate 		sh = 1LL << shft;
12347c478bd9Sstevel@tonic-gate 		if (tbn < sh)
12357c478bd9Sstevel@tonic-gate 			break;
12367c478bd9Sstevel@tonic-gate 		tbn -= sh;
12377c478bd9Sstevel@tonic-gate 	}
12387c478bd9Sstevel@tonic-gate 	if (j == 0) {
12397c478bd9Sstevel@tonic-gate 		/* must have passed end of file */
12407c478bd9Sstevel@tonic-gate 		ASSERT(((u_offset_t)lbn << fs->fs_bshift) >= isz);
12417c478bd9Sstevel@tonic-gate 		goto out;
12427c478bd9Sstevel@tonic-gate 	}
12437c478bd9Sstevel@tonic-gate 
12447c478bd9Sstevel@tonic-gate 	/*
12457c478bd9Sstevel@tonic-gate 	 * Fetch the first indirect block.
12467c478bd9Sstevel@tonic-gate 	 */
12477c478bd9Sstevel@tonic-gate 	nb = ip->i_ib[NIADDR - j];
12487c478bd9Sstevel@tonic-gate 	if (nb == 0) {
12497c478bd9Sstevel@tonic-gate 		if (hole) {
12507c478bd9Sstevel@tonic-gate 			lbn = limits[NIADDR - j];
12517c478bd9Sstevel@tonic-gate 			goto out;
12527c478bd9Sstevel@tonic-gate 		} else {
12537c478bd9Sstevel@tonic-gate 			lbn = limits[NIADDR - j + 1];
12547c478bd9Sstevel@tonic-gate 			if ((u_offset_t)lbn << fs->fs_bshift >= isz)
12557c478bd9Sstevel@tonic-gate 				goto out;
12567c478bd9Sstevel@tonic-gate 			goto loop;
12577c478bd9Sstevel@tonic-gate 		}
12587c478bd9Sstevel@tonic-gate 	}
12597c478bd9Sstevel@tonic-gate 
12607c478bd9Sstevel@tonic-gate 	/*
12617c478bd9Sstevel@tonic-gate 	 * Fetch through the indirect blocks.
12627c478bd9Sstevel@tonic-gate 	 */
12637c478bd9Sstevel@tonic-gate 	for (; ((j <= NIADDR) && (nb != 0)); j++) {
12647c478bd9Sstevel@tonic-gate 		ob = nb;
12657c478bd9Sstevel@tonic-gate 		/*
12667c478bd9Sstevel@tonic-gate 		 * if there's a different block at this level then release
12677c478bd9Sstevel@tonic-gate 		 * the old one and in with the new.
12687c478bd9Sstevel@tonic-gate 		 */
12697c478bd9Sstevel@tonic-gate 		if ((bp[j-1] == NULL) || bp[j-1]->b_blkno != fsbtodb(fs, ob)) {
12707c478bd9Sstevel@tonic-gate 			if (bp[j-1] != NULL)
12717c478bd9Sstevel@tonic-gate 				brelse(bp[j-1]);
12727c478bd9Sstevel@tonic-gate 			bp[j-1] = UFS_BREAD(ufsvfsp, dev, fsbtodb(fs, ob), bs);
12737c478bd9Sstevel@tonic-gate 			if (bp[j-1]->b_flags & B_ERROR) {
12747c478bd9Sstevel@tonic-gate 				error = EIO;
12757c478bd9Sstevel@tonic-gate 				goto out;
12767c478bd9Sstevel@tonic-gate 			}
12777c478bd9Sstevel@tonic-gate 		}
12787c478bd9Sstevel@tonic-gate 		bap = bp[j-1]->b_un.b_daddr;
12797c478bd9Sstevel@tonic-gate 
12807c478bd9Sstevel@tonic-gate 		shft -= nindirshift;		/* sh / nindir */
12817c478bd9Sstevel@tonic-gate 		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */
12827c478bd9Sstevel@tonic-gate 		nb = bap[i];
12837c478bd9Sstevel@tonic-gate 		skip = 1LL << (nindirshift * (NIADDR - j));
12847c478bd9Sstevel@tonic-gate 	}
12857c478bd9Sstevel@tonic-gate 
12867c478bd9Sstevel@tonic-gate 	/*
12877c478bd9Sstevel@tonic-gate 	 * Scan through the blocks in this array.
12887c478bd9Sstevel@tonic-gate 	 */
12897c478bd9Sstevel@tonic-gate 	for (; i < nindir; i++, lbn += skip) {
12907c478bd9Sstevel@tonic-gate 		if (hole && (bap[i] == 0))
12917c478bd9Sstevel@tonic-gate 			goto out;
12927c478bd9Sstevel@tonic-gate 		if (!hole && (bap[i] != 0)) {
12937c478bd9Sstevel@tonic-gate 			if (skip == 1) {
12947c478bd9Sstevel@tonic-gate 				/* we're at the lowest level */
12957c478bd9Sstevel@tonic-gate 				goto out;
12967c478bd9Sstevel@tonic-gate 			} else {
12977c478bd9Sstevel@tonic-gate 				goto loop;
12987c478bd9Sstevel@tonic-gate 			}
12997c478bd9Sstevel@tonic-gate 		}
13007c478bd9Sstevel@tonic-gate 	}
13017c478bd9Sstevel@tonic-gate 	if (((u_offset_t)lbn << fs->fs_bshift) < isz)
13027c478bd9Sstevel@tonic-gate 		goto loop;
13037c478bd9Sstevel@tonic-gate out:
13047c478bd9Sstevel@tonic-gate 	for (i = 0; i < NIADDR; i++) {
13057c478bd9Sstevel@tonic-gate 		if (bp[i])
13067c478bd9Sstevel@tonic-gate 			brelse(bp[i]);
13077c478bd9Sstevel@tonic-gate 	}
13087c478bd9Sstevel@tonic-gate 	if (error == 0) {
13097c478bd9Sstevel@tonic-gate 		if (((u_offset_t)lbn << fs->fs_bshift) >= isz) {
13107c478bd9Sstevel@tonic-gate 			error = ENXIO;
13117c478bd9Sstevel@tonic-gate 		} else {
13127c478bd9Sstevel@tonic-gate 			/* success */
13137c478bd9Sstevel@tonic-gate 			*off = (u_offset_t)lbn << fs->fs_bshift;
13147c478bd9Sstevel@tonic-gate 		}
13157c478bd9Sstevel@tonic-gate 	}
13167c478bd9Sstevel@tonic-gate 	return (error);
13177c478bd9Sstevel@tonic-gate }
1318*303bf60bSsdebnath 
1319*303bf60bSsdebnath /*
1320*303bf60bSsdebnath  * Set a particular offset in the inode list to be a certain block.
1321*303bf60bSsdebnath  * User is responsible for calling TRANS* functions
1322*303bf60bSsdebnath  */
1323*303bf60bSsdebnath int
1324*303bf60bSsdebnath bmap_set_bn(struct vnode *vp, u_offset_t off, daddr32_t bn)
1325*303bf60bSsdebnath {
1326*303bf60bSsdebnath 	daddr_t lbn;
1327*303bf60bSsdebnath 	struct inode *ip;
1328*303bf60bSsdebnath 	ufsvfs_t *ufsvfsp;
1329*303bf60bSsdebnath 	struct	fs *fs;
1330*303bf60bSsdebnath 	struct	buf *bp;
1331*303bf60bSsdebnath 	int	i, j;
1332*303bf60bSsdebnath 	int	shft;			/* we maintain sh = 1 << shft */
1333*303bf60bSsdebnath 	int err;
1334*303bf60bSsdebnath 	daddr_t	ob, nb, tbn;
1335*303bf60bSsdebnath 	daddr32_t *bap;
1336*303bf60bSsdebnath 	int	nindirshift, nindiroffset;
1337*303bf60bSsdebnath 
1338*303bf60bSsdebnath 	ip = VTOI(vp);
1339*303bf60bSsdebnath 	ufsvfsp = ip->i_ufsvfs;
1340*303bf60bSsdebnath 	fs = ufsvfsp->vfs_fs;
1341*303bf60bSsdebnath 	lbn = (daddr_t)lblkno(fs, off);
1342*303bf60bSsdebnath 
1343*303bf60bSsdebnath 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
1344*303bf60bSsdebnath 
1345*303bf60bSsdebnath 	if (lbn < 0)
1346*303bf60bSsdebnath 		return (EFBIG);
1347*303bf60bSsdebnath 
1348*303bf60bSsdebnath 	/*
1349*303bf60bSsdebnath 	 * Take care of direct block assignment
1350*303bf60bSsdebnath 	 */
1351*303bf60bSsdebnath 	if (lbn < NDADDR) {
1352*303bf60bSsdebnath 		ip->i_db[lbn] = bn;
1353*303bf60bSsdebnath 		return (0);
1354*303bf60bSsdebnath 	}
1355*303bf60bSsdebnath 
1356*303bf60bSsdebnath 	nindirshift = ip->i_ufsvfs->vfs_nindirshift;
1357*303bf60bSsdebnath 	nindiroffset = ip->i_ufsvfs->vfs_nindiroffset;
1358*303bf60bSsdebnath 	/*
1359*303bf60bSsdebnath 	 * Determine how many levels of indirection.
1360*303bf60bSsdebnath 	 */
1361*303bf60bSsdebnath 	shft = 0;				/* sh = 1 */
1362*303bf60bSsdebnath 	tbn = lbn - NDADDR;
1363*303bf60bSsdebnath 	for (j = NIADDR; j > 0; j--) {
1364*303bf60bSsdebnath 		longlong_t	sh;
1365*303bf60bSsdebnath 
1366*303bf60bSsdebnath 		shft += nindirshift;		/* sh *= nindir */
1367*303bf60bSsdebnath 		sh = 1LL << shft;
1368*303bf60bSsdebnath 		if (tbn < sh)
1369*303bf60bSsdebnath 			break;
1370*303bf60bSsdebnath 		tbn -= sh;
1371*303bf60bSsdebnath 	}
1372*303bf60bSsdebnath 	if (j == 0)
1373*303bf60bSsdebnath 		return (EFBIG);
1374*303bf60bSsdebnath 
1375*303bf60bSsdebnath 	/*
1376*303bf60bSsdebnath 	 * Fetch the first indirect block.
1377*303bf60bSsdebnath 	 */
1378*303bf60bSsdebnath 	nb = ip->i_ib[NIADDR - j];
1379*303bf60bSsdebnath 	if (nb == 0)
1380*303bf60bSsdebnath 		err = ufs_fault(ITOV(ip), "ufs_set_bn: nb == UFS_HOLE");
1381*303bf60bSsdebnath 
1382*303bf60bSsdebnath 	/*
1383*303bf60bSsdebnath 	 * Fetch through the indirect blocks.
1384*303bf60bSsdebnath 	 */
1385*303bf60bSsdebnath 	for (; j <= NIADDR; j++) {
1386*303bf60bSsdebnath 		ob = nb;
1387*303bf60bSsdebnath 		bp = UFS_BREAD(ufsvfsp,
1388*303bf60bSsdebnath 				ip->i_dev, fsbtodb(fs, ob), fs->fs_bsize);
1389*303bf60bSsdebnath 		if (bp->b_flags & B_ERROR) {
1390*303bf60bSsdebnath 			err = geterror(bp);
1391*303bf60bSsdebnath 			brelse(bp);
1392*303bf60bSsdebnath 			return (err);
1393*303bf60bSsdebnath 		}
1394*303bf60bSsdebnath 		bap = bp->b_un.b_daddr;
1395*303bf60bSsdebnath 
1396*303bf60bSsdebnath 		ASSERT(!ufs_indir_badblock(ip, bap));
1397*303bf60bSsdebnath 
1398*303bf60bSsdebnath 		shft -= nindirshift;		/* sh / nindir */
1399*303bf60bSsdebnath 		i = (tbn >> shft) & nindiroffset; /* (tbn / sh) % nindir */
1400*303bf60bSsdebnath 
1401*303bf60bSsdebnath 		if (j == NIADDR) {
1402*303bf60bSsdebnath 			bap[i] = bn;
1403*303bf60bSsdebnath 			bdrwrite(bp);
1404*303bf60bSsdebnath 			return (0);
1405*303bf60bSsdebnath 		}
1406*303bf60bSsdebnath 		brelse(bp);
1407*303bf60bSsdebnath 	}
1408*303bf60bSsdebnath 	return (0);
1409*303bf60bSsdebnath }
1410