/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/fssnap_if.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_lockfs.h>
#include <sys/fs/ufs_log.h>
#include <sys/fs/ufs_trans.h>
#include <sys/cmn_err.h>
#include <vm/pvn.h>
#include <vm/seg_map.h>
#include <sys/fdbuffer.h>

#ifdef DEBUG
/*
 * Run-time switch for the DEBUGF() tracing in this file: messages are
 * passed to cmn_err() only while this is non-zero.
 */
int evn_ufs_debug = 0;

/*
 * DEBUGF((level, fmt, ...)) - conditional cmn_err() tracing.
 *
 * The whole cmn_err() argument list is passed as ONE macro argument, so
 * callers wrap it in a second pair of parentheses.  The expansion is a
 * single statement (do/while (0)) so that "DEBUGF((...));" is safe as the
 * body of an unbraced if/else.
 */
#define	DEBUGF(args)	do { if (evn_ufs_debug) cmn_err args; } while (0)
#else
/* Tracing compiled out: expands to a no-op and never evaluates args. */
#define	DEBUGF(args)	((void)0)
#endif

477c478bd9Sstevel@tonic-gate /*
487c478bd9Sstevel@tonic-gate  * ufs_rdwr_data - supports reading or writing data when
497c478bd9Sstevel@tonic-gate  * no changes are permitted in file size or space allocation.
507c478bd9Sstevel@tonic-gate  *
517c478bd9Sstevel@tonic-gate  * Inputs:
527c478bd9Sstevel@tonic-gate  * fdb - The mandatory fdbuffer supports
537c478bd9Sstevel@tonic-gate  *	the read or write operation.
547c478bd9Sstevel@tonic-gate  * flags - defaults (zero value) to synchronous write
557c478bd9Sstevel@tonic-gate  *	B_READ - indicates read operation
567c478bd9Sstevel@tonic-gate  *	B_ASYNC - indicates perform operation asynchronously
577c478bd9Sstevel@tonic-gate  */
587c478bd9Sstevel@tonic-gate /*ARGSUSED*/
597c478bd9Sstevel@tonic-gate int
ufs_rdwr_data(vnode_t * vnodep,u_offset_t offset,size_t len,fdbuffer_t * fdbp,int flags,cred_t * credp)607c478bd9Sstevel@tonic-gate ufs_rdwr_data(
617c478bd9Sstevel@tonic-gate 	vnode_t		*vnodep,
627c478bd9Sstevel@tonic-gate 	u_offset_t	offset,
637c478bd9Sstevel@tonic-gate 	size_t		len,
647c478bd9Sstevel@tonic-gate 	fdbuffer_t	*fdbp,
657c478bd9Sstevel@tonic-gate 	int		flags,
667c478bd9Sstevel@tonic-gate 	cred_t		*credp)
677c478bd9Sstevel@tonic-gate {
687c478bd9Sstevel@tonic-gate 	struct inode	*ip = VTOI(vnodep);
697c478bd9Sstevel@tonic-gate 	struct fs	*fs;
707c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
717c478bd9Sstevel@tonic-gate 	struct buf	*bp;
727c478bd9Sstevel@tonic-gate 	krw_t		rwtype = RW_READER;
737c478bd9Sstevel@tonic-gate 	u_offset_t	offset1 = offset;	/* Initial offset */
747c478bd9Sstevel@tonic-gate 	size_t		iolen;
757c478bd9Sstevel@tonic-gate 	int		curlen = 0;
767c478bd9Sstevel@tonic-gate 	int		pplen;
777c478bd9Sstevel@tonic-gate 	daddr_t		bn;
787c478bd9Sstevel@tonic-gate 	int		contig = 0;
797c478bd9Sstevel@tonic-gate 	int		error = 0;
807c478bd9Sstevel@tonic-gate 	int		nbytes;			/* Number bytes this IO */
817c478bd9Sstevel@tonic-gate 	int		offsetn;		/* Start point this IO */
827c478bd9Sstevel@tonic-gate 	int		iswrite = flags & B_WRITE;
837c478bd9Sstevel@tonic-gate 	int		io_started = 0;		/* No IO started */
847c478bd9Sstevel@tonic-gate 	struct ulockfs	*ulp;
857c478bd9Sstevel@tonic-gate 	uint_t		protp = PROT_ALL;
867c478bd9Sstevel@tonic-gate 
877c478bd9Sstevel@tonic-gate 	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, !iswrite,
887c478bd9Sstevel@tonic-gate 	    &protp);
897c478bd9Sstevel@tonic-gate 	if (error) {
907c478bd9Sstevel@tonic-gate 		if (flags & B_ASYNC) {
917c478bd9Sstevel@tonic-gate 			fdb_ioerrdone(fdbp, error);
927c478bd9Sstevel@tonic-gate 		}
937c478bd9Sstevel@tonic-gate 		return (error);
947c478bd9Sstevel@tonic-gate 	}
957c478bd9Sstevel@tonic-gate 	fs = ufsvfsp->vfs_fs;
967c478bd9Sstevel@tonic-gate 	iolen = len;
977c478bd9Sstevel@tonic-gate 
987c478bd9Sstevel@tonic-gate 	DEBUGF((CE_CONT, "?ufs_rdwr: %s vp: %p pages:%p  off %llx len %lx"
997c478bd9Sstevel@tonic-gate 	    " isize: %llx fdb: %p\n",
1007c478bd9Sstevel@tonic-gate 	    flags & B_READ ? "READ" : "WRITE", (void *)vnodep,
1017c478bd9Sstevel@tonic-gate 	    (void *)vnodep->v_pages, offset1, iolen, ip->i_size, (void *)fdbp));
1027c478bd9Sstevel@tonic-gate 
1037c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
1047c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, rwtype);
1057c478bd9Sstevel@tonic-gate 
1067c478bd9Sstevel@tonic-gate 	ASSERT(offset1 < ip->i_size);
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate 	if ((offset1 + iolen) > ip->i_size) {
1097c478bd9Sstevel@tonic-gate 		iolen = ip->i_size - offset1;
1107c478bd9Sstevel@tonic-gate 	}
1117c478bd9Sstevel@tonic-gate 	while (!error && curlen < iolen) {
1127c478bd9Sstevel@tonic-gate 
1137c478bd9Sstevel@tonic-gate 		contig = 0;
1147c478bd9Sstevel@tonic-gate 
1157c478bd9Sstevel@tonic-gate 		if ((error = bmap_read(ip, offset1, &bn, &contig)) != 0) {
1167c478bd9Sstevel@tonic-gate 			break;
1177c478bd9Sstevel@tonic-gate 		}
1187c478bd9Sstevel@tonic-gate 		ASSERT(!(bn == UFS_HOLE && iswrite));
1197c478bd9Sstevel@tonic-gate 		if (bn == UFS_HOLE) {
1207c478bd9Sstevel@tonic-gate 			/*
1217c478bd9Sstevel@tonic-gate 			 * If the above assertion is true,
1227c478bd9Sstevel@tonic-gate 			 * then the following if statement can never be true.
1237c478bd9Sstevel@tonic-gate 			 */
1247c478bd9Sstevel@tonic-gate 			if (iswrite && (rwtype == RW_READER)) {
1257c478bd9Sstevel@tonic-gate 				rwtype = RW_WRITER;
1267c478bd9Sstevel@tonic-gate 				if (!rw_tryupgrade(&ip->i_contents)) {
1277c478bd9Sstevel@tonic-gate 					rw_exit(&ip->i_contents);
1287c478bd9Sstevel@tonic-gate 					rw_enter(&ip->i_contents, rwtype);
1297c478bd9Sstevel@tonic-gate 					continue;
1307c478bd9Sstevel@tonic-gate 				}
1317c478bd9Sstevel@tonic-gate 			}
1327c478bd9Sstevel@tonic-gate 			offsetn = blkoff(fs, offset1);
1337c478bd9Sstevel@tonic-gate 			pplen = P2ROUNDUP(len, PAGESIZE);
1347c478bd9Sstevel@tonic-gate 			nbytes = MIN((pplen - curlen),
1357c478bd9Sstevel@tonic-gate 			    (fs->fs_bsize - offsetn));
1367c478bd9Sstevel@tonic-gate 			ASSERT(nbytes > 0);
1377c478bd9Sstevel@tonic-gate 
1387c478bd9Sstevel@tonic-gate 			/*
1397c478bd9Sstevel@tonic-gate 			 * We may be reading or writing.
1407c478bd9Sstevel@tonic-gate 			 */
1417c478bd9Sstevel@tonic-gate 			DEBUGF((CE_CONT, "?ufs_rdwr_data: hole %llx - %lx\n",
1427c478bd9Sstevel@tonic-gate 			    offset1, (iolen - curlen)));
1437c478bd9Sstevel@tonic-gate 
1447c478bd9Sstevel@tonic-gate 			if (iswrite) {
1457c478bd9Sstevel@tonic-gate 				printf("**WARNING: ignoring hole in write\n");
1467c478bd9Sstevel@tonic-gate 				error = ENOSPC;
1477c478bd9Sstevel@tonic-gate 			} else {
1487c478bd9Sstevel@tonic-gate 				fdb_add_hole(fdbp, offset1 - offset, nbytes);
1497c478bd9Sstevel@tonic-gate 			}
1507c478bd9Sstevel@tonic-gate 			offset1 += nbytes;
1517c478bd9Sstevel@tonic-gate 			curlen += nbytes;
1527c478bd9Sstevel@tonic-gate 			continue;
1537c478bd9Sstevel@tonic-gate 
1547c478bd9Sstevel@tonic-gate 		}
1557c478bd9Sstevel@tonic-gate 		ASSERT(contig > 0);
1567c478bd9Sstevel@tonic-gate 		pplen = P2ROUNDUP(len, PAGESIZE);
1577c478bd9Sstevel@tonic-gate 
1587c478bd9Sstevel@tonic-gate 		contig = MIN(contig, len - curlen);
1597c478bd9Sstevel@tonic-gate 		contig = P2ROUNDUP(contig, DEV_BSIZE);
1607c478bd9Sstevel@tonic-gate 
1617c478bd9Sstevel@tonic-gate 		bp = fdb_iosetup(fdbp, offset1 - offset, contig, vnodep, flags);
1627c478bd9Sstevel@tonic-gate 
1637c478bd9Sstevel@tonic-gate 		bp->b_edev = ip->i_dev;
1647c478bd9Sstevel@tonic-gate 		bp->b_dev = cmpdev(ip->i_dev);
1657c478bd9Sstevel@tonic-gate 		bp->b_blkno = bn;
1667c478bd9Sstevel@tonic-gate 		bp->b_file = ip->i_vnode;
1677c478bd9Sstevel@tonic-gate 		bp->b_offset = (offset_t)offset1;
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate 		if (ufsvfsp->vfs_snapshot) {
1707c478bd9Sstevel@tonic-gate 			fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
1717c478bd9Sstevel@tonic-gate 		} else {
1727c478bd9Sstevel@tonic-gate 			(void) bdev_strategy(bp);
1737c478bd9Sstevel@tonic-gate 		}
1747c478bd9Sstevel@tonic-gate 		io_started = 1;
1757c478bd9Sstevel@tonic-gate 
1767c478bd9Sstevel@tonic-gate 		offset1 += contig;
1777c478bd9Sstevel@tonic-gate 		curlen += contig;
1787c478bd9Sstevel@tonic-gate 		if (iswrite)
1797c478bd9Sstevel@tonic-gate 			lwp_stat_update(LWP_STAT_OUBLK, 1);
1807c478bd9Sstevel@tonic-gate 		else
1817c478bd9Sstevel@tonic-gate 			lwp_stat_update(LWP_STAT_INBLK, 1);
1827c478bd9Sstevel@tonic-gate 
1837c478bd9Sstevel@tonic-gate 		if ((flags & B_ASYNC) == 0) {
1847c478bd9Sstevel@tonic-gate 			error = biowait(bp);
1857c478bd9Sstevel@tonic-gate 			fdb_iodone(bp);
1867c478bd9Sstevel@tonic-gate 		}
1877c478bd9Sstevel@tonic-gate 
1887c478bd9Sstevel@tonic-gate 		DEBUGF((CE_CONT, "?loop ufs_rdwr_data.. off %llx len %lx\n",
1897c478bd9Sstevel@tonic-gate 		    offset1, (iolen - curlen)));
1907c478bd9Sstevel@tonic-gate 	}
1917c478bd9Sstevel@tonic-gate 
1927c478bd9Sstevel@tonic-gate 	DEBUGF((CE_CONT, "?ufs_rdwr_data: off %llx len %lx pages: %p ------\n",
1937c478bd9Sstevel@tonic-gate 	    offset1, (iolen - curlen), (void *)vnodep->v_pages));
1947c478bd9Sstevel@tonic-gate 
1957c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
1967c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);
1977c478bd9Sstevel@tonic-gate 
1987c478bd9Sstevel@tonic-gate 	if (flags & B_ASYNC) {
1997c478bd9Sstevel@tonic-gate 		/*
2007c478bd9Sstevel@tonic-gate 		 * Show that no more asynchronous IO will be added
2017c478bd9Sstevel@tonic-gate 		 */
2027c478bd9Sstevel@tonic-gate 		fdb_ioerrdone(fdbp, error);
2037c478bd9Sstevel@tonic-gate 	}
2047c478bd9Sstevel@tonic-gate 	if (ulp) {
2057c478bd9Sstevel@tonic-gate 		ufs_lockfs_end(ulp);
2067c478bd9Sstevel@tonic-gate 	}
2077c478bd9Sstevel@tonic-gate 	if (io_started && flags & B_ASYNC) {
2087c478bd9Sstevel@tonic-gate 		return (0);
2097c478bd9Sstevel@tonic-gate 	} else {
2107c478bd9Sstevel@tonic-gate 		return (error);
2117c478bd9Sstevel@tonic-gate 	}
2127c478bd9Sstevel@tonic-gate }
2137c478bd9Sstevel@tonic-gate 
2147c478bd9Sstevel@tonic-gate /*
2157c478bd9Sstevel@tonic-gate  * ufs_alloc_data - supports allocating space and reads or writes
2167c478bd9Sstevel@tonic-gate  * that involve changes to file length or space allocation.
2177c478bd9Sstevel@tonic-gate  *
2187c478bd9Sstevel@tonic-gate  * This function is more expensive, because of the UFS log transaction,
2197c478bd9Sstevel@tonic-gate  * so ufs_rdwr_data() should be used when space or file length changes
2207c478bd9Sstevel@tonic-gate  * will not occur.
2217c478bd9Sstevel@tonic-gate  *
2227c478bd9Sstevel@tonic-gate  * Inputs:
2237c478bd9Sstevel@tonic-gate  * fdb - A null pointer instructs this function to only allocate
2247c478bd9Sstevel@tonic-gate  *	space for the specified offset and length.
2257c478bd9Sstevel@tonic-gate  *	An actual fdbuffer instructs this function to perform
2267c478bd9Sstevel@tonic-gate  *	the read or write operation.
2277c478bd9Sstevel@tonic-gate  * flags - defaults (zero value) to synchronous write
2287c478bd9Sstevel@tonic-gate  *	B_READ - indicates read operation
2297c478bd9Sstevel@tonic-gate  *	B_ASYNC - indicates perform operation asynchronously
2307c478bd9Sstevel@tonic-gate  */
2317c478bd9Sstevel@tonic-gate int
ufs_alloc_data(vnode_t * vnodep,u_offset_t offset,size_t * len,fdbuffer_t * fdbp,int flags,cred_t * credp)2327c478bd9Sstevel@tonic-gate ufs_alloc_data(
2337c478bd9Sstevel@tonic-gate 	vnode_t		*vnodep,
2347c478bd9Sstevel@tonic-gate 	u_offset_t	offset,
2357c478bd9Sstevel@tonic-gate 	size_t		*len,
2367c478bd9Sstevel@tonic-gate 	fdbuffer_t	*fdbp,
2377c478bd9Sstevel@tonic-gate 	int		flags,
2387c478bd9Sstevel@tonic-gate 	cred_t		*credp)
2397c478bd9Sstevel@tonic-gate {
2407c478bd9Sstevel@tonic-gate 	struct inode	*ip = VTOI(vnodep);
2417c478bd9Sstevel@tonic-gate 	size_t		done_len, io_len;
2427c478bd9Sstevel@tonic-gate 	int		contig;
2437c478bd9Sstevel@tonic-gate 	u_offset_t	uoff, io_off;
244*da6c28aaSamw 	int		error = 0;		/* No error occurred */
2457c478bd9Sstevel@tonic-gate 	int		offsetn;		/* Start point this IO */
2467c478bd9Sstevel@tonic-gate 	int		nbytes;			/* Number bytes in this IO */
2477c478bd9Sstevel@tonic-gate 	daddr_t		bn;
2487c478bd9Sstevel@tonic-gate 	struct fs	*fs;
2497c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
2507c478bd9Sstevel@tonic-gate 	int		i_size_changed = 0;
2517c478bd9Sstevel@tonic-gate 	u_offset_t	old_i_size;
2527c478bd9Sstevel@tonic-gate 	struct ulockfs	*ulp;
2537c478bd9Sstevel@tonic-gate 	int		trans_size;
2547c478bd9Sstevel@tonic-gate 	int		issync;			/* UFS Log transaction */
2557c478bd9Sstevel@tonic-gate 						/* synchronous when non-zero */
2567c478bd9Sstevel@tonic-gate 
2577c478bd9Sstevel@tonic-gate 	int		io_started = 0;		/* No IO started */
2587c478bd9Sstevel@tonic-gate 	uint_t		protp = PROT_ALL;
2597c478bd9Sstevel@tonic-gate 
2607c478bd9Sstevel@tonic-gate 	ASSERT((flags & B_WRITE) == 0);
2617c478bd9Sstevel@tonic-gate 
2627c478bd9Sstevel@tonic-gate 	/*
2637c478bd9Sstevel@tonic-gate 	 * Obey the lockfs protocol
2647c478bd9Sstevel@tonic-gate 	 */
2657c478bd9Sstevel@tonic-gate 	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp);
2667c478bd9Sstevel@tonic-gate 	if (error) {
2677c478bd9Sstevel@tonic-gate 		if ((fdbp != NULL) && (flags & B_ASYNC)) {
2687c478bd9Sstevel@tonic-gate 			fdb_ioerrdone(fdbp, error);
2697c478bd9Sstevel@tonic-gate 		}
2707c478bd9Sstevel@tonic-gate 		return (error);
2717c478bd9Sstevel@tonic-gate 	}
2727c478bd9Sstevel@tonic-gate 	if (ulp) {
2737c478bd9Sstevel@tonic-gate 		/*
2747c478bd9Sstevel@tonic-gate 		 * Try to begin a UFS log transaction
2757c478bd9Sstevel@tonic-gate 		 */
2767c478bd9Sstevel@tonic-gate 		trans_size = TOP_GETPAGE_SIZE(ip);
2777c478bd9Sstevel@tonic-gate 		TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE,
2787c478bd9Sstevel@tonic-gate 		    trans_size, error);
2797c478bd9Sstevel@tonic-gate 		if (error == EWOULDBLOCK) {
2807c478bd9Sstevel@tonic-gate 			ufs_lockfs_end(ulp);
2817c478bd9Sstevel@tonic-gate 			if ((fdbp != NULL) && (flags & B_ASYNC)) {
2827c478bd9Sstevel@tonic-gate 				fdb_ioerrdone(fdbp, EDEADLK);
2837c478bd9Sstevel@tonic-gate 			}
2847c478bd9Sstevel@tonic-gate 			return (EDEADLK);
2857c478bd9Sstevel@tonic-gate 		}
2867c478bd9Sstevel@tonic-gate 	}
2877c478bd9Sstevel@tonic-gate 
2887c478bd9Sstevel@tonic-gate 	uoff = offset;
2897c478bd9Sstevel@tonic-gate 	io_off = offset;
2907c478bd9Sstevel@tonic-gate 	io_len = *len;
2917c478bd9Sstevel@tonic-gate 	done_len = 0;
2927c478bd9Sstevel@tonic-gate 
2937c478bd9Sstevel@tonic-gate 	DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n",
2947c478bd9Sstevel@tonic-gate 	    uoff, (io_len - done_len), ip->i_size, (void *)fdbp));
2957c478bd9Sstevel@tonic-gate 
2967c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
2977c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate 	ASSERT((ip->i_mode & IFMT) == IFREG);
3007c478bd9Sstevel@tonic-gate 
3017c478bd9Sstevel@tonic-gate 	fs = ip->i_fs;
3027c478bd9Sstevel@tonic-gate 
3037c478bd9Sstevel@tonic-gate 	while (error == 0 && done_len < io_len) {
3047c478bd9Sstevel@tonic-gate 		uoff = (u_offset_t)(io_off + done_len);
3057c478bd9Sstevel@tonic-gate 		offsetn = (int)blkoff(fs, uoff);
3067c478bd9Sstevel@tonic-gate 		nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len);
3077c478bd9Sstevel@tonic-gate 
3087c478bd9Sstevel@tonic-gate 		DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n",
3097c478bd9Sstevel@tonic-gate 		    uoff, nbytes));
3107c478bd9Sstevel@tonic-gate 
3117c478bd9Sstevel@tonic-gate 		if (uoff + nbytes > ip->i_size) {
3127c478bd9Sstevel@tonic-gate 			/*
3137c478bd9Sstevel@tonic-gate 			 * We are extending the length of the file.
3147c478bd9Sstevel@tonic-gate 			 * bmap is used so that we are sure that
3157c478bd9Sstevel@tonic-gate 			 * if we need to allocate new blocks, that it
3167c478bd9Sstevel@tonic-gate 			 * is done here before we up the file size.
3177c478bd9Sstevel@tonic-gate 			 */
3187c478bd9Sstevel@tonic-gate 			DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n",
3197c478bd9Sstevel@tonic-gate 			    ip->i_size, uoff + nbytes));
3207c478bd9Sstevel@tonic-gate 
321303bf60bSsdebnath 			error = bmap_write(ip, uoff, (offsetn + nbytes),
322303bf60bSsdebnath 			    BI_ALLOC_ONLY, NULL, credp);
3237c478bd9Sstevel@tonic-gate 			if (ip->i_flag & (ICHG|IUPD))
3247c478bd9Sstevel@tonic-gate 				ip->i_seq++;
3257c478bd9Sstevel@tonic-gate 			if (error) {
3267c478bd9Sstevel@tonic-gate 				DEBUGF((CE_CONT, "?ufs_alloc_data: grow "
3277c478bd9Sstevel@tonic-gate 				    "failed err: %d\n", error));
3287c478bd9Sstevel@tonic-gate 				break;
3297c478bd9Sstevel@tonic-gate 			}
3307c478bd9Sstevel@tonic-gate 			if (fdbp != NULL) {
3317c478bd9Sstevel@tonic-gate 				if (uoff >= ip->i_size) {
3327c478bd9Sstevel@tonic-gate 					/*
3337c478bd9Sstevel@tonic-gate 					 * Desired offset is past end of bytes
3347c478bd9Sstevel@tonic-gate 					 * in file, so we have a hole.
3357c478bd9Sstevel@tonic-gate 					 */
3367c478bd9Sstevel@tonic-gate 					fdb_add_hole(fdbp, uoff - offset,
3377c478bd9Sstevel@tonic-gate 					    nbytes);
3387c478bd9Sstevel@tonic-gate 				} else {
3397c478bd9Sstevel@tonic-gate 					int contig;
3407c478bd9Sstevel@tonic-gate 					buf_t *bp;
3417c478bd9Sstevel@tonic-gate 
3427c478bd9Sstevel@tonic-gate 					error = bmap_read(ip, uoff, &bn,
3437c478bd9Sstevel@tonic-gate 					    &contig);
3447c478bd9Sstevel@tonic-gate 					if (error) {
3457c478bd9Sstevel@tonic-gate 						break;
3467c478bd9Sstevel@tonic-gate 					}
3477c478bd9Sstevel@tonic-gate 
3487c478bd9Sstevel@tonic-gate 					contig = ip->i_size - uoff;
3497c478bd9Sstevel@tonic-gate 					contig = P2ROUNDUP(contig, DEV_BSIZE);
3507c478bd9Sstevel@tonic-gate 
3517c478bd9Sstevel@tonic-gate 					bp = fdb_iosetup(fdbp, uoff - offset,
3527c478bd9Sstevel@tonic-gate 					    contig, vnodep, flags);
3537c478bd9Sstevel@tonic-gate 
3547c478bd9Sstevel@tonic-gate 					bp->b_edev = ip->i_dev;
3557c478bd9Sstevel@tonic-gate 					bp->b_dev = cmpdev(ip->i_dev);
3567c478bd9Sstevel@tonic-gate 					bp->b_blkno = bn;
3577c478bd9Sstevel@tonic-gate 					bp->b_file = ip->i_vnode;
3587c478bd9Sstevel@tonic-gate 					bp->b_offset = (offset_t)uoff;
3597c478bd9Sstevel@tonic-gate 
3607c478bd9Sstevel@tonic-gate 					if (ufsvfsp->vfs_snapshot) {
3617c478bd9Sstevel@tonic-gate 						fssnap_strategy(
3627c478bd9Sstevel@tonic-gate 						    &ufsvfsp->vfs_snapshot, bp);
3637c478bd9Sstevel@tonic-gate 					} else {
3647c478bd9Sstevel@tonic-gate 						(void) bdev_strategy(bp);
3657c478bd9Sstevel@tonic-gate 					}
3667c478bd9Sstevel@tonic-gate 					io_started = 1;
3677c478bd9Sstevel@tonic-gate 
3687c478bd9Sstevel@tonic-gate 					lwp_stat_update(LWP_STAT_OUBLK, 1);
3697c478bd9Sstevel@tonic-gate 
3707c478bd9Sstevel@tonic-gate 					if ((flags & B_ASYNC) == 0) {
3717c478bd9Sstevel@tonic-gate 						error = biowait(bp);
3727c478bd9Sstevel@tonic-gate 						fdb_iodone(bp);
3737c478bd9Sstevel@tonic-gate 						if (error) {
3747c478bd9Sstevel@tonic-gate 							break;
3757c478bd9Sstevel@tonic-gate 						}
3767c478bd9Sstevel@tonic-gate 					}
3777c478bd9Sstevel@tonic-gate 					if (contig > (ip->i_size - uoff)) {
3787c478bd9Sstevel@tonic-gate 						contig -= ip->i_size - uoff;
3797c478bd9Sstevel@tonic-gate 
3807c478bd9Sstevel@tonic-gate 						fdb_add_hole(fdbp,
3817c478bd9Sstevel@tonic-gate 						    ip->i_size - offset,
3827c478bd9Sstevel@tonic-gate 						    contig);
3837c478bd9Sstevel@tonic-gate 					}
3847c478bd9Sstevel@tonic-gate 				}
3857c478bd9Sstevel@tonic-gate 			}
3867c478bd9Sstevel@tonic-gate 
3877c478bd9Sstevel@tonic-gate 			i_size_changed = 1;
3887c478bd9Sstevel@tonic-gate 			old_i_size = ip->i_size;
3897c478bd9Sstevel@tonic-gate 			UFS_SET_ISIZE(uoff + nbytes, ip);
3907c478bd9Sstevel@tonic-gate 			TRANS_INODE(ip->i_ufsvfs, ip);
3917c478bd9Sstevel@tonic-gate 			/*
3927c478bd9Sstevel@tonic-gate 			 * file has grown larger than 2GB. Set flag
3937c478bd9Sstevel@tonic-gate 			 * in superblock to indicate this, if it
3947c478bd9Sstevel@tonic-gate 			 * is not already set.
3957c478bd9Sstevel@tonic-gate 			 */
3967c478bd9Sstevel@tonic-gate 			if ((ip->i_size > MAXOFF32_T) &&
3977c478bd9Sstevel@tonic-gate 			    !(fs->fs_flags & FSLARGEFILES)) {
3987c478bd9Sstevel@tonic-gate 				ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
3997c478bd9Sstevel@tonic-gate 				mutex_enter(&ufsvfsp->vfs_lock);
4007c478bd9Sstevel@tonic-gate 				fs->fs_flags |= FSLARGEFILES;
4017c478bd9Sstevel@tonic-gate 				ufs_sbwrite(ufsvfsp);
4027c478bd9Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
4037c478bd9Sstevel@tonic-gate 			}
4047c478bd9Sstevel@tonic-gate 		} else {
4057c478bd9Sstevel@tonic-gate 			/*
4067c478bd9Sstevel@tonic-gate 			 * The file length is not being extended.
4077c478bd9Sstevel@tonic-gate 			 */
4087c478bd9Sstevel@tonic-gate 			error = bmap_read(ip, uoff, &bn, &contig);
4097c478bd9Sstevel@tonic-gate 			if (error) {
4107c478bd9Sstevel@tonic-gate 				DEBUGF((CE_CONT, "?ufs_alloc_data: "
4117c478bd9Sstevel@tonic-gate 				    "bmap_read err: %d\n", error));
4127c478bd9Sstevel@tonic-gate 				break;
4137c478bd9Sstevel@tonic-gate 			}
4147c478bd9Sstevel@tonic-gate 
4157c478bd9Sstevel@tonic-gate 			if (bn != UFS_HOLE) {
4167c478bd9Sstevel@tonic-gate 				/*
4177c478bd9Sstevel@tonic-gate 				 * Did not map a hole in the file
4187c478bd9Sstevel@tonic-gate 				 */
4197c478bd9Sstevel@tonic-gate 				int	contig = P2ROUNDUP(nbytes, DEV_BSIZE);
4207c478bd9Sstevel@tonic-gate 				buf_t	*bp;
4217c478bd9Sstevel@tonic-gate 
4227c478bd9Sstevel@tonic-gate 				if (fdbp != NULL) {
4237c478bd9Sstevel@tonic-gate 					bp = fdb_iosetup(fdbp, uoff - offset,
4247c478bd9Sstevel@tonic-gate 					    contig, vnodep, flags);
4257c478bd9Sstevel@tonic-gate 
4267c478bd9Sstevel@tonic-gate 					bp->b_edev = ip->i_dev;
4277c478bd9Sstevel@tonic-gate 					bp->b_dev = cmpdev(ip->i_dev);
4287c478bd9Sstevel@tonic-gate 					bp->b_blkno = bn;
4297c478bd9Sstevel@tonic-gate 					bp->b_file = ip->i_vnode;
4307c478bd9Sstevel@tonic-gate 					bp->b_offset = (offset_t)uoff;
4317c478bd9Sstevel@tonic-gate 
4327c478bd9Sstevel@tonic-gate 					if (ufsvfsp->vfs_snapshot) {
4337c478bd9Sstevel@tonic-gate 						fssnap_strategy(
4347c478bd9Sstevel@tonic-gate 						    &ufsvfsp->vfs_snapshot, bp);
4357c478bd9Sstevel@tonic-gate 					} else {
4367c478bd9Sstevel@tonic-gate 						(void) bdev_strategy(bp);
4377c478bd9Sstevel@tonic-gate 					}
4387c478bd9Sstevel@tonic-gate 					io_started = 1;
4397c478bd9Sstevel@tonic-gate 
4407c478bd9Sstevel@tonic-gate 					lwp_stat_update(LWP_STAT_OUBLK, 1);
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 					if ((flags & B_ASYNC) == 0) {
4437c478bd9Sstevel@tonic-gate 						error = biowait(bp);
4447c478bd9Sstevel@tonic-gate 						fdb_iodone(bp);
4457c478bd9Sstevel@tonic-gate 						if (error) {
4467c478bd9Sstevel@tonic-gate 							break;
4477c478bd9Sstevel@tonic-gate 						}
4487c478bd9Sstevel@tonic-gate 					}
4497c478bd9Sstevel@tonic-gate 				}
4507c478bd9Sstevel@tonic-gate 			} else {
4517c478bd9Sstevel@tonic-gate 				/*
4527c478bd9Sstevel@tonic-gate 				 * We read a hole in the file.
4537c478bd9Sstevel@tonic-gate 				 * We have to allocate blocks for the hole.
4547c478bd9Sstevel@tonic-gate 				 */
4557c478bd9Sstevel@tonic-gate 				error = bmap_write(ip, uoff, (offsetn + nbytes),
456303bf60bSsdebnath 				    BI_ALLOC_ONLY, NULL, credp);
4577c478bd9Sstevel@tonic-gate 				if (ip->i_flag & (ICHG|IUPD))
4587c478bd9Sstevel@tonic-gate 					ip->i_seq++;
4597c478bd9Sstevel@tonic-gate 				if (error) {
4607c478bd9Sstevel@tonic-gate 					DEBUGF((CE_CONT, "?ufs_alloc_data: fill"
4617c478bd9Sstevel@tonic-gate 					    " hole failed error: %d\n", error));
4627c478bd9Sstevel@tonic-gate 					break;
4637c478bd9Sstevel@tonic-gate 				}
4647c478bd9Sstevel@tonic-gate 				if (fdbp != NULL) {
4657c478bd9Sstevel@tonic-gate 					fdb_add_hole(fdbp, uoff - offset,
4667c478bd9Sstevel@tonic-gate 					    nbytes);
4677c478bd9Sstevel@tonic-gate 				}
4687c478bd9Sstevel@tonic-gate 			}
4697c478bd9Sstevel@tonic-gate 		}
4707c478bd9Sstevel@tonic-gate 		done_len += nbytes;
4717c478bd9Sstevel@tonic-gate 	}
4727c478bd9Sstevel@tonic-gate 
4737c478bd9Sstevel@tonic-gate 	if (error) {
4747c478bd9Sstevel@tonic-gate 		if (i_size_changed) {
4757c478bd9Sstevel@tonic-gate 			/*
4767c478bd9Sstevel@tonic-gate 			 * Allocation of the blocks for the file failed.
4777c478bd9Sstevel@tonic-gate 			 * So truncate the file size back to its original size.
4787c478bd9Sstevel@tonic-gate 			 */
4797c478bd9Sstevel@tonic-gate 			(void) ufs_itrunc(ip, old_i_size, 0, credp);
4807c478bd9Sstevel@tonic-gate 		}
4817c478bd9Sstevel@tonic-gate 	}
4827c478bd9Sstevel@tonic-gate 
4837c478bd9Sstevel@tonic-gate 	DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n",
4847c478bd9Sstevel@tonic-gate 	    uoff, (io_len - done_len)));
4857c478bd9Sstevel@tonic-gate 
4867c478bd9Sstevel@tonic-gate 	if ((offset + *len) < (NDADDR * fs->fs_bsize)) {
4877c478bd9Sstevel@tonic-gate 		*len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset);
4887c478bd9Sstevel@tonic-gate 	} else {
4897c478bd9Sstevel@tonic-gate 		*len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset);
4907c478bd9Sstevel@tonic-gate 	}
4917c478bd9Sstevel@tonic-gate 
4927c478bd9Sstevel@tonic-gate 	/*
4937c478bd9Sstevel@tonic-gate 	 * Flush cached pages.
4947c478bd9Sstevel@tonic-gate 	 *
4957c478bd9Sstevel@tonic-gate 	 * XXX - There should be no pages involved, since the I/O was performed
4967c478bd9Sstevel@tonic-gate 	 * through the device strategy routine and the page cache was bypassed.
4977c478bd9Sstevel@tonic-gate 	 * However, testing has demonstrated that this VOP_PUTPAGE is
4987c478bd9Sstevel@tonic-gate 	 * necessary. Without this, data might not always be read back as it
4997c478bd9Sstevel@tonic-gate 	 * was written.
5007c478bd9Sstevel@tonic-gate 	 *
5017c478bd9Sstevel@tonic-gate 	 */
502*da6c28aaSamw 	(void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp, NULL);
5037c478bd9Sstevel@tonic-gate 
5047c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
5057c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);
5067c478bd9Sstevel@tonic-gate 
5077c478bd9Sstevel@tonic-gate 	if ((fdbp != NULL) && (flags & B_ASYNC)) {
5087c478bd9Sstevel@tonic-gate 		/*
5097c478bd9Sstevel@tonic-gate 		 * Show that no more asynchronous IO will be added
5107c478bd9Sstevel@tonic-gate 		 */
5117c478bd9Sstevel@tonic-gate 		fdb_ioerrdone(fdbp, error);
5127c478bd9Sstevel@tonic-gate 	}
5137c478bd9Sstevel@tonic-gate 	if (ulp) {
5147c478bd9Sstevel@tonic-gate 		/*
5157c478bd9Sstevel@tonic-gate 		 * End the UFS Log transaction
5167c478bd9Sstevel@tonic-gate 		 */
5177c478bd9Sstevel@tonic-gate 		TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE,
5187c478bd9Sstevel@tonic-gate 		    trans_size);
5197c478bd9Sstevel@tonic-gate 		ufs_lockfs_end(ulp);
5207c478bd9Sstevel@tonic-gate 	}
5217c478bd9Sstevel@tonic-gate 	if (io_started && (flags & B_ASYNC)) {
5227c478bd9Sstevel@tonic-gate 		return (0);
5237c478bd9Sstevel@tonic-gate 	} else {
5247c478bd9Sstevel@tonic-gate 		return (error);
5257c478bd9Sstevel@tonic-gate 	}
5267c478bd9Sstevel@tonic-gate }
527