1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/types.h>
29#include <sys/param.h>
30#include <sys/sysmacros.h>
31#include <sys/conf.h>
32#include <sys/fssnap_if.h>
33#include <sys/fs/ufs_inode.h>
34#include <sys/fs/ufs_lockfs.h>
35#include <sys/fs/ufs_log.h>
36#include <sys/fs/ufs_trans.h>
37#include <sys/cmn_err.h>
38#include <vm/pvn.h>
39#include <vm/seg_map.h>
40#include <sys/fdbuffer.h>
41
#ifdef DEBUG
/*
 * Debug tracing for this file.  Messages are emitted only when
 * evn_ufs_debug is non-zero.
 *
 * DEBUGF takes one parenthesized cmn_err() argument list, e.g.
 *	DEBUGF((CE_CONT, "?fmt %d\n", val));
 *
 * The macro is wrapped in do { } while (0) so that it behaves as a single
 * statement and can be used safely in an unbraced if/else.
 */
int evn_ufs_debug = 0;
#define	DEBUGF(args)	do { if (evn_ufs_debug) cmn_err args; } while (0)
#else
/* Tracing is compiled out; the macro is still a valid expression statement */
#define	DEBUGF(args)	((void)0)
#endif
48
49/*
50 * ufs_rdwr_data - supports reading or writing data when
51 * no changes are permitted in file size or space allocation.
52 *
53 * Inputs:
54 * fdb - The mandatory fdbuffer supports
55 *	the read or write operation.
56 * flags - defaults (zero value) to synchronous write
57 *	B_READ - indicates read operation
58 *	B_ASYNC - indicates perform operation asynchronously
59 */
60/*ARGSUSED*/
61int
62ufs_rdwr_data(
63	vnode_t		*vnodep,
64	u_offset_t	offset,
65	size_t		len,
66	fdbuffer_t	*fdbp,
67	int		flags,
68	cred_t		*credp)
69{
70	struct inode	*ip = VTOI(vnodep);
71	struct fs	*fs;
72	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
73	struct buf	*bp;
74	krw_t		rwtype = RW_READER;
75	u_offset_t	offset1 = offset;	/* Initial offset */
76	size_t		iolen;
77	int		curlen = 0;
78	int		pplen;
79	daddr_t		bn;
80	int		contig = 0;
81	int		error = 0;
82	int		nbytes;			/* Number bytes this IO */
83	int		offsetn;		/* Start point this IO */
84	int		iswrite = flags & B_WRITE;
85	int		io_started = 0;		/* No IO started */
86	struct ulockfs	*ulp;
87	uint_t		protp = PROT_ALL;
88
89	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, !iswrite,
90	    &protp);
91	if (error) {
92		if (flags & B_ASYNC) {
93			fdb_ioerrdone(fdbp, error);
94		}
95		return (error);
96	}
97	fs = ufsvfsp->vfs_fs;
98	iolen = len;
99
100	DEBUGF((CE_CONT, "?ufs_rdwr: %s vp: %p pages:%p  off %llx len %lx"
101	    " isize: %llx fdb: %p\n",
102	    flags & B_READ ? "READ" : "WRITE", (void *)vnodep,
103	    (void *)vnodep->v_pages, offset1, iolen, ip->i_size, (void *)fdbp));
104
105	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
106	rw_enter(&ip->i_contents, rwtype);
107
108	ASSERT(offset1 < ip->i_size);
109
110	if ((offset1 + iolen) > ip->i_size) {
111		iolen = ip->i_size - offset1;
112	}
113	while (!error && curlen < iolen) {
114
115		contig = 0;
116
117		if ((error = bmap_read(ip, offset1, &bn, &contig)) != 0) {
118			break;
119		}
120		ASSERT(!(bn == UFS_HOLE && iswrite));
121		if (bn == UFS_HOLE) {
122			/*
123			 * If the above assertion is true,
124			 * then the following if statement can never be true.
125			 */
126			if (iswrite && (rwtype == RW_READER)) {
127				rwtype = RW_WRITER;
128				if (!rw_tryupgrade(&ip->i_contents)) {
129					rw_exit(&ip->i_contents);
130					rw_enter(&ip->i_contents, rwtype);
131					continue;
132				}
133			}
134			offsetn = blkoff(fs, offset1);
135			pplen = P2ROUNDUP(len, PAGESIZE);
136			nbytes = MIN((pplen - curlen),
137			    (fs->fs_bsize - offsetn));
138			ASSERT(nbytes > 0);
139
140			/*
141			 * We may be reading or writing.
142			 */
143			DEBUGF((CE_CONT, "?ufs_rdwr_data: hole %llx - %lx\n",
144			    offset1, (iolen - curlen)));
145
146			if (iswrite) {
147				printf("**WARNING: ignoring hole in write\n");
148				error = ENOSPC;
149			} else {
150				fdb_add_hole(fdbp, offset1 - offset, nbytes);
151			}
152			offset1 += nbytes;
153			curlen += nbytes;
154			continue;
155
156		}
157		ASSERT(contig > 0);
158		pplen = P2ROUNDUP(len, PAGESIZE);
159
160		contig = MIN(contig, len - curlen);
161		contig = P2ROUNDUP(contig, DEV_BSIZE);
162
163		bp = fdb_iosetup(fdbp, offset1 - offset, contig, vnodep, flags);
164
165		bp->b_edev = ip->i_dev;
166		bp->b_dev = cmpdev(ip->i_dev);
167		bp->b_blkno = bn;
168		bp->b_file = ip->i_vnode;
169		bp->b_offset = (offset_t)offset1;
170
171		if (ufsvfsp->vfs_snapshot) {
172			fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
173		} else {
174			(void) bdev_strategy(bp);
175		}
176		io_started = 1;
177
178		offset1 += contig;
179		curlen += contig;
180		if (iswrite)
181			lwp_stat_update(LWP_STAT_OUBLK, 1);
182		else
183			lwp_stat_update(LWP_STAT_INBLK, 1);
184
185		if ((flags & B_ASYNC) == 0) {
186			error = biowait(bp);
187			fdb_iodone(bp);
188		}
189
190		DEBUGF((CE_CONT, "?loop ufs_rdwr_data.. off %llx len %lx\n",
191		    offset1, (iolen - curlen)));
192	}
193
194	DEBUGF((CE_CONT, "?ufs_rdwr_data: off %llx len %lx pages: %p ------\n",
195	    offset1, (iolen - curlen), (void *)vnodep->v_pages));
196
197	rw_exit(&ip->i_contents);
198	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);
199
200	if (flags & B_ASYNC) {
201		/*
202		 * Show that no more asynchronous IO will be added
203		 */
204		fdb_ioerrdone(fdbp, error);
205	}
206	if (ulp) {
207		ufs_lockfs_end(ulp);
208	}
209	if (io_started && flags & B_ASYNC) {
210		return (0);
211	} else {
212		return (error);
213	}
214}
215
216/*
217 * ufs_alloc_data - supports allocating space and reads or writes
218 * that involve changes to file length or space allocation.
219 *
220 * This function is more expensive, because of the UFS log transaction,
221 * so ufs_rdwr_data() should be used when space or file length changes
222 * will not occur.
223 *
224 * Inputs:
225 * fdb - A null pointer instructs this function to only allocate
226 *	space for the specified offset and length.
227 *	An actual fdbuffer instructs this function to perform
228 *	the read or write operation.
229 * flags - defaults (zero value) to synchronous write
230 *	B_READ - indicates read operation
231 *	B_ASYNC - indicates perform operation asynchronously
232 */
233int
234ufs_alloc_data(
235	vnode_t		*vnodep,
236	u_offset_t	offset,
237	size_t		*len,
238	fdbuffer_t	*fdbp,
239	int		flags,
240	cred_t		*credp)
241{
242	struct inode	*ip = VTOI(vnodep);
243	size_t		done_len, io_len;
244	int		contig;
245	u_offset_t	uoff, io_off;
246	int		error = 0;		/* No error occurred */
247	int		offsetn;		/* Start point this IO */
248	int		nbytes;			/* Number bytes in this IO */
249	daddr_t		bn;
250	struct fs	*fs;
251	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
252	int		i_size_changed = 0;
253	u_offset_t	old_i_size;
254	struct ulockfs	*ulp;
255	int		trans_size;
256	int		issync;			/* UFS Log transaction */
257						/* synchronous when non-zero */
258
259	int		io_started = 0;		/* No IO started */
260	uint_t		protp = PROT_ALL;
261
262	ASSERT((flags & B_WRITE) == 0);
263
264	/*
265	 * Obey the lockfs protocol
266	 */
267	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp);
268	if (error) {
269		if ((fdbp != NULL) && (flags & B_ASYNC)) {
270			fdb_ioerrdone(fdbp, error);
271		}
272		return (error);
273	}
274	if (ulp) {
275		/*
276		 * Try to begin a UFS log transaction
277		 */
278		trans_size = TOP_GETPAGE_SIZE(ip);
279		TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE,
280		    trans_size, error);
281		if (error == EWOULDBLOCK) {
282			ufs_lockfs_end(ulp);
283			if ((fdbp != NULL) && (flags & B_ASYNC)) {
284				fdb_ioerrdone(fdbp, EDEADLK);
285			}
286			return (EDEADLK);
287		}
288	}
289
290	uoff = offset;
291	io_off = offset;
292	io_len = *len;
293	done_len = 0;
294
295	DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n",
296	    uoff, (io_len - done_len), ip->i_size, (void *)fdbp));
297
298	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
299	rw_enter(&ip->i_contents, RW_WRITER);
300
301	ASSERT((ip->i_mode & IFMT) == IFREG);
302
303	fs = ip->i_fs;
304
305	while (error == 0 && done_len < io_len) {
306		uoff = (u_offset_t)(io_off + done_len);
307		offsetn = (int)blkoff(fs, uoff);
308		nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len);
309
310		DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n",
311		    uoff, nbytes));
312
313		if (uoff + nbytes > ip->i_size) {
314			/*
315			 * We are extending the length of the file.
316			 * bmap is used so that we are sure that
317			 * if we need to allocate new blocks, that it
318			 * is done here before we up the file size.
319			 */
320			DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n",
321			    ip->i_size, uoff + nbytes));
322
323			error = bmap_write(ip, uoff, (offsetn + nbytes),
324			    BI_ALLOC_ONLY, NULL, credp);
325			if (ip->i_flag & (ICHG|IUPD))
326				ip->i_seq++;
327			if (error) {
328				DEBUGF((CE_CONT, "?ufs_alloc_data: grow "
329				    "failed err: %d\n", error));
330				break;
331			}
332			if (fdbp != NULL) {
333				if (uoff >= ip->i_size) {
334					/*
335					 * Desired offset is past end of bytes
336					 * in file, so we have a hole.
337					 */
338					fdb_add_hole(fdbp, uoff - offset,
339					    nbytes);
340				} else {
341					int contig;
342					buf_t *bp;
343
344					error = bmap_read(ip, uoff, &bn,
345					    &contig);
346					if (error) {
347						break;
348					}
349
350					contig = ip->i_size - uoff;
351					contig = P2ROUNDUP(contig, DEV_BSIZE);
352
353					bp = fdb_iosetup(fdbp, uoff - offset,
354					    contig, vnodep, flags);
355
356					bp->b_edev = ip->i_dev;
357					bp->b_dev = cmpdev(ip->i_dev);
358					bp->b_blkno = bn;
359					bp->b_file = ip->i_vnode;
360					bp->b_offset = (offset_t)uoff;
361
362					if (ufsvfsp->vfs_snapshot) {
363						fssnap_strategy(
364						    &ufsvfsp->vfs_snapshot, bp);
365					} else {
366						(void) bdev_strategy(bp);
367					}
368					io_started = 1;
369
370					lwp_stat_update(LWP_STAT_OUBLK, 1);
371
372					if ((flags & B_ASYNC) == 0) {
373						error = biowait(bp);
374						fdb_iodone(bp);
375						if (error) {
376							break;
377						}
378					}
379					if (contig > (ip->i_size - uoff)) {
380						contig -= ip->i_size - uoff;
381
382						fdb_add_hole(fdbp,
383						    ip->i_size - offset,
384						    contig);
385					}
386				}
387			}
388
389			i_size_changed = 1;
390			old_i_size = ip->i_size;
391			UFS_SET_ISIZE(uoff + nbytes, ip);
392			TRANS_INODE(ip->i_ufsvfs, ip);
393			/*
394			 * file has grown larger than 2GB. Set flag
395			 * in superblock to indicate this, if it
396			 * is not already set.
397			 */
398			if ((ip->i_size > MAXOFF32_T) &&
399			    !(fs->fs_flags & FSLARGEFILES)) {
400				ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
401				mutex_enter(&ufsvfsp->vfs_lock);
402				fs->fs_flags |= FSLARGEFILES;
403				ufs_sbwrite(ufsvfsp);
404				mutex_exit(&ufsvfsp->vfs_lock);
405			}
406		} else {
407			/*
408			 * The file length is not being extended.
409			 */
410			error = bmap_read(ip, uoff, &bn, &contig);
411			if (error) {
412				DEBUGF((CE_CONT, "?ufs_alloc_data: "
413				    "bmap_read err: %d\n", error));
414				break;
415			}
416
417			if (bn != UFS_HOLE) {
418				/*
419				 * Did not map a hole in the file
420				 */
421				int	contig = P2ROUNDUP(nbytes, DEV_BSIZE);
422				buf_t	*bp;
423
424				if (fdbp != NULL) {
425					bp = fdb_iosetup(fdbp, uoff - offset,
426					    contig, vnodep, flags);
427
428					bp->b_edev = ip->i_dev;
429					bp->b_dev = cmpdev(ip->i_dev);
430					bp->b_blkno = bn;
431					bp->b_file = ip->i_vnode;
432					bp->b_offset = (offset_t)uoff;
433
434					if (ufsvfsp->vfs_snapshot) {
435						fssnap_strategy(
436						    &ufsvfsp->vfs_snapshot, bp);
437					} else {
438						(void) bdev_strategy(bp);
439					}
440					io_started = 1;
441
442					lwp_stat_update(LWP_STAT_OUBLK, 1);
443
444					if ((flags & B_ASYNC) == 0) {
445						error = biowait(bp);
446						fdb_iodone(bp);
447						if (error) {
448							break;
449						}
450					}
451				}
452			} else {
453				/*
454				 * We read a hole in the file.
455				 * We have to allocate blocks for the hole.
456				 */
457				error = bmap_write(ip, uoff, (offsetn + nbytes),
458				    BI_ALLOC_ONLY, NULL, credp);
459				if (ip->i_flag & (ICHG|IUPD))
460					ip->i_seq++;
461				if (error) {
462					DEBUGF((CE_CONT, "?ufs_alloc_data: fill"
463					    " hole failed error: %d\n", error));
464					break;
465				}
466				if (fdbp != NULL) {
467					fdb_add_hole(fdbp, uoff - offset,
468					    nbytes);
469				}
470			}
471		}
472		done_len += nbytes;
473	}
474
475	if (error) {
476		if (i_size_changed) {
477			/*
478			 * Allocation of the blocks for the file failed.
479			 * So truncate the file size back to its original size.
480			 */
481			(void) ufs_itrunc(ip, old_i_size, 0, credp);
482		}
483	}
484
485	DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n",
486	    uoff, (io_len - done_len)));
487
488	if ((offset + *len) < (NDADDR * fs->fs_bsize)) {
489		*len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset);
490	} else {
491		*len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset);
492	}
493
494	/*
495	 * Flush cached pages.
496	 *
497	 * XXX - There should be no pages involved, since the I/O was performed
498	 * through the device strategy routine and the page cache was bypassed.
499	 * However, testing has demonstrated that this VOP_PUTPAGE is
500	 * necessary. Without this, data might not always be read back as it
501	 * was written.
502	 *
503	 */
504	(void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp, NULL);
505
506	rw_exit(&ip->i_contents);
507	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);
508
509	if ((fdbp != NULL) && (flags & B_ASYNC)) {
510		/*
511		 * Show that no more asynchronous IO will be added
512		 */
513		fdb_ioerrdone(fdbp, error);
514	}
515	if (ulp) {
516		/*
517		 * End the UFS Log transaction
518		 */
519		TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE,
520		    trans_size);
521		ufs_lockfs_end(ulp);
522	}
523	if (io_started && (flags & B_ASYNC)) {
524		return (0);
525	} else {
526		return (error);
527	}
528}
529