/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2022 Garrett D'Amore
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/user.h>
#include <sys/termios.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/cmn_err.h>
#include <sys/vmsystm.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/socktpi.h>

#include <netinet/in.h>
#include <sys/sendfile.h>
#include <sys/un.h>
#include <sys/tihdr.h>
#include <sys/atomic.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>

extern int sosendfile64(file_t *, file_t *, const struct ksendfilevec64 *,
		ssize32_t *);
extern int snf_segmap(file_t *, vnode_t *, u_offset_t, u_offset_t, ssize_t *,
		boolean_t);
extern sotpi_info_t *sotpi_sototpi(struct sonode *);

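/*
 * Vector entries are copied in from userland and processed in chunks of
 * at most this many elements at a time.
 */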
#define	SEND_MAX_CHUNK	16

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)
/*
 * 64-bit offsets for 32-bit applications, running on either a 64-bit
 * or a 32-bit kernel.  For 32-bit apps, we can't transfer more than
 * 2GB of data.
 */
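/*
 * Process one chunk of up to SEND_MAX_CHUNK ksendfilevec64 entries,
 * writing each entry (either user memory for SFV_FD_SELF, or a range of
 * an open regular file) to "fp", and accumulating the number of bytes
 * sent in "count".  Returns 0 on success or an errno value.
 */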
static int
sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
    int copy_cnt, ssize32_t *count)
{
	struct vnode *vp;
	ushort_t fflag;
	int ioflag;
	size32_t cnt;
	ssize32_t sfv_len;
	ssize32_t tmpcount;
	u_offset_t sfv_off;
	struct uio auio;
	struct iovec aiov;
	int i, error;

	fflag = fp->f_flag;
	vp = fp->f_vnode;
	for (i = 0; i < copy_cnt; i++) {

		if (ISSIG(curthread, JUSTLOOKING))
			return (EINTR);

		/*
		 * Do similar checks as "write" as we are writing
		 * sfv_len bytes into "vp".
		 */
		sfv_len = (ssize32_t)sfv->sfv_len;

		if (sfv_len == 0) {
			sfv++;
			continue;
		}

		if (sfv_len < 0)
			return (EINVAL);

		if (vp->v_type == VREG) {
			if (*fileoff >= curproc->p_fsz_ctl) {
				mutex_enter(&curproc->p_lock);
				(void) rctl_action(
				    rctlproc_legacy[RLIMIT_FSIZE],
				    curproc->p_rctls, curproc, RCA_SAFE);
				mutex_exit(&curproc->p_lock);
				return (EFBIG);
			}

			if (*fileoff >= OFFSET_MAX(fp))
				return (EFBIG);

			if (*fileoff + sfv_len > OFFSET_MAX(fp))
				return (EINVAL);
		}

		tmpcount = *count + sfv_len;
		if (tmpcount < 0)
			return (EINVAL);

		sfv_off = sfv->sfv_off;

		auio.uio_extflg = UIO_COPY_DEFAULT;
		if (sfv->sfv_fd == SFV_FD_SELF) {
			aiov.iov_len = sfv_len;
			aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
			auio.uio_loffset = *fileoff;
			auio.uio_iovcnt = 1;
			auio.uio_resid = sfv_len;
			auio.uio_iov = &aiov;
			auio.uio_segflg = UIO_USERSPACE;
			auio.uio_llimit = curproc->p_fsz_ctl;
			auio.uio_fmode = fflag;
			ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
			while (sfv_len > 0) {
				error = VOP_WRITE(vp, &auio, ioflag,
				    fp->f_cred, NULL);
				cnt = sfv_len - auio.uio_resid;
				sfv_len -= cnt;
				ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
				if (vp->v_type == VREG)
					*fileoff += cnt;
				*count += cnt;
				if (error != 0)
					return (error);
			}
		} else {
			file_t	*ffp;
			vnode_t	*readvp;
			size_t	size;
			caddr_t	ptr;

			if ((ffp = getf(sfv->sfv_fd)) == NULL)
				return (EBADF);

			if ((ffp->f_flag & FREAD) == 0) {
				releasef(sfv->sfv_fd);
				return (EBADF);
			}

			readvp = ffp->f_vnode;
			if (readvp->v_type != VREG) {
				releasef(sfv->sfv_fd);
				return (EINVAL);
			}

			/*
			 * No point reading and writing to same vp,
			 * as long as both are regular files. readvp is not
			 * locked; but since we got it from an open file the
			 * contents will be valid during the time of access.
			 */
			if (vn_compare(vp, readvp)) {
				releasef(sfv->sfv_fd);
				return (EINVAL);
			}

			/*
			 * Optimize the regular file over
			 * the socket case.
			 */
			if (vp->v_type == VSOCK) {
				error = sosendfile64(fp, ffp, sfv,
				    (ssize32_t *)&cnt);
				*count += cnt;
				if (error)
					return (error);
				sfv++;
				continue;
			}

			/*
			 * Note: we assume readvp != vp. "vp" is already
			 * locked, and "readvp" must not be.
			 */
			if (readvp < vp) {
				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
				    NULL);
				(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
			} else {
				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
				    NULL);
			}

			/*
			 * Same checks as in pread64.
			 */
			if (sfv_off > MAXOFFSET_T) {
				VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
				releasef(sfv->sfv_fd);
				return (EINVAL);
			}

			if (sfv_off + sfv_len > MAXOFFSET_T)
				sfv_len = (ssize32_t)(MAXOFFSET_T - sfv_off);

			/* Find the native blocksize to transfer data */
			size = MIN(vp->v_vfsp->vfs_bsize,
			    readvp->v_vfsp->vfs_bsize);
			size = sfv_len < size ? sfv_len : size;
			ptr = kmem_alloc(size, KM_NOSLEEP);
			if (ptr == NULL) {
				VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
				releasef(sfv->sfv_fd);
				return (ENOMEM);
			}

			while (sfv_len > 0) {
				size_t	iov_len;

				iov_len = MIN(size, sfv_len);
				aiov.iov_base = ptr;
				aiov.iov_len = iov_len;
				auio.uio_loffset = sfv_off;
				auio.uio_iov = &aiov;
				auio.uio_iovcnt = 1;
				auio.uio_resid = iov_len;
				auio.uio_segflg = UIO_SYSSPACE;
				auio.uio_llimit = MAXOFFSET_T;
				auio.uio_fmode = ffp->f_flag;
				ioflag = auio.uio_fmode &
				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);

				/*
				 * If read sync is not asked for,
				 * filter sync flags
				 */
				if ((ioflag & FRSYNC) == 0)
					ioflag &= ~(FSYNC|FDSYNC);
				error = VOP_READ(readvp, &auio, ioflag,
				    fp->f_cred, NULL);
				if (error) {
					kmem_free(ptr, size);
					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
					    NULL);
					releasef(sfv->sfv_fd);
					return (error);
				}

				/*
				 * Check how much data was really read.
				 * Decrement the 'len' and increment the
				 * 'off' appropriately.
				 */
				cnt = iov_len - auio.uio_resid;
				if (cnt == 0) {
					/*
					 * If we were reading a pipe (currently
					 * not implemented), we may now lose
					 * data.
					 */
					kmem_free(ptr, size);
					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
					    NULL);
					releasef(sfv->sfv_fd);
					return (EINVAL);
				}
				sfv_len -= cnt;
				sfv_off += cnt;

				aiov.iov_base = ptr;
				aiov.iov_len = cnt;
				auio.uio_loffset = *fileoff;
				auio.uio_iov = &aiov;
				auio.uio_iovcnt = 1;
				auio.uio_resid = cnt;
				auio.uio_segflg = UIO_SYSSPACE;
				auio.uio_llimit = curproc->p_fsz_ctl;
				auio.uio_fmode = fflag;
				ioflag = auio.uio_fmode &
				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
				error = VOP_WRITE(vp, &auio, ioflag,
				    fp->f_cred, NULL);

				/*
				 * Check how much data was written. Increment
				 * the 'len' and decrement the 'off' if all
				 * the data was not written.
				 */
				cnt -= auio.uio_resid;
				sfv_len += auio.uio_resid;
				sfv_off -= auio.uio_resid;
				ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
				if (vp->v_type == VREG)
					*fileoff += cnt;
				*count += cnt;
				if (error != 0) {
					kmem_free(ptr, size);
					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
					    NULL);
					releasef(sfv->sfv_fd);
					return (error);
				}
			}
			VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
			releasef(sfv->sfv_fd);
			kmem_free(ptr, size);
		}
		sfv++;
	}
	return (0);
}

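/*
 * Entry point for SENDFILEV64: copies the 64-bit vector in from userland
 * in SEND_MAX_CHUNK pieces, hands each piece to sendvec_chunk64(), and
 * copies the number of bytes transferred back out to the caller.
 */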
static ssize32_t
sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
    size32_t *xferred, int fildes)
{
	u_offset_t		fileoff;
	int			copy_cnt;
	const struct ksendfilevec64 *copy_vec;
	struct ksendfilevec64 sfv[SEND_MAX_CHUNK];
	struct vnode *vp;
	int error;
	ssize32_t count = 0;

	vp = fp->f_vnode;
	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);

	copy_vec = vec;
	fileoff = fp->f_offset;

	do {
		copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
		if (copyin(copy_vec, sfv, copy_cnt *
		    sizeof (struct ksendfilevec64))) {
			error = EFAULT;
			break;
		}

		error = sendvec_chunk64(fp, &fileoff, sfv, copy_cnt, &count);
		if (error != 0)
			break;

		copy_vec += copy_cnt;
		sfvcnt -= copy_cnt;
	} while (sfvcnt > 0);

	if (vp->v_type == VREG)
		fp->f_offset += count;

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
	if (copyout(&count, xferred, sizeof (count)))
		error = EFAULT;
	releasef(fildes);
	if (error != 0)
		return (set_errno(error));
	return (count);
}
#endif

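/*
 * Fast path for small transfers over a socket: the data from all of the
 * vector entries is gathered into a single mblk chain (with wroff bytes
 * of headroom reserved in each block) and handed to the transport in
 * one shot.
 */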
static int
sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
    int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
{
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	ushort_t fflag;
	int ioflag;
	int i, error;
	size_t cnt;
	ssize_t sfv_len;
	u_offset_t sfv_off;
#ifdef _SYSCALL32_IMPL
	model_t model = get_udatamodel();
	u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	mblk_t *dmp = NULL;
	int wroff;
	int buf_left = 0;
	size_t	iov_len;
	mblk_t  *head, *tmp;
	size_t  size = total_size;
	size_t  extra;
	int tail_len;
	struct nmsghdr msg;

	fflag = fp->f_flag;
	vp = fp->f_vnode;

	ASSERT(vp->v_type == VSOCK);
	ASSERT(maxblk > 0);

	/* If nothing to send, return */
	if (total_size == 0)
		return (0);

	if (vp->v_stream != NULL) {
		wroff = (int)vp->v_stream->sd_wroff;
		tail_len = (int)vp->v_stream->sd_tail;
	} else {
		struct sonode *so;

		so = VTOSO(vp);
		wroff = so->so_proto_props.sopp_wroff;
		tail_len = so->so_proto_props.sopp_tail;
	}

	extra = wroff + tail_len;

	buf_left = MIN(total_size, maxblk);
	head = dmp = allocb(buf_left + extra, BPRI_HI);
	if (head == NULL)
		return (ENOMEM);
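	/* Reserve wroff bytes of headroom for protocol headers. */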
	head->b_wptr = head->b_rptr = head->b_rptr + wroff;
	bzero(&msg, sizeof (msg));

	auio.uio_extflg = UIO_COPY_DEFAULT;
	for (i = 0; i < copy_cnt; i++) {
		if (ISSIG(curthread, JUSTLOOKING)) {
			freemsg(head);
			return (EINTR);
		}

		/*
		 * Do similar checks as "write" as we are writing
		 * sfv_len bytes into "vp".
		 */
		sfv_len = (ssize_t)sfv->sfv_len;

		if (sfv_len == 0) {
			sfv++;
			continue;
		}

		/* Check for overflow */
#ifdef _SYSCALL32_IMPL
		if (model == DATAMODEL_ILP32) {
			if (((ssize32_t)(*count + sfv_len)) < 0) {
				freemsg(head);
				return (EINVAL);
			}
		} else
#endif
		if ((*count + sfv_len) < 0) {
			freemsg(head);
			return (EINVAL);
		}

		sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;

		if (sfv->sfv_fd == SFV_FD_SELF) {
			while (sfv_len > 0) {
				if (buf_left == 0) {
					tmp = dmp;
					buf_left = MIN(total_size, maxblk);
					iov_len = MIN(buf_left, sfv_len);
					dmp = allocb(buf_left + extra, BPRI_HI);
					if (dmp == NULL) {
						freemsg(head);
						return (ENOMEM);
					}
					dmp->b_wptr = dmp->b_rptr =
					    dmp->b_rptr + wroff;
					tmp->b_cont = dmp;
				} else {
					iov_len = MIN(buf_left, sfv_len);
				}

				aiov.iov_len = iov_len;
				aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
				auio.uio_loffset = *fileoff;
				auio.uio_iovcnt = 1;
				auio.uio_resid = iov_len;
				auio.uio_iov = &aiov;
				auio.uio_segflg = UIO_USERSPACE;
				auio.uio_llimit = curproc->p_fsz_ctl;
				auio.uio_fmode = fflag;

				buf_left -= iov_len;
				total_size -= iov_len;
				sfv_len -= iov_len;
				sfv_off += iov_len;

				error = uiomove((caddr_t)dmp->b_wptr,
				    iov_len, UIO_WRITE, &auio);
				if (error != 0) {
					freemsg(head);
					return (error);
				}
				dmp->b_wptr += iov_len;
			}
		} else {
			file_t	*ffp;
			vnode_t	*readvp;

			if ((ffp = getf(sfv->sfv_fd)) == NULL) {
				freemsg(head);
				return (EBADF);
			}

			if ((ffp->f_flag & FREAD) == 0) {
				releasef(sfv->sfv_fd);
				freemsg(head);
				return (EACCES);
			}

			readvp = ffp->f_vnode;
			if (readvp->v_type != VREG) {
				releasef(sfv->sfv_fd);
				freemsg(head);
				return (EINVAL);
			}

			/*
			 * No point reading and writing to same vp,
			 * as long as both are regular files. readvp is not
			 * locked; but since we got it from an open file the
			 * contents will be valid during the time of access.
			 */

			if (vn_compare(vp, readvp)) {
				releasef(sfv->sfv_fd);
				freemsg(head);
				return (EINVAL);
			}

			/*
			 * Note: we assume readvp != vp. "vp" is already
			 * locked, and "readvp" must not be.
			 */

			if (readvp < vp) {
				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
				    NULL);
				(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
			} else {
				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
				    NULL);
			}

			/* Same checks as in pread */
			if (sfv_off > maxoff) {
				VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
				releasef(sfv->sfv_fd);
				freemsg(head);
				return (EINVAL);
			}
			if (sfv_off + sfv_len > maxoff) {
				total_size -= (sfv_off + sfv_len - maxoff);
				sfv_len = (ssize_t)((offset_t)maxoff -
				    sfv_off);
			}

			while (sfv_len > 0) {
				if (buf_left == 0) {
					tmp = dmp;
					buf_left = MIN(total_size, maxblk);
					iov_len = MIN(buf_left, sfv_len);
					dmp = allocb(buf_left + extra, BPRI_HI);
					if (dmp == NULL) {
						VOP_RWUNLOCK(readvp,
						    V_WRITELOCK_FALSE, NULL);
						releasef(sfv->sfv_fd);
						freemsg(head);
						return (ENOMEM);
					}
					dmp->b_wptr = dmp->b_rptr =
					    dmp->b_rptr + wroff;
					tmp->b_cont = dmp;
				} else {
					iov_len = MIN(buf_left, sfv_len);
				}
				aiov.iov_base = (caddr_t)dmp->b_wptr;
				aiov.iov_len = iov_len;
				auio.uio_loffset = sfv_off;
				auio.uio_iov = &aiov;
				auio.uio_iovcnt = 1;
				auio.uio_resid = iov_len;
				auio.uio_segflg = UIO_SYSSPACE;
				auio.uio_llimit = MAXOFFSET_T;
				auio.uio_fmode = ffp->f_flag;
				ioflag = auio.uio_fmode &
				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);

				/*
				 * If read sync is not asked for,
				 * filter sync flags
				 */
				if ((ioflag & FRSYNC) == 0)
					ioflag &= ~(FSYNC|FDSYNC);
				error = VOP_READ(readvp, &auio, ioflag,
				    fp->f_cred, NULL);
				if (error != 0) {
					/*
					 * If we were reading a pipe (currently
					 * not implemented), we may now lose
					 * data.
					 */
					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
					    NULL);
					releasef(sfv->sfv_fd);
					freemsg(head);
					return (error);
				}

				/*
				 * Check how much data was really read.
				 * Decrement the 'len' and increment the
				 * 'off' appropriately.
				 */
				cnt = iov_len - auio.uio_resid;
				if (cnt == 0) {
					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
					    NULL);
					releasef(sfv->sfv_fd);
					freemsg(head);
					return (EINVAL);
				}
				sfv_len -= cnt;
				sfv_off += cnt;
				total_size -= cnt;
				buf_left -= cnt;

				dmp->b_wptr += cnt;
			}
			VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
			releasef(sfv->sfv_fd);
		}
		sfv++;
	}

	ASSERT(total_size == 0);
	error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &head);
	if (error != 0) {
		if (head != NULL)
			freemsg(head);
		return (error);
	}
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)size;
	*count += size;

	return (0);
}


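/*
 * General path: process a chunk of sendfilevec entries, reading file
 * data in (at most) filesystem-blocksize pieces and either sending it
 * as mblks down a socket or writing it to a regular file.  When it is
 * safe to do so, regular-file-to-socket transfers are handed off to
 * snf_segmap() for zero-copy treatment.
 */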
static int
sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
    int copy_cnt, ssize_t *count)
{
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	ushort_t fflag;
	int ioflag;
	int i, error;
	size_t cnt;
	ssize_t sfv_len;
	u_offset_t sfv_off;
#ifdef _SYSCALL32_IMPL
	model_t model = get_udatamodel();
	u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	mblk_t	*dmp = NULL;
	char	*buf = NULL;
	size_t  extra = 0;
	int maxblk, wroff, tail_len;
	struct sonode *so;
	stdata_t *stp;
	struct nmsghdr msg;

	maxblk = 0;
	wroff = 0;
	fflag = fp->f_flag;
	vp = fp->f_vnode;
	so = NULL;
	stp = NULL;

	if (vp->v_type == VSOCK) {
		so = VTOSO(vp);
		if (vp->v_stream != NULL) {
			stp = vp->v_stream;
			wroff = (int)stp->sd_wroff;
			tail_len = (int)stp->sd_tail;
			maxblk = (int)stp->sd_maxblk;
		} else {
			stp = NULL;
			wroff = so->so_proto_props.sopp_wroff;
			tail_len = so->so_proto_props.sopp_tail;
			maxblk = so->so_proto_props.sopp_maxblk;
		}
		extra = wroff + tail_len;
	}

	bzero(&msg, sizeof (msg));
	auio.uio_extflg = UIO_COPY_DEFAULT;
	for (i = 0; i < copy_cnt; i++) {
		if (ISSIG(curthread, JUSTLOOKING))
			return (EINTR);

		/*
		 * Do similar checks as "write" as we are writing
		 * sfv_len bytes into "vp".
		 */
		sfv_len = (ssize_t)sfv->sfv_len;

		if (sfv_len == 0) {
			sfv++;
			continue;
		}

		if (vp->v_type == VREG) {
			if (*fileoff >= curproc->p_fsz_ctl) {
				mutex_enter(&curproc->p_lock);
				(void) rctl_action(
				    rctlproc_legacy[RLIMIT_FSIZE],
				    curproc->p_rctls, curproc, RCA_SAFE);
				mutex_exit(&curproc->p_lock);

				return (EFBIG);
			}

			if (*fileoff >= maxoff)
				return (EFBIG);

			if (*fileoff + sfv_len > maxoff)
				return (EINVAL);
		}

		/* Check for overflow */
#ifdef _SYSCALL32_IMPL
		if (model == DATAMODEL_ILP32) {
			if (((ssize32_t)(*count + sfv_len)) < 0)
				return (EINVAL);
		} else
#endif
		if ((*count + sfv_len) < 0)
			return (EINVAL);

		sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;

		if (sfv->sfv_fd == SFV_FD_SELF) {
			if (vp->v_type == VSOCK) {
				while (sfv_len > 0) {
					size_t iov_len;

					iov_len = sfv_len;
					/*
					 * Socket filters can limit the mblk
					 * size, so limit reads to maxblk if
					 * there are filters present.
					 */
					if (so->so_filter_active > 0 &&
					    maxblk != INFPSZ)
						iov_len = MIN(iov_len, maxblk);

					aiov.iov_len = iov_len;
					aiov.iov_base =
					    (caddr_t)(uintptr_t)sfv_off;

					auio.uio_iov = &aiov;
					auio.uio_iovcnt = 1;
					auio.uio_loffset = *fileoff;
					auio.uio_segflg = UIO_USERSPACE;
					auio.uio_fmode = fflag;
					auio.uio_llimit = curproc->p_fsz_ctl;
					auio.uio_resid = iov_len;

					dmp = allocb(iov_len + extra, BPRI_HI);
					if (dmp == NULL)
						return (ENOMEM);
					dmp->b_wptr = dmp->b_rptr =
					    dmp->b_rptr + wroff;
					error = uiomove((caddr_t)dmp->b_wptr,
					    iov_len, UIO_WRITE, &auio);
					if (error != 0) {
						freeb(dmp);
						return (error);
					}
					dmp->b_wptr += iov_len;
					error = socket_sendmblk(VTOSO(vp),
					    &msg, fflag, CRED(), &dmp);

					if (error != 0) {
						if (dmp != NULL)
							freeb(dmp);
						return (error);
					}
					ttolwp(curthread)->lwp_ru.ioch +=
					    (ulong_t)iov_len;
					*count += iov_len;
					sfv_len -= iov_len;
					sfv_off += iov_len;
				}
			} else {
				aiov.iov_len = sfv_len;
				aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;

				auio.uio_iov = &aiov;
				auio.uio_iovcnt = 1;
				auio.uio_loffset = *fileoff;
				auio.uio_segflg = UIO_USERSPACE;
				auio.uio_fmode = fflag;
				auio.uio_llimit = curproc->p_fsz_ctl;
				auio.uio_resid = sfv_len;

				ioflag = auio.uio_fmode &
				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
				while (sfv_len > 0) {
					error = VOP_WRITE(vp, &auio, ioflag,
					    fp->f_cred, NULL);
					cnt = sfv_len - auio.uio_resid;
					sfv_len -= cnt;
					ttolwp(curthread)->lwp_ru.ioch +=
					    (ulong_t)cnt;
					*fileoff += cnt;
					*count += cnt;
					if (error != 0)
						return (error);
				}
			}
		} else {
			int segmapit = 0;
			file_t	*ffp;
			vnode_t	*readvp;
			struct vnode *realvp;
			size_t	size;
			caddr_t	ptr;

			if ((ffp = getf(sfv->sfv_fd)) == NULL)
				return (EBADF);

			if ((ffp->f_flag & FREAD) == 0) {
				releasef(sfv->sfv_fd);
				return (EBADF);
			}

			readvp = ffp->f_vnode;
			if (VOP_REALVP(readvp, &realvp, NULL) == 0)
				readvp = realvp;
			if (readvp->v_type != VREG) {
				releasef(sfv->sfv_fd);
				return (EINVAL);
			}

			/*
			 * No point reading and writing to same vp,
			 * as long as both are regular files. readvp is not
			 * locked; but since we got it from an open file the
			 * contents will be valid during the time of access.
			 */
			if (vn_compare(vp, readvp)) {
				releasef(sfv->sfv_fd);
				return (EINVAL);
			}

			/*
			 * Note: we assume readvp != vp. "vp" is already
			 * locked, and "readvp" must not be.
			 */
			if (readvp < vp) {
				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
				    NULL);
				(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
			} else {
				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
				    NULL);
			}

			/* Same checks as in pread */
			if (sfv_off > maxoff) {
				VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
				releasef(sfv->sfv_fd);
				return (EINVAL);
			}
			if (sfv_off + sfv_len > maxoff) {
				sfv_len = (ssize_t)((offset_t)maxoff -
				    sfv_off);
			}
			/* Find the native blocksize to transfer data */
			size = MIN(vp->v_vfsp->vfs_bsize,
			    readvp->v_vfsp->vfs_bsize);
			size = sfv_len < size ? sfv_len : size;

			if (vp->v_type != VSOCK) {
				segmapit = 0;
				buf = kmem_alloc(size, KM_NOSLEEP);
				if (buf == NULL) {
					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
					    NULL);
					releasef(sfv->sfv_fd);
					return (ENOMEM);
				}
			} else {
				uint_t	copyflag;

				copyflag = stp != NULL ? stp->sd_copyflag :
				    so->so_proto_props.sopp_zcopyflag;

				/*
				 * Socket filters can limit the mblk size,
				 * so limit reads to maxblk if there are
				 * filters present.
				 */
				if (so->so_filter_active > 0 &&
				    maxblk != INFPSZ)
					size = MIN(size, maxblk);

				if (vn_has_flocks(readvp) ||
				    readvp->v_flag & VNOMAP ||
				    copyflag & STZCVMUNSAFE) {
					segmapit = 0;
				} else if (copyflag & STZCVMSAFE) {
					segmapit = 1;
				} else {
					int on = 1;
					if (socket_setsockopt(VTOSO(vp),
					    SOL_SOCKET, SO_SND_COPYAVOID,
					    &on, sizeof (on), CRED()) == 0)
						segmapit = 1;
				}
			}

			if (segmapit) {
				struct vattr va;
				boolean_t nowait;

				va.va_mask = AT_SIZE;
				error = VOP_GETATTR(readvp, &va, 0, kcred,
				    NULL);
				if (error != 0 || sfv_off >= va.va_size) {
					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
					    NULL);
					releasef(sfv->sfv_fd);
					return (error);
				}
				/* Read as much as possible. */
				if (sfv_off + sfv_len > va.va_size)
					sfv_len = va.va_size - sfv_off;

				nowait = (sfv->sfv_flag & SFV_NOWAIT) != 0;
				error = snf_segmap(fp, readvp, sfv_off,
				    (u_offset_t)sfv_len, (ssize_t *)&cnt,
				    nowait);
				releasef(sfv->sfv_fd);
				*count += cnt;
				if (error)
					return (error);
				sfv++;
				continue;
			}

			while (sfv_len > 0) {
				size_t	iov_len;

				iov_len = MIN(size, sfv_len);

				if (vp->v_type == VSOCK) {
					dmp = allocb(iov_len + extra, BPRI_HI);
					if (dmp == NULL) {
						VOP_RWUNLOCK(readvp,
						    V_WRITELOCK_FALSE, NULL);
						releasef(sfv->sfv_fd);
						return (ENOMEM);
					}
					dmp->b_wptr = dmp->b_rptr =
					    dmp->b_rptr + wroff;
					ptr = (caddr_t)dmp->b_rptr;
				} else {
					ptr = buf;
				}

				aiov.iov_base = ptr;
				aiov.iov_len = iov_len;
				auio.uio_loffset = sfv_off;
				auio.uio_iov = &aiov;
				auio.uio_iovcnt = 1;
				auio.uio_resid = iov_len;
				auio.uio_segflg = UIO_SYSSPACE;
				auio.uio_llimit = MAXOFFSET_T;
				auio.uio_fmode = ffp->f_flag;
				ioflag = auio.uio_fmode &
				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);

				/*
				 * If read sync is not asked for,
				 * filter sync flags
				 */
				if ((ioflag & FRSYNC) == 0)
					ioflag &= ~(FSYNC|FDSYNC);
				error = VOP_READ(readvp, &auio, ioflag,
				    fp->f_cred, NULL);
				if (error != 0) {
					/*
					 * If we were reading a pipe (currently
					 * not implemented), we may now lose
					 * data.
					 */
					if (vp->v_type == VSOCK)
						freeb(dmp);
					else
						kmem_free(buf, size);
					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
					    NULL);
					releasef(sfv->sfv_fd);
					return (error);
				}

				/*
				 * Check how much data was really read.
				 * Decrement the 'len' and increment the
				 * 'off' appropriately.
				 */
				cnt = iov_len - auio.uio_resid;
				if (cnt == 0) {
					if (vp->v_type == VSOCK)
						freeb(dmp);
					else
						kmem_free(buf, size);
					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
					    NULL);
					releasef(sfv->sfv_fd);
					return (EINVAL);
				}
				sfv_len -= cnt;
				sfv_off += cnt;

				if (vp->v_type == VSOCK) {
					dmp->b_wptr = dmp->b_rptr + cnt;

					error = socket_sendmblk(VTOSO(vp),
					    &msg, fflag, CRED(), &dmp);

					if (error != 0) {
						if (dmp != NULL)
							freeb(dmp);
						VOP_RWUNLOCK(readvp,
						    V_WRITELOCK_FALSE, NULL);
						releasef(sfv->sfv_fd);
						return (error);
					}

					ttolwp(curthread)->lwp_ru.ioch +=
					    (ulong_t)cnt;
					*count += cnt;
				} else {

					aiov.iov_base = ptr;
					aiov.iov_len = cnt;
					auio.uio_loffset = *fileoff;
					auio.uio_resid = cnt;
					auio.uio_iov = &aiov;
					auio.uio_iovcnt = 1;
					auio.uio_segflg = UIO_SYSSPACE;
					auio.uio_llimit = curproc->p_fsz_ctl;
					auio.uio_fmode = fflag;
					ioflag = auio.uio_fmode &
					    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
					error = VOP_WRITE(vp, &auio, ioflag,
					    fp->f_cred, NULL);

					/*
					 * Check how much data was written.
					 * Increment the 'len' and decrement the
					 * 'off' if all the data was not
					 * written.
					 */
					cnt -= auio.uio_resid;
					sfv_len += auio.uio_resid;
					sfv_off -= auio.uio_resid;
					ttolwp(curthread)->lwp_ru.ioch +=
					    (ulong_t)cnt;
					*fileoff += cnt;
					*count += cnt;
					if (error != 0) {
						kmem_free(buf, size);
						VOP_RWUNLOCK(readvp,
						    V_WRITELOCK_FALSE, NULL);
						releasef(sfv->sfv_fd);
						return (error);
					}
				}
			}
			if (buf) {
				kmem_free(buf, size);
				buf = NULL;
			}
			VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
			releasef(sfv->sfv_fd);
		}
		sfv++;
	}
	return (0);
}

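/*
 * sendfilev() system call handler.
 *
 * The userland interface (sendfilev(3EXT)) passes a vector of sendfilevec
 * entries, each describing either a region of user memory (sfv_fd ==
 * SFV_FD_SELF, with sfv_off holding the address) or a byte range of an
 * open regular file, to be sent in order to "fildes", which is a connected
 * socket or a regular file opened for writing.
 *
 * A rough sketch of a userland call (sockfd, datafd, hdr, hdrlen and
 * filesize below are illustrative placeholders, not part of this file):
 *
 *	struct sendfilevec vec[2];
 *	size_t xferred;
 *	ssize_t n;
 *
 *	vec[0].sfv_fd = SFV_FD_SELF;		// header bytes from memory
 *	vec[0].sfv_flag = 0;
 *	vec[0].sfv_off = (off_t)(uintptr_t)hdr;
 *	vec[0].sfv_len = hdrlen;
 *
 *	vec[1].sfv_fd = datafd;			// then an open regular file
 *	vec[1].sfv_flag = 0;
 *	vec[1].sfv_off = 0;
 *	vec[1].sfv_len = filesize;
 *
 *	n = sendfilev(sockfd, vec, 2, &xferred);
 */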
ssize_t
sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
    size_t *xferred)
{
	int error = 0;
	int first_vector_error = 0;
	file_t *fp;
	struct vnode *vp;
	struct sonode *so = NULL;
	u_offset_t fileoff;
	int copy_cnt;
	const struct sendfilevec *copy_vec;
	struct sendfilevec sfv[SEND_MAX_CHUNK];
	ssize_t count = 0;
#ifdef _SYSCALL32_IMPL
	struct ksendfilevec32 sfv32[SEND_MAX_CHUNK];
#endif
	ssize_t total_size;
	int i;
	boolean_t is_sock = B_FALSE;
	int maxblk = 0;

	if (sfvcnt <= 0)
		return (set_errno(EINVAL));

	if ((fp = getf(fildes)) == NULL)
		return (set_errno(EBADF));

	if (((fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto err;
	}

	fileoff = fp->f_offset;
	vp = fp->f_vnode;

	switch (vp->v_type) {
	case VSOCK:
		so = VTOSO(vp);
		is_sock = B_TRUE;
		if (SOCK_IS_NONSTR(so)) {
			maxblk = so->so_proto_props.sopp_maxblk;
		} else {
			maxblk = (int)vp->v_stream->sd_maxblk;
		}

		/*
		 * We need to make sure that the socket that we're sending on
		 * supports sendfile behavior. sockfs doesn't know that the APIs
		 * we want to use are coming from sendfile, so we can't rely on
		 * it to check for us.
		 */
		if ((so->so_mode & SM_SENDFILESUPP) == 0) {
			error = EOPNOTSUPP;
			goto err;
		}
		break;
	case VREG:
		break;
	default:
		error = EINVAL;
		goto err;
	}

	switch (opcode) {
	case SENDFILEV :
		break;
#if defined(_SYSCALL32_IMPL) || defined(_ILP32)
	case SENDFILEV64 :
		return (sendvec64(fp, (struct ksendfilevec64 *)vec, sfvcnt,
		    (size32_t *)xferred, fildes));
#endif
	default :
		error = ENOSYS;
		break;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
	copy_vec = vec;

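	/*
	 * Copy the vector in from userland and process it, at most
	 * SEND_MAX_CHUNK entries at a time, until the whole vector has
	 * been consumed or an error occurs.
	 */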
	do {
		total_size = 0;
		copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
#ifdef _SYSCALL32_IMPL
		/* 32-bit callers need to have their iovec expanded. */
		if (get_udatamodel() == DATAMODEL_ILP32) {
			if (copyin(copy_vec, sfv32,
			    copy_cnt * sizeof (ksendfilevec32_t))) {
				error = EFAULT;
				break;
			}

			for (i = 0; i < copy_cnt; i++) {
				sfv[i].sfv_fd = sfv32[i].sfv_fd;
				sfv[i].sfv_off =
				    (off_t)(uint32_t)sfv32[i].sfv_off;
				sfv[i].sfv_len = (size_t)sfv32[i].sfv_len;
				total_size += sfv[i].sfv_len;
				sfv[i].sfv_flag = sfv32[i].sfv_flag;
				/*
				 * Individual elements of the vector must not
				 * wrap or overflow, as later math is signed.
				 * Equally total_size needs to be checked after
				 * each vector is added in, to be sure that
				 * rogue values haven't overflowed the counter.
				 */
				if (((ssize32_t)sfv[i].sfv_len < 0) ||
				    ((ssize32_t)total_size < 0)) {
					/*
					 * Truncate the vector to send data
					 * described by elements before the
					 * error.
					 */
					copy_cnt = i;
					first_vector_error = EINVAL;
					/* total_size can't be trusted */
					if ((ssize32_t)total_size < 0)
						error = EINVAL;
					break;
				}
			}
			/* Nothing to do, process errors */
			if (copy_cnt == 0)
				break;

		} else {
#endif
			if (copyin(copy_vec, sfv,
			    copy_cnt * sizeof (sendfilevec_t))) {
				error = EFAULT;
				break;
			}

			for (i = 0; i < copy_cnt; i++) {
				total_size += sfv[i].sfv_len;
				/*
				 * Individual elements of the vector must not
				 * wrap or overflow, as later math is signed.
				 * Equally total_size needs to be checked after
				 * each vector is added in, to be sure that
				 * rogue values haven't overflowed the counter.
				 */
				if (((ssize_t)sfv[i].sfv_len < 0) ||
				    (total_size < 0)) {
					/*
					 * Truncate the vector to send data
					 * described by elements before the
					 * error.
					 */
					copy_cnt = i;
					first_vector_error = EINVAL;
					/* total_size can't be trusted */
					if (total_size < 0)
						error = EINVAL;
					break;
				}
			}
			/* Nothing to do, process errors */
			if (copy_cnt == 0)
				break;
#ifdef _SYSCALL32_IMPL
		}
#endif

		/*
		 * The choice between sendvec_small_chunk and sendvec_chunk
		 * depends on several things:
		 *
		 * i) latency is important for smaller files.  So if the
		 * data is smaller than 'tcp_slow_start_initial' times
		 * maxblk, then use sendvec_small_chunk, which creates
		 * maxblk-size mblks, chains them together and sends them
		 * to TCP in one shot.  It also leaves 'wroff' bytes of
		 * space for the headers in each mblk.
		 *
		 * ii) for a total size bigger than 'tcp_slow_start_initial'
		 * times maxblk, it's probably real file data that dominates,
		 * so it's better to use sendvec_chunk, because performance
		 * goes to the dogs if we don't do pagesize reads.
		 * sendvec_chunk will do pagesize reads and write them in
		 * pagesize mblks to TCP.
		 *
		 * Side note: a write to a file has not been optimized.
		 * Future zero-copy code will plug into sendvec_chunk only,
		 * because doing zero copy for files smaller than pagesize
		 * is useless.
		 */
		if (is_sock) {
			if ((total_size <= (4 * maxblk)) &&
			    error == 0) {
				error = sendvec_small_chunk(fp,
				    &fileoff, sfv, copy_cnt,
				    total_size, maxblk, &count);
			} else {
				error = sendvec_chunk(fp, &fileoff,
				    sfv, copy_cnt, &count);
			}
		} else {
			ASSERT(vp->v_type == VREG);
			error = sendvec_chunk(fp, &fileoff, sfv, copy_cnt,
			    &count);
		}


#ifdef _SYSCALL32_IMPL
		if (get_udatamodel() == DATAMODEL_ILP32) {
			copy_vec = (const struct sendfilevec *)
			    ((char *)copy_vec +
			    (copy_cnt * sizeof (ksendfilevec32_t)));
		} else
#endif
			copy_vec += copy_cnt;
		sfvcnt -= copy_cnt;

	/* Process all vector members up to first error */
	} while ((sfvcnt > 0) && first_vector_error == 0 && error == 0);

	if (vp->v_type == VREG)
		fp->f_offset += count;

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);

#ifdef _SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_ILP32) {
		ssize32_t count32 = (ssize32_t)count;
		if (copyout(&count32, xferred, sizeof (count32)))
			error = EFAULT;
		releasef(fildes);
		if (error != 0)
			return (set_errno(error));
		if (first_vector_error != 0)
			return (set_errno(first_vector_error));
		return (count32);
	}
#endif
	if (copyout(&count, xferred, sizeof (count)))
		error = EFAULT;
	releasef(fildes);
	if (error != 0)
		return (set_errno(error));
	if (first_vector_error != 0)
		return (set_errno(first_vector_error));
	return (count);
err:
	ASSERT(error != 0);
	releasef(fildes);
	return (set_errno(error));
}