1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2015, Joyent, Inc. All rights reserved.
26 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
27 * Copyright 2022 Garrett D'Amore
28 */
29
30 #include <sys/types.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/buf.h>
35 #include <sys/conf.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/sysmacros.h>
39 #include <sys/vfs.h>
40 #include <sys/vfs_opreg.h>
41 #include <sys/vnode.h>
42 #include <sys/debug.h>
43 #include <sys/errno.h>
44 #include <sys/time.h>
45 #include <sys/file.h>
46 #include <sys/open.h>
47 #include <sys/user.h>
48 #include <sys/termios.h>
49 #include <sys/stream.h>
50 #include <sys/strsubr.h>
51 #include <sys/strsun.h>
52 #include <sys/esunddi.h>
53 #include <sys/flock.h>
54 #include <sys/modctl.h>
55 #include <sys/cmn_err.h>
56 #include <sys/mkdev.h>
57 #include <sys/pathname.h>
58 #include <sys/ddi.h>
59 #include <sys/stat.h>
60 #include <sys/fs/snode.h>
61 #include <sys/fs/dv_node.h>
62 #include <sys/zone.h>
63
64 #include <sys/socket.h>
65 #include <sys/socketvar.h>
66 #include <netinet/in.h>
67 #include <sys/un.h>
68 #include <sys/ucred.h>
69
70 #include <sys/tiuser.h>
71 #define _SUN_TPI_VERSION 2
72 #include <sys/tihdr.h>
73
74 #include <c2/audit.h>
75
76 #include <fs/sockfs/sockcommon.h>
77 #include <fs/sockfs/sockfilter_impl.h>
78 #include <fs/sockfs/socktpi.h>
79 #include <fs/sockfs/socktpi_impl.h>
80 #include <fs/sockfs/sodirect.h>
81
/*
 * Macros that operate on struct cmsghdr.
 *
 * CMSG_CONTENT yields a pointer to the first byte following the header and
 * CMSG_CONTENTLEN the number of bytes that cmsg_len claims beyond the header.
 *
 * CMSG_VALID checks that cmsg is aligned, lies within [start, end), that
 * cmsg_len covers at least the header, and that the message does not extend
 * past end. It does not assume that the last option buffer is padded.
 *
 * The length is compared against the space remaining (end - cmsg) instead of
 * forming cmsg + cmsg_len, so an oversized cmsg_len cannot wrap the address
 * arithmetic and slip through the bounds check. The subtraction is safe
 * because the preceding term has already established cmsg < end.
 */
#define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
#define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
#define	CMSG_VALID(cmsg, start, end)					\
	(ISALIGNED_cmsghdr(cmsg) &&					\
	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
	((cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&		\
	((cmsg)->cmsg_len <= (uintptr_t)(end) - (uintptr_t)(cmsg)))
#define	SO_LOCK_WAKEUP_TIME	3000	/* Wakeup time in milliseconds */
95
96 dev_t sockdev; /* For fsid in getattr */
97
98 struct socklist socklist;
99
100 struct kmem_cache *socket_cache;
101
102 /*
103 * sockconf_lock protects the socket configuration (socket types and
104 * socket filters) which is changed via the sockconfig system call.
105 */
106 krwlock_t sockconf_lock;
107
108 static int sockfs_update(kstat_t *, int);
109 static int sockfs_snapshot(kstat_t *, void *, int);
110 extern smod_info_t *sotpi_smod_create(void);
111
112 extern void sendfile_init();
113
114 extern int modrootloaded;
115
116 /*
117 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode.
118 * Returns with the vnode held.
119 */
120 int
sogetvp(char * devpath,vnode_t ** vpp,int uioflag)121 sogetvp(char *devpath, vnode_t **vpp, int uioflag)
122 {
123 struct snode *csp;
124 vnode_t *vp, *dvp;
125 major_t maj;
126 int error;
127
128 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE);
129
130 /*
131 * Lookup the underlying filesystem vnode.
132 */
133 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp);
134 if (error)
135 return (error);
136
137 /* Check that it is the correct vnode */
138 if (vp->v_type != VCHR) {
139 VN_RELE(vp);
140 return (ENOTSOCK);
141 }
142
143 /*
144 * If devpath went through devfs, the device should already
145 * be configured. If devpath is a mknod file, however, we
146 * need to make sure the device is properly configured.
147 * To do this, we do something similar to spec_open()
148 * except that we resolve to the minor/leaf level since
149 * we need to return a vnode.
150 */
151 csp = VTOS(VTOS(vp)->s_commonvp);
152 if (!(csp->s_flag & SDIPSET)) {
153 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
154 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname);
155 if (error == 0)
156 error = devfs_lookupname(pathname, NULLVPP, &dvp);
157 VN_RELE(vp);
158 kmem_free(pathname, MAXPATHLEN);
159 if (error != 0)
160 return (ENXIO);
161 vp = dvp; /* use the devfs vp */
162 }
163
164 /* device is configured at this point */
165 maj = getmajor(vp->v_rdev);
166 if (!STREAMSTAB(maj)) {
167 VN_RELE(vp);
168 return (ENOSTR);
169 }
170
171 *vpp = vp;
172 return (0);
173 }
174
175 /*
176 * Update the accessed, updated, or changed times in an sonode
177 * with the current time.
178 *
179 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable
180 * attributes in a fstat call. (They return the current time and 0 for
181 * all timestamps, respectively.) We maintain the current timestamps
182 * here primarily so that should sockmod be popped the resulting
183 * file descriptor will behave like a stream w.r.t. the timestamps.
184 */
185 void
so_update_attrs(struct sonode * so,int flag)186 so_update_attrs(struct sonode *so, int flag)
187 {
188 time_t now = gethrestime_sec();
189
190 if (SOCK_IS_NONSTR(so))
191 return;
192
193 mutex_enter(&so->so_lock);
194 so->so_flag |= flag;
195 if (flag & SOACC)
196 SOTOTPI(so)->sti_atime = now;
197 if (flag & SOMOD)
198 SOTOTPI(so)->sti_mtime = now;
199 mutex_exit(&so->so_lock);
200 }
201
202 extern so_create_func_t sock_comm_create_function;
203 extern so_destroy_func_t sock_comm_destroy_function;
204 /*
205 * Init function called when sockfs is loaded.
206 */
207 int
sockinit(int fstype,char * name)208 sockinit(int fstype, char *name)
209 {
210 static const fs_operation_def_t sock_vfsops_template[] = {
211 NULL, NULL
212 };
213 int error;
214 major_t dev;
215 char *err_str;
216
217 error = vfs_setfsops(fstype, sock_vfsops_template, NULL);
218 if (error != 0) {
219 zcmn_err(GLOBAL_ZONEID, CE_WARN,
220 "sockinit: bad vfs ops template");
221 return (error);
222 }
223
224 error = vn_make_ops(name, socket_vnodeops_template,
225 &socket_vnodeops);
226 if (error != 0) {
227 err_str = "sockinit: bad socket vnode ops template";
228 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */
229 socket_vnodeops = NULL;
230 goto failure;
231 }
232
233 socket_cache = kmem_cache_create("socket_cache",
234 sizeof (struct sonode), 0, sonode_constructor,
235 sonode_destructor, NULL, NULL, NULL, 0);
236
237 rw_init(&sockconf_lock, NULL, RW_DEFAULT, NULL);
238
239 error = socktpi_init();
240 if (error != 0) {
241 err_str = NULL;
242 goto failure;
243 }
244
245 error = sod_init();
246 if (error != 0) {
247 err_str = NULL;
248 goto failure;
249 }
250
251 /*
252 * Set up the default create and destroy functions
253 */
254 sock_comm_create_function = socket_sonode_create;
255 sock_comm_destroy_function = socket_sonode_destroy;
256
257 /*
258 * Build initial list mapping socket parameters to vnode.
259 */
260 smod_init();
261 smod_add(sotpi_smod_create());
262
263 sockparams_init();
264
265 /*
266 * If sockets are needed before init runs /sbin/soconfig
267 * it is possible to preload the sockparams list here using
268 * calls like:
269 * sockconfig(1,2,3, "/dev/tcp", 0);
270 */
271
272 /*
273 * Create a unique dev_t for use in so_fsid.
274 */
275
276 if ((dev = getudev()) == (major_t)-1)
277 dev = 0;
278 sockdev = makedevice(dev, 0);
279
280 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL);
281 sendfile_init();
282
283 /* Initialize socket filters */
284 sof_init();
285
286 return (0);
287
288 failure:
289 (void) vfs_freevfsops_by_type(fstype);
290 if (socket_vnodeops != NULL)
291 vn_freevnodeops(socket_vnodeops);
292 if (err_str != NULL)
293 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str);
294 return (error);
295 }
296
297 /*
298 * Caller must hold the mutex. Used to set SOLOCKED.
299 */
300 void
so_lock_single(struct sonode * so)301 so_lock_single(struct sonode *so)
302 {
303 ASSERT(MUTEX_HELD(&so->so_lock));
304
305 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) {
306 cv_wait_stop(&so->so_single_cv, &so->so_lock,
307 SO_LOCK_WAKEUP_TIME);
308 }
309 so->so_flag |= SOLOCKED;
310 }
311
312 /*
313 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND.
314 * Used to clear SOLOCKED or SOASYNC_UNBIND.
315 */
316 void
so_unlock_single(struct sonode * so,int flag)317 so_unlock_single(struct sonode *so, int flag)
318 {
319 ASSERT(MUTEX_HELD(&so->so_lock));
320 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND));
321 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0);
322 ASSERT(so->so_flag & flag);
323 /*
324 * Process the T_DISCON_IND on sti_discon_ind_mp.
325 *
326 * Call to so_drain_discon_ind will result in so_lock
327 * being dropped and re-acquired later.
328 */
329 if (!SOCK_IS_NONSTR(so)) {
330 sotpi_info_t *sti = SOTOTPI(so);
331
332 if (sti->sti_discon_ind_mp != NULL)
333 so_drain_discon_ind(so);
334 }
335
336 cv_signal(&so->so_single_cv);
337 so->so_flag &= ~flag;
338 }
339
340 /*
341 * Caller must hold the mutex. Used to set SOREADLOCKED.
342 * If the caller wants nonblocking behavior it should set fmode.
343 */
344 int
so_lock_read(struct sonode * so,int fmode)345 so_lock_read(struct sonode *so, int fmode)
346 {
347 ASSERT(MUTEX_HELD(&so->so_lock));
348
349 while (so->so_flag & SOREADLOCKED) {
350 if (fmode & (FNDELAY|FNONBLOCK))
351 return (EWOULDBLOCK);
352 cv_wait_stop(&so->so_read_cv, &so->so_lock,
353 SO_LOCK_WAKEUP_TIME);
354 }
355 so->so_flag |= SOREADLOCKED;
356 return (0);
357 }
358
359 /*
360 * Like so_lock_read above but allows signals.
361 */
362 int
so_lock_read_intr(struct sonode * so,int fmode)363 so_lock_read_intr(struct sonode *so, int fmode)
364 {
365 ASSERT(MUTEX_HELD(&so->so_lock));
366
367 while (so->so_flag & SOREADLOCKED) {
368 if (fmode & (FNDELAY|FNONBLOCK))
369 return (EWOULDBLOCK);
370 if (!cv_wait_sig(&so->so_read_cv, &so->so_lock))
371 return (EINTR);
372 }
373 so->so_flag |= SOREADLOCKED;
374 return (0);
375 }
376
377 /*
378 * Caller must hold the mutex. Used to clear SOREADLOCKED,
379 * set in so_lock_read() or so_lock_read_intr().
380 */
381 void
so_unlock_read(struct sonode * so)382 so_unlock_read(struct sonode *so)
383 {
384 ASSERT(MUTEX_HELD(&so->so_lock));
385 ASSERT(so->so_flag & SOREADLOCKED);
386
387 cv_signal(&so->so_read_cv);
388 so->so_flag &= ~SOREADLOCKED;
389 }
390
391 /*
392 * Verify that the specified offset falls within the mblk and
393 * that the resulting pointer is aligned.
394 * Returns NULL if not.
395 */
396 void *
sogetoff(mblk_t * mp,t_uscalar_t offset,t_uscalar_t length,uint_t align_size)397 sogetoff(mblk_t *mp, t_uscalar_t offset,
398 t_uscalar_t length, uint_t align_size)
399 {
400 uintptr_t ptr1, ptr2;
401
402 ASSERT(mp && mp->b_wptr >= mp->b_rptr);
403 ptr1 = (uintptr_t)mp->b_rptr + offset;
404 ptr2 = (uintptr_t)ptr1 + length;
405 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) {
406 eprintline(0);
407 return (NULL);
408 }
409 if ((ptr1 & (align_size - 1)) != 0) {
410 eprintline(0);
411 return (NULL);
412 }
413 return ((void *)ptr1);
414 }
415
416 /*
417 * Return the AF_UNIX underlying filesystem vnode matching a given name.
418 * Makes sure the sending and the destination sonodes are compatible.
419 * The vnode is returned held.
420 *
421 * The underlying filesystem VSOCK vnode has a v_stream pointer that
422 * references the actual stream head (hence indirectly the actual sonode).
423 */
424 static int
so_ux_lookup(struct sonode * so,struct sockaddr_un * soun,int checkaccess,vnode_t ** vpp)425 so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess,
426 vnode_t **vpp)
427 {
428 vnode_t *vp; /* Underlying filesystem vnode */
429 vnode_t *rvp; /* real vnode */
430 vnode_t *svp; /* sockfs vnode */
431 struct sonode *so2;
432 int error;
433
434 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so,
435 soun->sun_path));
436
437 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
438 if (error) {
439 eprintsoline(so, error);
440 return (error);
441 }
442
443 /*
444 * Traverse lofs mounts get the real vnode
445 */
446 if (VOP_REALVP(vp, &rvp, NULL) == 0) {
447 VN_HOLD(rvp); /* hold the real vnode */
448 VN_RELE(vp); /* release hold from lookup */
449 vp = rvp;
450 }
451
452 if (vp->v_type != VSOCK) {
453 error = ENOTSOCK;
454 eprintsoline(so, error);
455 goto done2;
456 }
457
458 if (checkaccess) {
459 /*
460 * Check that we have permissions to access the destination
461 * vnode. This check is not done in BSD but it is required
462 * by X/Open.
463 */
464 error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL);
465 if (error != 0) {
466 eprintsoline(so, error);
467 goto done2;
468 }
469 }
470
471 /*
472 * Check if the remote socket has been closed.
473 *
474 * Synchronize with vn_rele_stream by holding v_lock while traversing
475 * v_stream->sd_vnode.
476 */
477 mutex_enter(&vp->v_lock);
478 if (vp->v_stream == NULL) {
479 mutex_exit(&vp->v_lock);
480 if (so->so_type == SOCK_DGRAM)
481 error = EDESTADDRREQ;
482 else
483 error = ECONNREFUSED;
484
485 eprintsoline(so, error);
486 goto done2;
487 }
488 ASSERT(vp->v_stream->sd_vnode);
489 svp = vp->v_stream->sd_vnode;
490 /*
491 * holding v_lock on underlying filesystem vnode and acquiring
492 * it on sockfs vnode. Assumes that no code ever attempts to
493 * acquire these locks in the reverse order.
494 */
495 VN_HOLD(svp);
496 mutex_exit(&vp->v_lock);
497
498 if (svp->v_type != VSOCK) {
499 error = ENOTSOCK;
500 eprintsoline(so, error);
501 goto done;
502 }
503
504 so2 = VTOSO(svp);
505
506 if (so->so_type != so2->so_type) {
507 error = EPROTOTYPE;
508 eprintsoline(so, error);
509 goto done;
510 }
511
512 VN_RELE(svp);
513 *vpp = vp;
514 return (0);
515
516 done:
517 VN_RELE(svp);
518 done2:
519 VN_RELE(vp);
520 return (error);
521 }
522
523 /*
524 * Verify peer address for connect and sendto/sendmsg.
525 * Since sendto/sendmsg would not get synchronous errors from the transport
526 * provider we have to do these ugly checks in the socket layer to
527 * preserve compatibility with SunOS 4.X.
528 */
529 int
so_addr_verify(struct sonode * so,const struct sockaddr * name,socklen_t namelen)530 so_addr_verify(struct sonode *so, const struct sockaddr *name,
531 socklen_t namelen)
532 {
533 int family;
534
535 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n",
536 (void *)so, (void *)name, namelen));
537
538 ASSERT(name != NULL);
539
540 family = so->so_family;
541 switch (family) {
542 case AF_INET:
543 if (name->sa_family != family) {
544 eprintsoline(so, EAFNOSUPPORT);
545 return (EAFNOSUPPORT);
546 }
547 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) {
548 eprintsoline(so, EINVAL);
549 return (EINVAL);
550 }
551 break;
552 case AF_INET6: {
553 #ifdef DEBUG
554 struct sockaddr_in6 *sin6;
555 #endif /* DEBUG */
556
557 if (name->sa_family != family) {
558 eprintsoline(so, EAFNOSUPPORT);
559 return (EAFNOSUPPORT);
560 }
561 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) {
562 eprintsoline(so, EINVAL);
563 return (EINVAL);
564 }
565 #ifdef DEBUG
566 /* Verify that apps don't forget to clear sin6_scope_id etc */
567 sin6 = (struct sockaddr_in6 *)name;
568 if (sin6->sin6_scope_id != 0 &&
569 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
570 zcmn_err(getzoneid(), CE_WARN,
571 "connect/send* with uninitialized sin6_scope_id "
572 "(%d) on socket. Pid = %d\n",
573 (int)sin6->sin6_scope_id, (int)curproc->p_pid);
574 }
575 #endif /* DEBUG */
576 break;
577 }
578 case AF_UNIX:
579 if (SOTOTPI(so)->sti_faddr_noxlate) {
580 return (0);
581 }
582 if (namelen < (socklen_t)sizeof (short)) {
583 eprintsoline(so, ENOENT);
584 return (ENOENT);
585 }
586 if (name->sa_family != family) {
587 eprintsoline(so, EAFNOSUPPORT);
588 return (EAFNOSUPPORT);
589 }
590 /* MAXPATHLEN + soun_family + nul termination */
591 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
592 eprintsoline(so, ENAMETOOLONG);
593 return (ENAMETOOLONG);
594 }
595
596 break;
597
598 default:
599 /*
600 * Default is don't do any length or sa_family check
601 * to allow non-sockaddr style addresses.
602 */
603 break;
604 }
605
606 return (0);
607 }
608
609
610 /*
611 * Translate an AF_UNIX sockaddr_un to the transport internal name.
612 * Assumes caller has called so_addr_verify first. The translated
613 * (internal form) address is stored in sti->sti_ux_taddr.
614 */
615 /*ARGSUSED*/
616 int
so_ux_addr_xlate(struct sonode * so,struct sockaddr * name,socklen_t namelen,int checkaccess,void ** addrp,socklen_t * addrlenp)617 so_ux_addr_xlate(struct sonode *so, struct sockaddr *name,
618 socklen_t namelen, int checkaccess,
619 void **addrp, socklen_t *addrlenp)
620 {
621 int error;
622 struct sockaddr_un *soun;
623 vnode_t *vp;
624 void *addr;
625 socklen_t addrlen;
626 sotpi_info_t *sti = SOTOTPI(so);
627
628 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n",
629 (void *)so, (void *)name, namelen, checkaccess));
630
631 ASSERT(name != NULL);
632 ASSERT(so->so_family == AF_UNIX);
633 ASSERT(!sti->sti_faddr_noxlate);
634 ASSERT(namelen >= (socklen_t)sizeof (short));
635 ASSERT(name->sa_family == AF_UNIX);
636 soun = (struct sockaddr_un *)name;
637 /*
638 * Lookup vnode for the specified path name and verify that
639 * it is a socket.
640 */
641 error = so_ux_lookup(so, soun, checkaccess, &vp);
642 if (error) {
643 eprintsoline(so, error);
644 return (error);
645 }
646 /*
647 * Use the address of the peer vnode as the address to send
648 * to. We release the peer vnode here. In case it has been
649 * closed by the time the T_CONN_REQ or T_UNITDATA_REQ reaches the
650 * transport the message will get an error or be dropped.
651 * Note that that soua_vp is never dereferenced; it's just a
652 * convenient value by which we can identify the peer.
653 */
654 sti->sti_ux_taddr.soua_vp = vp;
655 sti->sti_ux_taddr.soua_magic = SOU_MAGIC_EXPLICIT;
656 addr = &sti->sti_ux_taddr;
657 addrlen = (socklen_t)sizeof (sti->sti_ux_taddr);
658 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n",
659 addrlen, (void *)vp));
660 VN_RELE(vp);
661 *addrp = addr;
662 *addrlenp = (socklen_t)addrlen;
663 return (0);
664 }
665
666 /*
667 * Esballoc free function for messages that contain SO_FILEP option.
668 * Decrement the reference count on the file pointers using closef.
669 */
670 void
fdbuf_free(struct fdbuf * fdbuf)671 fdbuf_free(struct fdbuf *fdbuf)
672 {
673 int i;
674 struct file *fp;
675
676 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd));
677 for (i = 0; i < fdbuf->fd_numfd; i++) {
678 /*
679 * We need pointer size alignment for fd_fds. On a LP64
680 * kernel, the required alignment is 8 bytes while
681 * the option headers and values are only 4 bytes
682 * aligned. So its safer to do a bcopy compared to
683 * assigning fdbuf->fd_fds[i] to fp.
684 */
685 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
686 dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp));
687 (void) closef(fp);
688 }
689 if (fdbuf->fd_ebuf != NULL)
690 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen);
691 kmem_free(fdbuf, fdbuf->fd_size);
692 }
693
694 /*
695 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing.
696 * Waits if memory is not available.
697 */
698 mblk_t *
fdbuf_allocmsg(int size,struct fdbuf * fdbuf)699 fdbuf_allocmsg(int size, struct fdbuf *fdbuf)
700 {
701 uchar_t *buf;
702 mblk_t *mp;
703
704 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd));
705 buf = kmem_alloc(size, KM_SLEEP);
706 fdbuf->fd_ebuf = (caddr_t)buf;
707 fdbuf->fd_ebuflen = size;
708 fdbuf->fd_frtn.free_func = fdbuf_free;
709 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf;
710
711 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn);
712 mp->b_datap->db_type = M_PROTO;
713 return (mp);
714 }
715
716 /*
717 * Extract file descriptors from a fdbuf.
718 * Return list in rights/rightslen.
719 */
720 /*ARGSUSED*/
721 static int
fdbuf_extract(struct fdbuf * fdbuf,void * rights,int rightslen)722 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen)
723 {
724 int i, fd;
725 int *rp;
726 struct file *fp;
727 int numfd;
728
729 dprint(1, ("fdbuf_extract: %d fds, len %d\n",
730 fdbuf->fd_numfd, rightslen));
731
732 numfd = fdbuf->fd_numfd;
733 ASSERT(rightslen == numfd * (int)sizeof (int));
734
735 /*
736 * Allocate a file descriptor and increment the f_count.
737 * The latter is needed since we always call fdbuf_free
738 * which performs a closef.
739 */
740 rp = (int *)rights;
741 for (i = 0; i < numfd; i++) {
742 if ((fd = ufalloc(0)) == -1)
743 goto cleanup;
744 /*
745 * We need pointer size alignment for fd_fds. On a LP64
746 * kernel, the required alignment is 8 bytes while
747 * the option headers and values are only 4 bytes
748 * aligned. So its safer to do a bcopy compared to
749 * assigning fdbuf->fd_fds[i] to fp.
750 */
751 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
752 mutex_enter(&fp->f_tlock);
753 fp->f_count++;
754 mutex_exit(&fp->f_tlock);
755 setf(fd, fp);
756 *rp++ = fd;
757 if (AU_AUDITING())
758 audit_fdrecv(fd, fp);
759 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n",
760 i, fd, (void *)fp, fp->f_count));
761 }
762 return (0);
763
764 cleanup:
765 /*
766 * Undo whatever partial work the loop above has done.
767 */
768 {
769 int j;
770
771 rp = (int *)rights;
772 for (j = 0; j < i; j++) {
773 dprint(0,
774 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp));
775 (void) closeandsetf(*rp++, NULL);
776 }
777 }
778
779 return (EMFILE);
780 }
781
782 /*
783 * Insert file descriptors into an fdbuf.
784 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed
785 * by calling fdbuf_free().
786 */
787 int
fdbuf_create(void * rights,int rightslen,struct fdbuf ** fdbufp)788 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp)
789 {
790 int numfd, i;
791 int *fds;
792 struct file *fp;
793 struct fdbuf *fdbuf;
794 int fdbufsize;
795
796 dprint(1, ("fdbuf_create: len %d\n", rightslen));
797
798 numfd = rightslen / (int)sizeof (int);
799
800 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *));
801 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP);
802 fdbuf->fd_size = fdbufsize;
803 fdbuf->fd_numfd = 0;
804 fdbuf->fd_ebuf = NULL;
805 fdbuf->fd_ebuflen = 0;
806 fds = (int *)rights;
807 for (i = 0; i < numfd; i++) {
808 if ((fp = getf(fds[i])) == NULL) {
809 fdbuf_free(fdbuf);
810 return (EBADF);
811 }
812 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n",
813 i, fds[i], (void *)fp, fp->f_count));
814 mutex_enter(&fp->f_tlock);
815 fp->f_count++;
816 mutex_exit(&fp->f_tlock);
817 /*
818 * The maximum alignment for fdbuf (or any option header
819 * and its value) it 4 bytes. On a LP64 kernel, the alignment
820 * is not sufficient for pointers (fd_fds in this case). Since
821 * we just did a kmem_alloc (we get a double word alignment),
822 * we don't need to do anything on the send side (we loose
823 * the double word alignment because fdbuf goes after an
824 * option header (eg T_unitdata_req) which is only 4 byte
825 * aligned). We take care of this when we extract the file
826 * descriptor in fdbuf_extract or fdbuf_free.
827 */
828 fdbuf->fd_fds[i] = fp;
829 fdbuf->fd_numfd++;
830 releasef(fds[i]);
831 if (AU_AUDITING())
832 audit_fdsend(fds[i], fp, 0);
833 }
834 *fdbufp = fdbuf;
835 return (0);
836 }
837
838 static int
fdbuf_optlen(int rightslen)839 fdbuf_optlen(int rightslen)
840 {
841 int numfd;
842
843 numfd = rightslen / (int)sizeof (int);
844
845 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)));
846 }
847
848 static t_uscalar_t
fdbuf_cmsglen(int fdbuflen)849 fdbuf_cmsglen(int fdbuflen)
850 {
851 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) /
852 (int)sizeof (struct file *) * (int)sizeof (int));
853 }
854
855
856 /*
857 * Return non-zero if the mblk and fdbuf are consistent.
858 */
859 static int
fdbuf_verify(mblk_t * mp,struct fdbuf * fdbuf,int fdbuflen)860 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen)
861 {
862 if (fdbuflen >= FDBUF_HDRSIZE &&
863 fdbuflen == fdbuf->fd_size) {
864 frtn_t *frp = mp->b_datap->db_frtnp;
865 /*
866 * Check that the SO_FILEP portion of the
867 * message has not been modified by
868 * the loopback transport. The sending sockfs generates
869 * a message that is esballoc'ed with the free function
870 * being fdbuf_free() and where free_arg contains the
871 * identical information as the SO_FILEP content.
872 *
873 * If any of these constraints are not satisfied we
874 * silently ignore the option.
875 */
876 ASSERT(mp);
877 if (frp != NULL &&
878 frp->free_func == fdbuf_free &&
879 frp->free_arg != NULL &&
880 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) {
881 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n",
882 (void *)fdbuf, fdbuflen));
883 return (1);
884 } else {
885 zcmn_err(getzoneid(), CE_WARN,
886 "sockfs: mismatched fdbuf content (%p)",
887 (void *)mp);
888 return (0);
889 }
890 } else {
891 zcmn_err(getzoneid(), CE_WARN,
892 "sockfs: mismatched fdbuf len %d, %d\n",
893 fdbuflen, fdbuf->fd_size);
894 return (0);
895 }
896 }
897
898 /*
899 * When the file descriptors returned by sorecvmsg can not be passed
900 * to the application this routine will cleanup the references on
901 * the files. Start at startoff bytes into the buffer.
902 */
903 static void
close_fds(void * fdbuf,int fdbuflen,int startoff)904 close_fds(void *fdbuf, int fdbuflen, int startoff)
905 {
906 int *fds = (int *)fdbuf;
907 int numfd = fdbuflen / (int)sizeof (int);
908 int i;
909
910 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff));
911
912 for (i = 0; i < numfd; i++) {
913 if (startoff < 0)
914 startoff = 0;
915 if (startoff < (int)sizeof (int)) {
916 /*
917 * This file descriptor is partially or fully after
918 * the offset
919 */
920 dprint(0,
921 ("close_fds: cleanup[%d] = %d\n", i, fds[i]));
922 (void) closeandsetf(fds[i], NULL);
923 }
924 startoff -= (int)sizeof (int);
925 }
926 }
927
928 /*
929 * Close all file descriptors contained in the control part starting at
930 * the startoffset.
931 */
932 void
so_closefds(void * control,t_uscalar_t controllen,int oldflg,int startoff)933 so_closefds(void *control, t_uscalar_t controllen, int oldflg,
934 int startoff)
935 {
936 struct cmsghdr *cmsg;
937
938 if (control == NULL)
939 return;
940
941 if (oldflg) {
942 close_fds(control, controllen, startoff);
943 return;
944 }
945 /* Scan control part for file descriptors. */
946 for (cmsg = (struct cmsghdr *)control;
947 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
948 cmsg = CMSG_NEXT(cmsg)) {
949 if (cmsg->cmsg_level == SOL_SOCKET &&
950 cmsg->cmsg_type == SCM_RIGHTS) {
951 close_fds(CMSG_CONTENT(cmsg),
952 (int)CMSG_CONTENTLEN(cmsg),
953 startoff - (int)sizeof (struct cmsghdr));
954 }
955 startoff -= ROUNDUP_cmsglen(cmsg->cmsg_len);
956 }
957 }
958
959 /*
960 * Handle truncation of a cmsg when the receive buffer is not big enough.
961 * Adjust the cmsg_len header field in the last cmsg that will be included in
962 * the buffer to reflect the number of bytes included.
963 */
964 void
so_truncatecmsg(void * control,t_uscalar_t controllen,uint_t maxlen)965 so_truncatecmsg(void *control, t_uscalar_t controllen, uint_t maxlen)
966 {
967 struct cmsghdr *cmsg;
968 uint_t len = 0;
969
970 if (control == NULL)
971 return;
972
973 for (cmsg = control;
974 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
975 cmsg = CMSG_NEXT(cmsg)) {
976
977 len += ROUNDUP_cmsglen(cmsg->cmsg_len);
978
979 if (len > maxlen) {
980 /*
981 * This cmsg is the last one that will be included in
982 * the truncated buffer.
983 */
984 socklen_t diff = len - maxlen;
985
986 if (diff < CMSG_CONTENTLEN(cmsg)) {
987 dprint(1, ("so_truncatecmsg: %d -> %d\n",
988 cmsg->cmsg_len, cmsg->cmsg_len - diff));
989 cmsg->cmsg_len -= diff;
990 } else {
991 cmsg->cmsg_len = sizeof (struct cmsghdr);
992 }
993 break;
994 }
995 }
996 }
997
998 /*
999 * Returns a pointer/length for the file descriptors contained
1000 * in the control buffer. Returns with *fdlenp == -1 if there are no
1001 * file descriptor options present. This is different than there being
1002 * a zero-length file descriptor option.
1003 * Fail if there are multiple SCM_RIGHT cmsgs.
1004 */
1005 int
so_getfdopt(void * control,t_uscalar_t controllen,int oldflg,void ** fdsp,int * fdlenp)1006 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg,
1007 void **fdsp, int *fdlenp)
1008 {
1009 struct cmsghdr *cmsg;
1010 void *fds;
1011 int fdlen;
1012
1013 if (control == NULL) {
1014 *fdsp = NULL;
1015 *fdlenp = -1;
1016 return (0);
1017 }
1018
1019 if (oldflg) {
1020 *fdsp = control;
1021 if (controllen == 0)
1022 *fdlenp = -1;
1023 else
1024 *fdlenp = controllen;
1025 dprint(1, ("so_getfdopt: old %d\n", *fdlenp));
1026 return (0);
1027 }
1028
1029 fds = NULL;
1030 fdlen = 0;
1031
1032 for (cmsg = (struct cmsghdr *)control;
1033 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1034 cmsg = CMSG_NEXT(cmsg)) {
1035 if (cmsg->cmsg_level == SOL_SOCKET &&
1036 cmsg->cmsg_type == SCM_RIGHTS) {
1037 if (fds != NULL)
1038 return (EINVAL);
1039 fds = CMSG_CONTENT(cmsg);
1040 fdlen = (int)CMSG_CONTENTLEN(cmsg);
1041 dprint(1, ("so_getfdopt: new %lu\n",
1042 (size_t)CMSG_CONTENTLEN(cmsg)));
1043 }
1044 }
1045 if (fds == NULL) {
1046 dprint(1, ("so_getfdopt: NONE\n"));
1047 *fdlenp = -1;
1048 } else
1049 *fdlenp = fdlen;
1050 *fdsp = fds;
1051 return (0);
1052 }
1053
1054 /*
1055 * Return the length of the options including any file descriptor options.
1056 */
1057 t_uscalar_t
so_optlen(void * control,t_uscalar_t controllen,int oldflg)1058 so_optlen(void *control, t_uscalar_t controllen, int oldflg)
1059 {
1060 struct cmsghdr *cmsg;
1061 t_uscalar_t optlen = 0;
1062 t_uscalar_t len;
1063
1064 if (control == NULL)
1065 return (0);
1066
1067 if (oldflg)
1068 return ((t_uscalar_t)(sizeof (struct T_opthdr) +
1069 fdbuf_optlen(controllen)));
1070
1071 for (cmsg = (struct cmsghdr *)control;
1072 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1073 cmsg = CMSG_NEXT(cmsg)) {
1074 if (cmsg->cmsg_level == SOL_SOCKET &&
1075 cmsg->cmsg_type == SCM_RIGHTS) {
1076 len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg));
1077 } else {
1078 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1079 }
1080 optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) +
1081 sizeof (struct T_opthdr));
1082 }
1083 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n",
1084 controllen, oldflg, optlen));
1085 return (optlen);
1086 }
1087
1088 /*
1089 * Copy options from control to the mblk. Skip any file descriptor options.
1090 */
1091 void
so_cmsg2opt(void * control,t_uscalar_t controllen,int oldflg,mblk_t * mp)1092 so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp)
1093 {
1094 struct T_opthdr toh;
1095 struct cmsghdr *cmsg;
1096
1097 if (control == NULL)
1098 return;
1099
1100 if (oldflg) {
1101 /* No real options - caller has handled file descriptors */
1102 return;
1103 }
1104 for (cmsg = (struct cmsghdr *)control;
1105 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1106 cmsg = CMSG_NEXT(cmsg)) {
1107 /*
1108 * Note: The caller handles file descriptors prior
1109 * to calling this function.
1110 */
1111 t_uscalar_t len;
1112
1113 if (cmsg->cmsg_level == SOL_SOCKET &&
1114 cmsg->cmsg_type == SCM_RIGHTS)
1115 continue;
1116
1117 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1118 toh.level = cmsg->cmsg_level;
1119 toh.name = cmsg->cmsg_type;
1120 toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr);
1121 toh.status = 0;
1122
1123 soappendmsg(mp, &toh, sizeof (toh));
1124 soappendmsg(mp, CMSG_CONTENT(cmsg), len);
1125 mp->b_wptr += _TPI_ALIGN_TOPT(len) - len;
1126 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1127 }
1128 }
1129
1130 /*
1131 * Return the length of the control message derived from the options.
1132 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
1133 * When oldflg is set only include SO_FILEP.
1134 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1135 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1136 * also be checked for any possible impacts.
1137 */
1138 t_uscalar_t
so_cmsglen(mblk_t * mp,void * opt,t_uscalar_t optlen,int oldflg)1139 so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg)
1140 {
1141 t_uscalar_t cmsglen = 0;
1142 struct T_opthdr *tohp;
1143 t_uscalar_t len;
1144 t_uscalar_t last_roundup = 0;
1145
1146 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1147
1148 for (tohp = (struct T_opthdr *)opt;
1149 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1150 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1151 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n",
1152 tohp->level, tohp->name, tohp->len));
1153 if (tohp->level == SOL_SOCKET &&
1154 (tohp->name == SO_SRCADDR ||
1155 tohp->name == SO_UNIX_CLOSE)) {
1156 continue;
1157 }
1158 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
1159 struct fdbuf *fdbuf;
1160 int fdbuflen;
1161
1162 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
1163 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);
1164
1165 if (!fdbuf_verify(mp, fdbuf, fdbuflen))
1166 continue;
1167 if (oldflg) {
1168 cmsglen += fdbuf_cmsglen(fdbuflen);
1169 continue;
1170 }
1171 len = fdbuf_cmsglen(fdbuflen);
1172 } else if (tohp->level == SOL_SOCKET &&
1173 tohp->name == SCM_TIMESTAMP) {
1174 if (oldflg)
1175 continue;
1176
1177 if (get_udatamodel() == DATAMODEL_NATIVE) {
1178 len = sizeof (struct timeval);
1179 } else {
1180 len = sizeof (struct timeval32);
1181 }
1182 } else {
1183 if (oldflg)
1184 continue;
1185 len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1186 }
1187 /*
1188 * Exclude roundup for last option to not set
1189 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
1190 */
1191 last_roundup = (t_uscalar_t)
1192 (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) -
1193 (len + (int)sizeof (struct cmsghdr)));
1194 cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) +
1195 last_roundup;
1196 }
1197 cmsglen -= last_roundup;
1198 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n",
1199 optlen, oldflg, cmsglen));
1200 return (cmsglen);
1201 }
1202
/*
 * Copy options from options to the control. Convert SO_FILEP to
 * file descriptors.
 *
 * Walks the T_opthdr entries in opt/optlen and writes the matching
 * cmsghdr entries into control:
 *  - SOL_SOCKET SO_SRCADDR and SO_UNIX_CLOSE are never copied.
 *  - SOL_SOCKET SO_FILEP is checked with fdbuf_verify() and extracted with
 *    fdbuf_extract(); with oldflg set the descriptors are extracted
 *    straight into the start of control, otherwise into an SCM_RIGHTS cmsg.
 *  - SOL_SOCKET SCM_TIMESTAMP is converted from a timestruc_t into a
 *    struct timeval or struct timeval32, chosen by get_udatamodel().
 *  - Everything else is copied as-is (IP_RECVTOS/IP_RECVTTL are trimmed
 *    to a single byte).
 * With oldflg set only SO_FILEP produces any output.
 *
 * Returns zero on success, EPROTO when fdbuf_verify() rejects an SO_FILEP
 * option, or whatever non-zero value fdbuf_extract() returns.
 *
 * Apart from ASSERTs, nothing here checks controllen while writing.
 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
 * allocates the space that so_opt2cmsg fills. If one changes, the other should
 * also be checked for any possible impacts.
 */
int
so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg,
    void *control, t_uscalar_t controllen)
{
	struct T_opthdr *tohp;
	struct cmsghdr *cmsg;
	struct fdbuf *fdbuf;
	int fdbuflen;
	int error;
#if defined(DEBUG) || defined(__lint)
	/*
	 * End of the (rounded up) control buffer.  Only referenced from the
	 * dprint/ASSERT at the bottom of the function.
	 * NOTE(review): presumably those compile away when DEBUG is not
	 * defined, since cend does not exist then -- confirm.
	 */
	struct cmsghdr *cend = (struct cmsghdr *)
	    (((uint8_t *)control) + ROUNDUP_cmsglen(controllen));
#endif
	/* cmsg always points at the next cmsghdr slot to fill in */
	cmsg = (struct cmsghdr *)control;

	ASSERT(__TPI_TOPT_ISALIGNED(opt));

	for (tohp = (struct T_opthdr *)opt;
	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
		dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
		    tohp->level, tohp->name, tohp->len));

		/* These two options are never passed up as control data */
		if (tohp->level == SOL_SOCKET &&
		    (tohp->name == SO_SRCADDR ||
		    tohp->name == SO_UNIX_CLOSE)) {
			continue;
		}
		ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen);
		if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
			fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
			fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);

			if (!fdbuf_verify(mp, fdbuf, fdbuflen))
				return (EPROTO);
			if (oldflg) {
				/*
				 * Old format: no cmsghdr is written; the
				 * extraction targets the start of the
				 * control buffer and cmsg is not advanced.
				 */
				error = fdbuf_extract(fdbuf, control,
				    (int)controllen);
				if (error != 0)
					return (error);
				continue;
			} else {
				int fdlen;

				fdlen = (int)fdbuf_cmsglen(
				    (int)_TPI_TOPT_DATALEN(tohp));

				/* Reported to the user as SCM_RIGHTS */
				cmsg->cmsg_level = tohp->level;
				cmsg->cmsg_type = SCM_RIGHTS;
				cmsg->cmsg_len = (socklen_t)(fdlen +
				    sizeof (struct cmsghdr));

				error = fdbuf_extract(fdbuf,
				    CMSG_CONTENT(cmsg), fdlen);
				if (error != 0)
					return (error);
			}
		} else if (tohp->level == SOL_SOCKET &&
		    tohp->name == SCM_TIMESTAMP) {
			timestruc_t *timestamp;

			if (oldflg)
				continue;

			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;

			/*
			 * The timestamp follows the option header, rounded
			 * up to a multiple of sizeof (intptr_t).
			 */
			timestamp =
			    (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1],
			    sizeof (intptr_t));

			if (get_udatamodel() == DATAMODEL_NATIVE) {
				struct timeval tv;

				cmsg->cmsg_len = sizeof (struct timeval) +
				    sizeof (struct cmsghdr);
				/* nanoseconds -> microseconds */
				tv.tv_sec = timestamp->tv_sec;
				tv.tv_usec = timestamp->tv_nsec /
				    (NANOSEC / MICROSEC);
				/*
				 * on LP64 systems, the struct timeval in
				 * the destination will not be 8-byte aligned,
				 * so use bcopy to avoid alignment trouble
				 */
				bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv));
			} else {
				struct timeval32 *time32;

				/* Caller uses the other data model: timeval32 */
				cmsg->cmsg_len = sizeof (struct timeval32) +
				    sizeof (struct cmsghdr);
				time32 = (struct timeval32 *)CMSG_CONTENT(cmsg);
				time32->tv_sec = (time32_t)timestamp->tv_sec;
				time32->tv_usec =
				    (int32_t)(timestamp->tv_nsec /
				    (NANOSEC / MICROSEC));
			}

		} else {
			/* Generic option: not reported in the old format */
			if (oldflg)
				continue;

			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;
			cmsg->cmsg_len = (socklen_t)sizeof (struct cmsghdr);
			if (tohp->level == IPPROTO_IP &&
			    (tohp->name == IP_RECVTOS ||
			    tohp->name == IP_RECVTTL)) {
				/*
				 * The data for these is a uint8_t but, in
				 * order to maintain alignment for any
				 * following TPI primitives in the message,
				 * there will be some trailing padding bytes
				 * which are included in the TPI_TOPT_DATALEN.
				 * For these types, we set the cmsg_len
				 * explicitly to the correct value.
				 */
				cmsg->cmsg_len += (socklen_t)sizeof (uint8_t);
			} else {
				cmsg->cmsg_len +=
				    (socklen_t)(_TPI_TOPT_DATALEN(tohp));
			}

			/* copy content to control data part */
			bcopy(&tohp[1], CMSG_CONTENT(cmsg),
			    CMSG_CONTENTLEN(cmsg));
		}
		/* move to next CMSG structure! */
		cmsg = CMSG_NEXT(cmsg);
	}
	dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
	    control, controllen, (void *)cend, (void *)cmsg));
	ASSERT(cmsg <= cend);
	return (0);
}
1345
1346 /*
1347 * Extract the SO_SRCADDR option value if present.
1348 */
1349 void
so_getopt_srcaddr(void * opt,t_uscalar_t optlen,void ** srcp,t_uscalar_t * srclenp)1350 so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp,
1351 t_uscalar_t *srclenp)
1352 {
1353 struct T_opthdr *tohp;
1354
1355 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1356
1357 ASSERT(srcp != NULL && srclenp != NULL);
1358 *srcp = NULL;
1359 *srclenp = 0;
1360
1361 for (tohp = (struct T_opthdr *)opt;
1362 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1363 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1364 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n",
1365 tohp->level, tohp->name, tohp->len));
1366 if (tohp->level == SOL_SOCKET &&
1367 tohp->name == SO_SRCADDR) {
1368 *srcp = _TPI_TOPT_DATA(tohp);
1369 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1370 }
1371 }
1372 }
1373
1374 /*
1375 * Verify if the SO_UNIX_CLOSE option is present.
1376 */
1377 int
so_getopt_unix_close(void * opt,t_uscalar_t optlen)1378 so_getopt_unix_close(void *opt, t_uscalar_t optlen)
1379 {
1380 struct T_opthdr *tohp;
1381
1382 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1383
1384 for (tohp = (struct T_opthdr *)opt;
1385 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1386 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1387 dprint(1,
1388 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n",
1389 tohp->level, tohp->name, tohp->len));
1390 if (tohp->level == SOL_SOCKET &&
1391 tohp->name == SO_UNIX_CLOSE)
1392 return (1);
1393 }
1394 return (0);
1395 }
1396
1397 /*
1398 * Allocate an M_PROTO message.
1399 *
1400 * If allocation fails the behavior depends on sleepflg:
1401 * _ALLOC_NOSLEEP fail immediately
1402 * _ALLOC_INTR sleep for memory until a signal is caught
1403 * _ALLOC_SLEEP sleep forever. Don't return NULL.
1404 */
1405 mblk_t *
soallocproto(size_t size,int sleepflg,cred_t * cr)1406 soallocproto(size_t size, int sleepflg, cred_t *cr)
1407 {
1408 mblk_t *mp;
1409
1410 /* Round up size for reuse */
1411 size = MAX(size, 64);
1412 if (cr != NULL)
1413 mp = allocb_cred(size, cr, curproc->p_pid);
1414 else
1415 mp = allocb(size, BPRI_MED);
1416
1417 if (mp == NULL) {
1418 int error; /* Dummy - error not returned to caller */
1419
1420 switch (sleepflg) {
1421 case _ALLOC_SLEEP:
1422 if (cr != NULL) {
1423 mp = allocb_cred_wait(size, STR_NOSIG, &error,
1424 cr, curproc->p_pid);
1425 } else {
1426 mp = allocb_wait(size, BPRI_MED, STR_NOSIG,
1427 &error);
1428 }
1429 ASSERT(mp);
1430 break;
1431 case _ALLOC_INTR:
1432 if (cr != NULL) {
1433 mp = allocb_cred_wait(size, 0, &error, cr,
1434 curproc->p_pid);
1435 } else {
1436 mp = allocb_wait(size, BPRI_MED, 0, &error);
1437 }
1438 if (mp == NULL) {
1439 /* Caught signal while sleeping for memory */
1440 eprintline(ENOBUFS);
1441 return (NULL);
1442 }
1443 break;
1444 case _ALLOC_NOSLEEP:
1445 default:
1446 eprintline(ENOBUFS);
1447 return (NULL);
1448 }
1449 }
1450 DB_TYPE(mp) = M_PROTO;
1451 return (mp);
1452 }
1453
1454 /*
1455 * Allocate an M_PROTO message with a single component.
1456 * len is the length of buf. size is the amount to allocate.
1457 *
1458 * buf can be NULL with a non-zero len.
1459 * This results in a bzero'ed chunk being placed the message.
1460 */
1461 mblk_t *
soallocproto1(const void * buf,ssize_t len,ssize_t size,int sleepflg,cred_t * cr)1462 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg,
1463 cred_t *cr)
1464 {
1465 mblk_t *mp;
1466
1467 if (size == 0)
1468 size = len;
1469
1470 ASSERT(size >= len);
1471 /* Round up size for reuse */
1472 size = MAX(size, 64);
1473 mp = soallocproto(size, sleepflg, cr);
1474 if (mp == NULL)
1475 return (NULL);
1476 mp->b_datap->db_type = M_PROTO;
1477 if (len != 0) {
1478 if (buf != NULL)
1479 bcopy(buf, mp->b_wptr, len);
1480 else
1481 bzero(mp->b_wptr, len);
1482 mp->b_wptr += len;
1483 }
1484 return (mp);
1485 }
1486
1487 /*
1488 * Append buf/len to mp.
1489 * The caller has to ensure that there is enough room in the mblk.
1490 *
1491 * buf can be NULL with a non-zero len.
1492 * This results in a bzero'ed chunk being placed the message.
1493 */
1494 void
soappendmsg(mblk_t * mp,const void * buf,ssize_t len)1495 soappendmsg(mblk_t *mp, const void *buf, ssize_t len)
1496 {
1497 ASSERT(mp);
1498
1499 if (len != 0) {
1500 /* Assert for room left */
1501 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len);
1502 if (buf != NULL)
1503 bcopy(buf, mp->b_wptr, len);
1504 else
1505 bzero(mp->b_wptr, len);
1506 }
1507 mp->b_wptr += len;
1508 }
1509
1510 /*
1511 * Create a message using two kernel buffers.
1512 * If size is set that will determine the allocation size (e.g. for future
1513 * soappendmsg calls). If size is zero it is derived from the buffer
1514 * lengths.
1515 */
1516 mblk_t *
soallocproto2(const void * buf1,ssize_t len1,const void * buf2,ssize_t len2,ssize_t size,int sleepflg,cred_t * cr)1517 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
1518 ssize_t size, int sleepflg, cred_t *cr)
1519 {
1520 mblk_t *mp;
1521
1522 if (size == 0)
1523 size = len1 + len2;
1524 ASSERT(size >= len1 + len2);
1525
1526 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
1527 if (mp)
1528 soappendmsg(mp, buf2, len2);
1529 return (mp);
1530 }
1531
1532 /*
1533 * Create a message using three kernel buffers.
1534 * If size is set that will determine the allocation size (for future
1535 * soappendmsg calls). If size is zero it is derived from the buffer
1536 * lengths.
1537 */
1538 mblk_t *
soallocproto3(const void * buf1,ssize_t len1,const void * buf2,ssize_t len2,const void * buf3,ssize_t len3,ssize_t size,int sleepflg,cred_t * cr)1539 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
1540 const void *buf3, ssize_t len3, ssize_t size, int sleepflg, cred_t *cr)
1541 {
1542 mblk_t *mp;
1543
1544 if (size == 0)
1545 size = len1 + len2 +len3;
1546 ASSERT(size >= len1 + len2 + len3);
1547
1548 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
1549 if (mp != NULL) {
1550 soappendmsg(mp, buf2, len2);
1551 soappendmsg(mp, buf3, len3);
1552 }
1553 return (mp);
1554 }
1555
1556 #ifdef DEBUG
1557 char *
pr_state(uint_t state,uint_t mode)1558 pr_state(uint_t state, uint_t mode)
1559 {
1560 static char buf[1024];
1561
1562 buf[0] = 0;
1563 if (state & SS_ISCONNECTED)
1564 (void) strcat(buf, "ISCONNECTED ");
1565 if (state & SS_ISCONNECTING)
1566 (void) strcat(buf, "ISCONNECTING ");
1567 if (state & SS_ISDISCONNECTING)
1568 (void) strcat(buf, "ISDISCONNECTING ");
1569 if (state & SS_CANTSENDMORE)
1570 (void) strcat(buf, "CANTSENDMORE ");
1571
1572 if (state & SS_CANTRCVMORE)
1573 (void) strcat(buf, "CANTRCVMORE ");
1574 if (state & SS_ISBOUND)
1575 (void) strcat(buf, "ISBOUND ");
1576 if (state & SS_NDELAY)
1577 (void) strcat(buf, "NDELAY ");
1578 if (state & SS_NONBLOCK)
1579 (void) strcat(buf, "NONBLOCK ");
1580
1581 if (state & SS_ASYNC)
1582 (void) strcat(buf, "ASYNC ");
1583 if (state & SS_ACCEPTCONN)
1584 (void) strcat(buf, "ACCEPTCONN ");
1585 if (state & SS_SAVEDEOR)
1586 (void) strcat(buf, "SAVEDEOR ");
1587
1588 if (state & SS_RCVATMARK)
1589 (void) strcat(buf, "RCVATMARK ");
1590 if (state & SS_OOBPEND)
1591 (void) strcat(buf, "OOBPEND ");
1592 if (state & SS_HAVEOOBDATA)
1593 (void) strcat(buf, "HAVEOOBDATA ");
1594 if (state & SS_HADOOBDATA)
1595 (void) strcat(buf, "HADOOBDATA ");
1596
1597 if (mode & SM_PRIV)
1598 (void) strcat(buf, "PRIV ");
1599 if (mode & SM_ATOMIC)
1600 (void) strcat(buf, "ATOMIC ");
1601 if (mode & SM_ADDR)
1602 (void) strcat(buf, "ADDR ");
1603 if (mode & SM_CONNREQUIRED)
1604 (void) strcat(buf, "CONNREQUIRED ");
1605
1606 if (mode & SM_FDPASSING)
1607 (void) strcat(buf, "FDPASSING ");
1608 if (mode & SM_EXDATA)
1609 (void) strcat(buf, "EXDATA ");
1610 if (mode & SM_OPTDATA)
1611 (void) strcat(buf, "OPTDATA ");
1612 if (mode & SM_BYTESTREAM)
1613 (void) strcat(buf, "BYTESTREAM ");
1614 return (buf);
1615 }
1616
1617 char *
pr_addr(int family,struct sockaddr * addr,t_uscalar_t addrlen)1618 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen)
1619 {
1620 static char buf[1024];
1621
1622 if (addr == NULL || addrlen == 0) {
1623 (void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr);
1624 return (buf);
1625 }
1626 switch (family) {
1627 case AF_INET: {
1628 struct sockaddr_in sin;
1629
1630 bcopy(addr, &sin, sizeof (sin));
1631
1632 (void) sprintf(buf, "(len %d) %x/%d",
1633 addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
1634 break;
1635 }
1636 case AF_INET6: {
1637 struct sockaddr_in6 sin6;
1638 uint16_t *piece = (uint16_t *)&sin6.sin6_addr;
1639
1640 bcopy((char *)addr, (char *)&sin6, sizeof (sin6));
1641 (void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d",
1642 addrlen,
1643 ntohs(piece[0]), ntohs(piece[1]),
1644 ntohs(piece[2]), ntohs(piece[3]),
1645 ntohs(piece[4]), ntohs(piece[5]),
1646 ntohs(piece[6]), ntohs(piece[7]),
1647 ntohs(sin6.sin6_port));
1648 break;
1649 }
1650 case AF_UNIX: {
1651 struct sockaddr_un *soun = (struct sockaddr_un *)addr;
1652
1653 (void) sprintf(buf, "(len %d) %s", addrlen,
1654 (soun == NULL) ? "(none)" : soun->sun_path);
1655 break;
1656 }
1657 default:
1658 (void) sprintf(buf, "(unknown af %d)", family);
1659 break;
1660 }
1661 return (buf);
1662 }
1663
1664 /* The logical equivalence operator (a if-and-only-if b) */
1665 #define EQUIVALENT(a, b) (((a) && (b)) || (!(a) && (!(b))))
1666
1667 /*
1668 * Verify limitations and invariants on oob state.
1669 * Return 1 if OK, otherwise 0 so that it can be used as
1670 * ASSERT(verify_oobstate(so));
1671 */
1672 int
so_verify_oobstate(struct sonode * so)1673 so_verify_oobstate(struct sonode *so)
1674 {
1675 boolean_t havemark;
1676
1677 ASSERT(MUTEX_HELD(&so->so_lock));
1678
1679 /*
1680 * The possible state combinations are:
1681 * 0
1682 * SS_OOBPEND
1683 * SS_OOBPEND|SS_HAVEOOBDATA
1684 * SS_OOBPEND|SS_HADOOBDATA
1685 * SS_HADOOBDATA
1686 */
1687 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) {
1688 case 0:
1689 case SS_OOBPEND:
1690 case SS_OOBPEND|SS_HAVEOOBDATA:
1691 case SS_OOBPEND|SS_HADOOBDATA:
1692 case SS_HADOOBDATA:
1693 break;
1694 default:
1695 printf("Bad oob state 1 (%p): state %s\n",
1696 (void *)so, pr_state(so->so_state, so->so_mode));
1697 return (0);
1698 }
1699
1700 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */
1701 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) {
1702 printf("Bad oob state 2 (%p): state %s\n",
1703 (void *)so, pr_state(so->so_state, so->so_mode));
1704 return (0);
1705 }
1706
1707 /*
1708 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
1709 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
1710 */
1711 havemark = (SOCK_IS_NONSTR(so)) ? so->so_oobmark > 0 :
1712 SOTOTPI(so)->sti_oobsigcnt > 0;
1713
1714 if (!EQUIVALENT(havemark || (so->so_state & SS_RCVATMARK),
1715 so->so_state & SS_OOBPEND)) {
1716 printf("Bad oob state 3 (%p): state %s\n",
1717 (void *)so, pr_state(so->so_state, so->so_mode));
1718 return (0);
1719 }
1720
1721 /*
1722 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA
1723 */
1724 if (!(so->so_options & SO_OOBINLINE) &&
1725 !EQUIVALENT(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) {
1726 printf("Bad oob state 4 (%p): state %s\n",
1727 (void *)so, pr_state(so->so_state, so->so_mode));
1728 return (0);
1729 }
1730
1731 if (!SOCK_IS_NONSTR(so) &&
1732 SOTOTPI(so)->sti_oobsigcnt < SOTOTPI(so)->sti_oobcnt) {
1733 printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
1734 (void *)so, SOTOTPI(so)->sti_oobsigcnt,
1735 SOTOTPI(so)->sti_oobcnt,
1736 pr_state(so->so_state, so->so_mode));
1737 return (0);
1738 }
1739
1740 return (1);
1741 }
1742 #undef EQUIVALENT
1743 #endif /* DEBUG */
1744
1745 /* initialize sockfs zone specific kstat related items */
1746 void *
sock_kstat_init(zoneid_t zoneid)1747 sock_kstat_init(zoneid_t zoneid)
1748 {
1749 kstat_t *ksp;
1750
1751 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc",
1752 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid);
1753
1754 if (ksp != NULL) {
1755 ksp->ks_update = sockfs_update;
1756 ksp->ks_snapshot = sockfs_snapshot;
1757 ksp->ks_lock = &socklist.sl_lock;
1758 ksp->ks_private = (void *)(uintptr_t)zoneid;
1759 kstat_install(ksp);
1760 }
1761
1762 return (ksp);
1763 }
1764
1765 /* tear down sockfs zone specific kstat related items */
1766 /*ARGSUSED*/
1767 void
sock_kstat_fini(zoneid_t zoneid,void * arg)1768 sock_kstat_fini(zoneid_t zoneid, void *arg)
1769 {
1770 kstat_t *ksp = (kstat_t *)arg;
1771
1772 if (ksp != NULL) {
1773 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private);
1774 kstat_delete(ksp);
1775 }
1776 }
1777
1778 /*
1779 * Zones:
1780 * Note that nactive is going to be different for each zone.
1781 * This means we require kstat to call sockfs_update and then sockfs_snapshot
1782 * for the same zone, or sockfs_snapshot will be taken into the wrong size
1783 * buffer. This is safe, but if the buffer is too small, user will not be
1784 * given details of all sockets. However, as this kstat has a ks_lock, kstat
1785 * driver will keep it locked between the update and the snapshot, so no
1786 * other process (zone) can currently get inbetween resulting in a wrong size
1787 * buffer allocation.
1788 */
1789 static int
sockfs_update(kstat_t * ksp,int rw)1790 sockfs_update(kstat_t *ksp, int rw)
1791 {
1792 uint_t nactive = 0; /* # of active AF_UNIX sockets */
1793 struct sonode *so; /* current sonode on socklist */
1794 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
1795
1796 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
1797
1798 if (rw == KSTAT_WRITE) { /* bounce all writes */
1799 return (EACCES);
1800 }
1801
1802 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
1803 if (so->so_count != 0 && so->so_zoneid == myzoneid) {
1804 nactive++;
1805 }
1806 }
1807 ksp->ks_ndata = nactive;
1808 ksp->ks_data_size = nactive * sizeof (struct sockinfo);
1809
1810 return (0);
1811 }
1812
/*
 * kstat snapshot callback: fill buf with one struct sockinfo per active
 * socket on socklist that belongs to this kstat's zone.
 *
 * ks_snaptime is stamped first; a KSTAT_WRITE request is then rejected
 * with EACCES.  Filling stops early once buf (ks_data_size bytes, as set
 * by sockfs_update) is full.  On return ks_ndata holds the number of
 * entries actually written.  Returns 0 on success.
 */
static int
sockfs_snapshot(kstat_t *ksp, void *buf, int rw)
{
	int ns;			/* # of sonodes we've copied */
	struct sonode *so;	/* current sonode on socklist */
	struct sockinfo *psi;	/* where we put sockinfo data */
	t_uscalar_t sn_len;	/* soa_len */
	zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
	sotpi_info_t *sti;

	ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());

	ksp->ks_snaptime = gethrtime();

	if (rw == KSTAT_WRITE) {	/* bounce all writes */
		return (EACCES);
	}

	/*
	 * For each sonode on the socklist, we massage the important
	 * info into buf, in sockinfo format.
	 */
	psi = (struct sockinfo *)buf;
	ns = 0;
	for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
		vattr_t attr;

		/* only stuff active sonodes and the same zone: */
		if (so->so_count == 0 || so->so_zoneid != myzoneid) {
			continue;
		}

		/*
		 * If the sonode was activated between the update and the
		 * snapshot, we're done - as this is only a snapshot.
		 * (That is, stop once the next entry would start at or
		 * beyond the end of the ks_data_size byte buffer.)
		 */
		if ((caddr_t)(psi) >= (caddr_t)buf + ksp->ks_data_size) {
			break;
		}

		sti = SOTOTPI(so);
		/* copy important info into buf: */
		psi->si_size = sizeof (struct sockinfo);
		psi->si_family = so->so_family;
		psi->si_type = so->so_type;
		psi->si_flag = so->so_flag;
		psi->si_state = so->so_state;
		psi->si_serv_type = sti->sti_serv_type;
		psi->si_ux_laddr_sou_magic = sti->sti_ux_laddr.soua_magic;
		psi->si_ux_faddr_sou_magic = sti->sti_ux_faddr.soua_magic;
		psi->si_laddr_soa_len = sti->sti_laddr.soa_len;
		psi->si_faddr_soa_len = sti->sti_faddr.soa_len;
		psi->si_szoneid = so->so_zoneid;
		psi->si_faddr_noxlate = sti->sti_faddr_noxlate;

		/*
		 * Grab the inode, if possible.
		 * This must be done before entering so_lock as VOP_GETATTR
		 * will acquire it.
		 * An inode number of 0 is reported when there is no vnode
		 * or the attributes cannot be fetched.
		 */
		if (so->so_vnode == NULL ||
		    VOP_GETATTR(so->so_vnode, &attr, 0, CRED(), NULL) != 0)
			attr.va_nodeid = 0;

		psi->si_inode = attr.va_nodeid;

		/* The address copies below are made under so_lock */
		mutex_enter(&so->so_lock);

		/*
		 * Local address.  Nothing is stored in si_laddr_family or
		 * si_laddr_sun_path when the socket has no local address.
		 */
		if (sti->sti_laddr_sa != NULL) {
			ASSERT(sti->sti_laddr_sa->sa_data != NULL);
			sn_len = sti->sti_laddr_len;
			ASSERT(sn_len <= sizeof (short) +
			    sizeof (psi->si_laddr_sun_path));

			psi->si_laddr_family =
			    sti->sti_laddr_sa->sa_family;
			if (sn_len != 0) {
				/* AF_UNIX socket names are NULL terminated */
				(void) strncpy(psi->si_laddr_sun_path,
				    sti->sti_laddr_sa->sa_data,
				    sizeof (psi->si_laddr_sun_path));
				/* From here on sn_len is the copied length */
				sn_len = strlen(psi->si_laddr_sun_path);
			}
			psi->si_laddr_sun_path[sn_len] = 0;
		}

		/* Peer address, handled exactly like the local address */
		if (sti->sti_faddr_sa != NULL) {
			ASSERT(sti->sti_faddr_sa->sa_data != NULL);
			sn_len = sti->sti_faddr_len;
			ASSERT(sn_len <= sizeof (short) +
			    sizeof (psi->si_faddr_sun_path));

			psi->si_faddr_family =
			    sti->sti_faddr_sa->sa_family;
			if (sn_len != 0) {
				(void) strncpy(psi->si_faddr_sun_path,
				    sti->sti_faddr_sa->sa_data,
				    sizeof (psi->si_faddr_sun_path));
				sn_len = strlen(psi->si_faddr_sun_path);
			}
			psi->si_faddr_sun_path[sn_len] = 0;
		}

		mutex_exit(&so->so_lock);

		/*
		 * The sonode and the two AF_UNIX vnode pointers are
		 * reported as "%p"-formatted strings.
		 */
		(void) snprintf(psi->si_son_straddr,
		    sizeof (psi->si_son_straddr), "%p", (void *)so);
		(void) snprintf(psi->si_lvn_straddr,
		    sizeof (psi->si_lvn_straddr), "%p",
		    (void *)sti->sti_ux_laddr.soua_vp);
		(void) snprintf(psi->si_fvn_straddr,
		    sizeof (psi->si_fvn_straddr), "%p",
		    (void *)sti->sti_ux_faddr.soua_vp);

		ns++;
		psi++;
	}

	ksp->ks_ndata = ns;
	return (0);
}
1934
1935 ssize_t
soreadfile(file_t * fp,uchar_t * buf,u_offset_t fileoff,int * err,size_t size)1936 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size)
1937 {
1938 struct uio auio;
1939 struct iovec aiov[1];
1940 register vnode_t *vp;
1941 int ioflag, rwflag;
1942 ssize_t cnt;
1943 int error = 0;
1944 int iovcnt = 0;
1945 short fflag;
1946
1947 vp = fp->f_vnode;
1948 fflag = fp->f_flag;
1949
1950 rwflag = 0;
1951 aiov[0].iov_base = (caddr_t)buf;
1952 aiov[0].iov_len = size;
1953 iovcnt = 1;
1954 cnt = (ssize_t)size;
1955 (void) VOP_RWLOCK(vp, rwflag, NULL);
1956
1957 auio.uio_loffset = fileoff;
1958 auio.uio_iov = aiov;
1959 auio.uio_iovcnt = iovcnt;
1960 auio.uio_resid = cnt;
1961 auio.uio_segflg = UIO_SYSSPACE;
1962 auio.uio_llimit = MAXOFFSET_T;
1963 auio.uio_fmode = fflag;
1964 auio.uio_extflg = UIO_COPY_CACHED;
1965
1966 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1967
1968 /* If read sync is not asked for, filter sync flags */
1969 if ((ioflag & FRSYNC) == 0)
1970 ioflag &= ~(FSYNC|FDSYNC);
1971 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1972 cnt -= auio.uio_resid;
1973
1974 VOP_RWUNLOCK(vp, rwflag, NULL);
1975
1976 if (error == EINTR && cnt != 0)
1977 error = 0;
1978
1979 if (error != 0) {
1980 *err = error;
1981 return (0);
1982 } else {
1983 *err = 0;
1984 return (cnt);
1985 }
1986 }
1987
/*
 * Copy size bytes from "from" to the kernel buffer "to".
 * When fromkernel is non-zero the source is copied with bcopy() and 0 is
 * returned; otherwise the copy is done with xcopyin() and its result is
 * returned.
 */
int
so_copyin(const void *from, void *to, size_t size, int fromkernel)
{
	if (!fromkernel)
		return (xcopyin(from, to, size));

	bcopy(from, to, size);
	return (0);
}
1997
/*
 * Copy size bytes from the kernel buffer "from" to "to".
 * When tokernel is non-zero the destination is written with bcopy() and 0
 * is returned; otherwise the copy is done with xcopyout() and its result
 * is returned.
 */
int
so_copyout(const void *from, void *to, size_t size, int tokernel)
{
	if (!tokernel)
		return (xcopyout(from, to, size));

	bcopy(from, to, size);
	return (0);
}
2007