1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright (c) 2017 by Delphix. All rights reserved.
28  * Copyright 2021 Racktop Systems, Inc.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/bitmap.h>
36 #include <sys/debug.h>
37 #include <sys/errno.h>
38 #include <sys/strsubr.h>
39 #include <sys/cmn_err.h>
40 #include <sys/sysmacros.h>
41 #include <sys/filio.h>
42 #include <sys/flock.h>
43 #include <sys/stat.h>
44 #include <sys/share.h>
45 
46 #include <sys/vfs.h>
47 #include <sys/vfs_opreg.h>
48 
49 #include <sys/sockio.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/strsun.h>
53 
54 #include <fs/sockfs/sockcommon.h>
55 #include <fs/sockfs/socktpi.h>
56 
57 /*
58  * Generic vnode ops
59  */
60 static int	socket_vop_open(struct vnode **, int, struct cred *,
61 		    caller_context_t *);
62 static int	socket_vop_close(struct vnode *, int, int, offset_t,
63 		    struct cred *, caller_context_t *);
64 static int	socket_vop_read(struct vnode *, struct uio *, int,
65 		    struct cred *, caller_context_t *);
66 static int	socket_vop_write(struct vnode *, struct uio *, int,
67 		    struct cred *, caller_context_t *);
68 static int	socket_vop_ioctl(struct vnode *, int, intptr_t, int,
69 		    struct cred *, int32_t *, caller_context_t *);
70 static int	socket_vop_setfl(struct vnode *, int, int, cred_t *,
71 		    caller_context_t *);
72 static int	socket_vop_getattr(struct vnode *, struct vattr *, int,
73 		    struct cred *, caller_context_t *);
74 static int	socket_vop_setattr(struct vnode *, struct vattr *, int,
75 		    struct cred *, caller_context_t *);
76 static int	socket_vop_access(struct vnode *, int, int, struct cred *,
77 		    caller_context_t *);
78 static int	socket_vop_fsync(struct vnode *, int, struct cred *,
79 		    caller_context_t *);
80 static void	socket_vop_inactive(struct vnode *, struct cred *,
81 		    caller_context_t *);
82 static int	socket_vop_fid(struct vnode *, struct fid *,
83 		    caller_context_t *);
84 static int	socket_vop_seek(struct vnode *, offset_t, offset_t *,
85 		    caller_context_t *);
86 static int	socket_vop_poll(struct vnode *, short, int, short *,
87 		    struct pollhead **, caller_context_t *);
88 
89 extern int	socket_close_internal(struct sonode *, int, cred_t *);
90 extern void	socket_destroy_internal(struct sonode *, cred_t *);
91 
92 struct vnodeops *socket_vnodeops;
93 const fs_operation_def_t socket_vnodeops_template[] = {
94 	VOPNAME_OPEN,		{ .vop_open = socket_vop_open },
95 	VOPNAME_CLOSE,		{ .vop_close = socket_vop_close },
96 	VOPNAME_READ,		{ .vop_read = socket_vop_read },
97 	VOPNAME_WRITE,		{ .vop_write = socket_vop_write },
98 	VOPNAME_IOCTL,		{ .vop_ioctl = socket_vop_ioctl },
99 	VOPNAME_SETFL,		{ .vop_setfl = socket_vop_setfl },
100 	VOPNAME_GETATTR,	{ .vop_getattr = socket_vop_getattr },
101 	VOPNAME_SETATTR,	{ .vop_setattr = socket_vop_setattr },
102 	VOPNAME_ACCESS,		{ .vop_access = socket_vop_access },
103 	VOPNAME_FSYNC,		{ .vop_fsync = socket_vop_fsync },
104 	VOPNAME_INACTIVE,	{ .vop_inactive = socket_vop_inactive },
105 	VOPNAME_FID,		{ .vop_fid = socket_vop_fid },
106 	VOPNAME_SEEK,		{ .vop_seek = socket_vop_seek },
107 	VOPNAME_POLL,		{ .vop_poll = socket_vop_poll },
108 	VOPNAME_DISPOSE,	{ .error = fs_error },
109 	NULL,			NULL
110 };
111 
112 
113 /*
114  * generic vnode ops
115  */
116 
117 /*ARGSUSED*/
118 static int
socket_vop_open(struct vnode ** vpp,int flag,struct cred * cr,caller_context_t * ct)119 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr,
120     caller_context_t *ct)
121 {
122 	struct vnode *vp = *vpp;
123 	struct sonode *so = VTOSO(vp);
124 
125 	flag &= ~FCREAT;		/* paranoia */
126 	mutex_enter(&so->so_lock);
127 	so->so_count++;
128 	mutex_exit(&so->so_lock);
129 
130 	ASSERT(so->so_count != 0);	/* wraparound */
131 	ASSERT(vp->v_type == VSOCK);
132 
133 	return (0);
134 }
135 
136 /*ARGSUSED*/
137 static int
socket_vop_close(struct vnode * vp,int flag,int count,offset_t offset,struct cred * cr,caller_context_t * ct)138 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset,
139     struct cred *cr, caller_context_t *ct)
140 {
141 	struct sonode *so;
142 	int error = 0;
143 
144 	so = VTOSO(vp);
145 	ASSERT(vp->v_type == VSOCK);
146 
147 	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
148 	cleanshares(vp, ttoproc(curthread)->p_pid);
149 
150 	if (vp->v_stream)
151 		strclean(vp);
152 
153 	if (count > 1) {
154 		dprint(2, ("socket_vop_close: count %d\n", count));
155 		return (0);
156 	}
157 
158 	mutex_enter(&so->so_lock);
159 	if (--so->so_count == 0) {
160 		/*
161 		 * Initiate connection shutdown.
162 		 */
163 		mutex_exit(&so->so_lock);
164 		error = socket_close_internal(so, flag, cr);
165 	} else {
166 		mutex_exit(&so->so_lock);
167 	}
168 
169 	return (error);
170 }
171 
172 /*ARGSUSED2*/
173 static int
socket_vop_read(struct vnode * vp,struct uio * uiop,int ioflag,struct cred * cr,caller_context_t * ct)174 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
175     caller_context_t *ct)
176 {
177 	struct sonode *so = VTOSO(vp);
178 	struct nmsghdr lmsg;
179 
180 	ASSERT(vp->v_type == VSOCK);
181 	bzero((void *)&lmsg, sizeof (lmsg));
182 
183 	return (socket_recvmsg(so, &lmsg, uiop, cr));
184 }
185 
186 /*ARGSUSED2*/
187 static int
socket_vop_write(struct vnode * vp,struct uio * uiop,int ioflag,struct cred * cr,caller_context_t * ct)188 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag,
189     struct cred *cr, caller_context_t *ct)
190 {
191 	struct sonode *so = VTOSO(vp);
192 	struct nmsghdr lmsg;
193 
194 	ASSERT(vp->v_type == VSOCK);
195 	bzero((void *)&lmsg, sizeof (lmsg));
196 
197 	if (!(so->so_mode & SM_BYTESTREAM)) {
198 		/*
199 		 * If the socket is not byte stream set MSG_EOR
200 		 */
201 		lmsg.msg_flags = MSG_EOR;
202 	}
203 
204 	return (socket_sendmsg(so, &lmsg, uiop, cr));
205 }
206 
207 /*ARGSUSED4*/
208 static int
socket_vop_ioctl(struct vnode * vp,int cmd,intptr_t arg,int mode,struct cred * cr,int32_t * rvalp,caller_context_t * ct)209 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
210     struct cred *cr, int32_t *rvalp, caller_context_t *ct)
211 {
212 	struct sonode *so = VTOSO(vp);
213 
214 	ASSERT(vp->v_type == VSOCK);
215 
216 	return (socket_ioctl(so, cmd, arg, mode, cr, rvalp));
217 }
218 
219 /*
220  * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited
221  * from listener to acceptor.
222  */
223 /* ARGSUSED */
224 static int
socket_vop_setfl(vnode_t * vp,int oflags,int nflags,cred_t * cr,caller_context_t * ct)225 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
226     caller_context_t *ct)
227 {
228 	struct sonode *so = VTOSO(vp);
229 	int error = 0;
230 
231 	ASSERT(vp->v_type == VSOCK);
232 
233 	mutex_enter(&so->so_lock);
234 	if (nflags & FNDELAY)
235 		so->so_state |= SS_NDELAY;
236 	else
237 		so->so_state &= ~SS_NDELAY;
238 	if (nflags & FNONBLOCK)
239 		so->so_state |= SS_NONBLOCK;
240 	else
241 		so->so_state &= ~SS_NONBLOCK;
242 	mutex_exit(&so->so_lock);
243 
244 	if (so->so_state & SS_ASYNC)
245 		oflags |= FASYNC;
246 	/*
247 	 * Sets/clears the SS_ASYNC flag based on the presence/absence
248 	 * of the FASYNC flag passed to fcntl(F_SETFL).
249 	 * This exists solely for BSD fcntl() FASYNC compatibility.
250 	 */
251 	if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) {
252 		int async = nflags & FASYNC;
253 		int32_t rv;
254 
255 		/*
256 		 * For non-TPI sockets all we have to do is set/remove the
257 		 * SS_ASYNC bit, but for TPI it is more involved. For that
258 		 * reason we delegate the job to the protocol's ioctl handler.
259 		 */
260 		error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL,
261 		    cr, &rv);
262 	}
263 	return (error);
264 }
265 
266 
267 /*
268  * Get the made up attributes for the vnode.
269  * 4.3BSD returns the current time for all the timestamps.
270  * 4.4BSD returns 0 for all the timestamps.
271  * Here we use the access and modified times recorded in the sonode.
272  *
273  * Just like in BSD there is not effect on the underlying file system node
274  * bound to an AF_UNIX pathname.
275  *
276  * When sockmod has been popped this will act just like a stream. Since
277  * a socket is always a clone there is no need to inspect the attributes
278  * of the "realvp".
279  */
280 /* ARGSUSED */
281 int
socket_vop_getattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cr,caller_context_t * ct)282 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags,
283     struct cred *cr, caller_context_t *ct)
284 {
285 	dev_t		fsid;
286 	struct sonode	*so;
287 	static int	sonode_shift = 0;
288 
289 	/*
290 	 * Calculate the amount of bitshift to a sonode pointer which will
291 	 * still keep it unique.  See below. Note that highbit() uses
292 	 * 1-based indexing for the highest bit set (and 0 for 'no bits set').
293 	 * To use the result of highbit() as a shift value, we must subtract 1
294 	 * from the result.
295 	 */
296 	if (sonode_shift == 0) {
297 		int bit = highbit(sizeof (struct sonode));
298 
299 		/* Sanity check */
300 		VERIFY3S(bit, >, 0);
301 		sonode_shift = bit - 1;
302 	}
303 
304 	so = VTOSO(vp);
305 	fsid = sockdev;
306 
307 	if (so->so_version == SOV_STREAM) {
308 		/*
309 		 * The imaginary "sockmod" has been popped - act
310 		 * as a stream
311 		 */
312 		vap->va_type = VCHR;
313 		vap->va_mode = 0;
314 	} else {
315 		vap->va_type = vp->v_type;
316 		vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|
317 		    S_IROTH|S_IWOTH;
318 	}
319 	vap->va_uid = vap->va_gid = 0;
320 	vap->va_fsid = fsid;
321 	/*
322 	 * If the va_nodeid is > UINT32_MAX, then stat(2) might fail in
323 	 * unexpected ways inside non-largefile aware 32-bit processes --
324 	 * historically, socket inode values (va_nodeid values) were capped at
325 	 * UINT16_MAX (for even more ancient reasons long since unnecessary).
326 	 * To avoid the potential of surprise failures, we shift down
327 	 * the sonode pointer address to try and get the most
328 	 * uniqueness into 32-bits. In practice, this represents the unique
329 	 * portion of the kernel address space, so the chance of duplicate
330 	 * socket inode values is minimized.
331 	 */
332 	vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFFFFFF;
333 	vap->va_nlink = 0;
334 	vap->va_size = 0;
335 
336 	/*
337 	 * We need to zero out the va_rdev to avoid some fstats getting
338 	 * EOVERFLOW.  This also mimics SunOS 4.x and BSD behavior.
339 	 */
340 	vap->va_rdev = (dev_t)0;
341 	vap->va_blksize = MAXBSIZE;
342 	vap->va_nblocks = btod(vap->va_size);
343 
344 	if (!SOCK_IS_NONSTR(so)) {
345 		sotpi_info_t *sti = SOTOTPI(so);
346 
347 		mutex_enter(&so->so_lock);
348 		vap->va_atime.tv_sec = sti->sti_atime;
349 		vap->va_mtime.tv_sec = sti->sti_mtime;
350 		vap->va_ctime.tv_sec = sti->sti_ctime;
351 		mutex_exit(&so->so_lock);
352 	} else {
353 		vap->va_atime.tv_sec = 0;
354 		vap->va_mtime.tv_sec = 0;
355 		vap->va_ctime.tv_sec = 0;
356 	}
357 
358 	vap->va_atime.tv_nsec = 0;
359 	vap->va_mtime.tv_nsec = 0;
360 	vap->va_ctime.tv_nsec = 0;
361 	vap->va_seq = 0;
362 
363 	return (0);
364 }
365 
366 /*
367  * Set attributes.
368  * Just like in BSD there is not effect on the underlying file system node
369  * bound to an AF_UNIX pathname.
370  *
371  * When sockmod has been popped this will act just like a stream. Since
372  * a socket is always a clone there is no need to modify the attributes
373  * of the "realvp".
374  */
375 /* ARGSUSED */
376 int
socket_vop_setattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cr,caller_context_t * ct)377 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags,
378     struct cred *cr, caller_context_t *ct)
379 {
380 	struct sonode *so = VTOSO(vp);
381 
382 	/*
383 	 * If times were changed, and we have a STREAMS socket, then update
384 	 * the sonode.
385 	 */
386 	if (!SOCK_IS_NONSTR(so)) {
387 		sotpi_info_t *sti = SOTOTPI(so);
388 
389 		mutex_enter(&so->so_lock);
390 		if (vap->va_mask & AT_ATIME)
391 			sti->sti_atime = vap->va_atime.tv_sec;
392 		if (vap->va_mask & AT_MTIME) {
393 			sti->sti_mtime = vap->va_mtime.tv_sec;
394 			sti->sti_ctime = gethrestime_sec();
395 		}
396 		mutex_exit(&so->so_lock);
397 	}
398 
399 	return (0);
400 }
401 
402 /*
403  * Check if user is allowed to access vp. For non-STREAMS based sockets,
404  * there might not be a device attached to the file system. So for those
405  * types of sockets there are no permissions to check.
406  *
407  * XXX Should there be some other mechanism to check access rights?
408  */
409 /*ARGSUSED*/
410 int
socket_vop_access(struct vnode * vp,int mode,int flags,struct cred * cr,caller_context_t * ct)411 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr,
412     caller_context_t *ct)
413 {
414 	struct sonode *so = VTOSO(vp);
415 
416 	if (!SOCK_IS_NONSTR(so)) {
417 		ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL);
418 		return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode,
419 		    mode, flags, cr, NULL));
420 	}
421 	return (0);
422 }
423 
424 /*
425  * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
426  * This code does the same to be compatible and also to not give an
427  * application the impression that the data has actually been "synced"
428  * to the other end of the connection.
429  */
430 /* ARGSUSED */
431 int
socket_vop_fsync(struct vnode * vp,int syncflag,struct cred * cr,caller_context_t * ct)432 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr,
433     caller_context_t *ct)
434 {
435 	return (EINVAL);
436 }
437 
438 /*ARGSUSED*/
439 static void
socket_vop_inactive(struct vnode * vp,struct cred * cr,caller_context_t * ct)440 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
441 {
442 	struct sonode *so = VTOSO(vp);
443 
444 	ASSERT(vp->v_type == VSOCK);
445 
446 	mutex_enter(&vp->v_lock);
447 	/*
448 	 * If no one has reclaimed the vnode, remove from the
449 	 * cache now.
450 	 */
451 	if (vp->v_count < 1)
452 		cmn_err(CE_PANIC, "socket_inactive: Bad v_count");
453 
454 	VN_RELE_LOCKED(vp);
455 	if (vp->v_count != 0) {
456 		mutex_exit(&vp->v_lock);
457 		return;
458 	}
459 	mutex_exit(&vp->v_lock);
460 
461 
462 	ASSERT(!vn_has_cached_data(vp));
463 
464 	/* socket specfic clean-up */
465 	socket_destroy_internal(so, cr);
466 }
467 
468 /* ARGSUSED */
469 int
socket_vop_fid(struct vnode * vp,struct fid * fidp,caller_context_t * ct)470 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
471 {
472 	return (EINVAL);
473 }
474 
475 /*
476  * Sockets are not seekable.
477  * (and there is a bug to fix STREAMS to make them fail this as well).
478  */
479 /*ARGSUSED*/
480 int
socket_vop_seek(struct vnode * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)481 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
482     caller_context_t *ct)
483 {
484 	return (ESPIPE);
485 }
486 
487 /*ARGSUSED*/
488 static int
socket_vop_poll(struct vnode * vp,short events,int anyyet,short * reventsp,struct pollhead ** phpp,caller_context_t * ct)489 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
490     struct pollhead **phpp, caller_context_t *ct)
491 {
492 	struct sonode *so = VTOSO(vp);
493 
494 	ASSERT(vp->v_type == VSOCK);
495 
496 	return (socket_poll(so, events, anyyet, reventsp, phpp));
497 }
498