1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright (c) 2017 by Delphix. All rights reserved.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bitmap.h>
35 #include <sys/debug.h>
36 #include <sys/errno.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/sysmacros.h>
40 #include <sys/filio.h>
41 #include <sys/flock.h>
42 #include <sys/stat.h>
43 #include <sys/share.h>
44 
45 #include <sys/vfs.h>
46 #include <sys/vfs_opreg.h>
47 
48 #include <sys/sockio.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/strsun.h>
52 
53 #include <fs/sockfs/sockcommon.h>
54 #include <fs/sockfs/socktpi.h>
55 
56 /*
57  * Generic vnode ops
58  */
59 static int	socket_vop_open(struct vnode **, int, struct cred *,
60 		    caller_context_t *);
61 static int	socket_vop_close(struct vnode *, int, int, offset_t,
62 		    struct cred *, caller_context_t *);
63 static int	socket_vop_read(struct vnode *, struct uio *, int,
64 		    struct cred *, caller_context_t *);
65 static int	socket_vop_write(struct vnode *, struct uio *, int,
66 		    struct cred *, caller_context_t *);
67 static int	socket_vop_ioctl(struct vnode *, int, intptr_t, int,
68 		    struct cred *, int32_t *, caller_context_t *);
69 static int	socket_vop_setfl(struct vnode *, int, int, cred_t *,
70 		    caller_context_t *);
71 static int 	socket_vop_getattr(struct vnode *, struct vattr *, int,
72 		    struct cred *, caller_context_t *);
73 static int 	socket_vop_setattr(struct vnode *, struct vattr *, int,
74 		    struct cred *, caller_context_t *);
75 static int 	socket_vop_access(struct vnode *, int, int, struct cred *,
76 		    caller_context_t *);
77 static int 	socket_vop_fsync(struct vnode *, int, struct cred *,
78 		    caller_context_t *);
79 static void	socket_vop_inactive(struct vnode *, struct cred *,
80 		    caller_context_t *);
81 static int 	socket_vop_fid(struct vnode *, struct fid *,
82 		    caller_context_t *);
83 static int 	socket_vop_seek(struct vnode *, offset_t, offset_t *,
84 		    caller_context_t *);
85 static int	socket_vop_poll(struct vnode *, short, int, short *,
86 		    struct pollhead **, caller_context_t *);
87 
88 extern int	socket_close_internal(struct sonode *, int, cred_t *);
89 extern void	socket_destroy_internal(struct sonode *, cred_t *);
90 
91 struct vnodeops *socket_vnodeops;
92 const fs_operation_def_t socket_vnodeops_template[] = {
93 	VOPNAME_OPEN,		{ .vop_open = socket_vop_open },
94 	VOPNAME_CLOSE,		{ .vop_close = socket_vop_close },
95 	VOPNAME_READ,		{ .vop_read = socket_vop_read },
96 	VOPNAME_WRITE,		{ .vop_write = socket_vop_write },
97 	VOPNAME_IOCTL,		{ .vop_ioctl = socket_vop_ioctl },
98 	VOPNAME_SETFL,		{ .vop_setfl = socket_vop_setfl },
99 	VOPNAME_GETATTR,	{ .vop_getattr = socket_vop_getattr },
100 	VOPNAME_SETATTR,	{ .vop_setattr = socket_vop_setattr },
101 	VOPNAME_ACCESS,		{ .vop_access = socket_vop_access },
102 	VOPNAME_FSYNC,		{ .vop_fsync = socket_vop_fsync },
103 	VOPNAME_INACTIVE,	{ .vop_inactive = socket_vop_inactive },
104 	VOPNAME_FID,		{ .vop_fid = socket_vop_fid },
105 	VOPNAME_SEEK,		{ .vop_seek = socket_vop_seek },
106 	VOPNAME_POLL,		{ .vop_poll = socket_vop_poll },
107 	VOPNAME_DISPOSE,	{ .error = fs_error },
108 	NULL,			NULL
109 };
110 
111 
112 /*
113  * generic vnode ops
114  */
115 
116 /*ARGSUSED*/
117 static int
118 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr,
119     caller_context_t *ct)
120 {
121 	struct vnode *vp = *vpp;
122 	struct sonode *so = VTOSO(vp);
123 
124 	flag &= ~FCREAT;		/* paranoia */
125 	mutex_enter(&so->so_lock);
126 	so->so_count++;
127 	mutex_exit(&so->so_lock);
128 
129 	ASSERT(so->so_count != 0);	/* wraparound */
130 	ASSERT(vp->v_type == VSOCK);
131 
132 	return (0);
133 }
134 
135 /*ARGSUSED*/
136 static int
137 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset,
138     struct cred *cr, caller_context_t *ct)
139 {
140 	struct sonode *so;
141 	int error = 0;
142 
143 	so = VTOSO(vp);
144 	ASSERT(vp->v_type == VSOCK);
145 
146 	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
147 	cleanshares(vp, ttoproc(curthread)->p_pid);
148 
149 	if (vp->v_stream)
150 		strclean(vp);
151 
152 	if (count > 1) {
153 		dprint(2, ("socket_vop_close: count %d\n", count));
154 		return (0);
155 	}
156 
157 	mutex_enter(&so->so_lock);
158 	if (--so->so_count == 0) {
159 		/*
160 		 * Initiate connection shutdown.
161 		 */
162 		mutex_exit(&so->so_lock);
163 		error = socket_close_internal(so, flag, cr);
164 	} else {
165 		mutex_exit(&so->so_lock);
166 	}
167 
168 	return (error);
169 }
170 
171 /*ARGSUSED2*/
172 static int
173 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
174     caller_context_t *ct)
175 {
176 	struct sonode *so = VTOSO(vp);
177 	struct nmsghdr lmsg;
178 
179 	ASSERT(vp->v_type == VSOCK);
180 	bzero((void *)&lmsg, sizeof (lmsg));
181 
182 	return (socket_recvmsg(so, &lmsg, uiop, cr));
183 }
184 
185 /*ARGSUSED2*/
186 static int
187 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag,
188     struct cred *cr, caller_context_t *ct)
189 {
190 	struct sonode *so = VTOSO(vp);
191 	struct nmsghdr lmsg;
192 
193 	ASSERT(vp->v_type == VSOCK);
194 	bzero((void *)&lmsg, sizeof (lmsg));
195 
196 	if (!(so->so_mode & SM_BYTESTREAM)) {
197 		/*
198 		 * If the socket is not byte stream set MSG_EOR
199 		 */
200 		lmsg.msg_flags = MSG_EOR;
201 	}
202 
203 	return (socket_sendmsg(so, &lmsg, uiop, cr));
204 }
205 
206 /*ARGSUSED4*/
207 static int
208 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
209     struct cred *cr, int32_t *rvalp, caller_context_t *ct)
210 {
211 	struct sonode *so = VTOSO(vp);
212 
213 	ASSERT(vp->v_type == VSOCK);
214 
215 	return (socket_ioctl(so, cmd, arg, mode, cr, rvalp));
216 }
217 
218 /*
219  * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited
220  * from listener to acceptor.
221  */
222 /* ARGSUSED */
223 static int
224 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
225     caller_context_t *ct)
226 {
227 	struct sonode *so = VTOSO(vp);
228 	int error = 0;
229 
230 	ASSERT(vp->v_type == VSOCK);
231 
232 	mutex_enter(&so->so_lock);
233 	if (nflags & FNDELAY)
234 		so->so_state |= SS_NDELAY;
235 	else
236 		so->so_state &= ~SS_NDELAY;
237 	if (nflags & FNONBLOCK)
238 		so->so_state |= SS_NONBLOCK;
239 	else
240 		so->so_state &= ~SS_NONBLOCK;
241 	mutex_exit(&so->so_lock);
242 
243 	if (so->so_state & SS_ASYNC)
244 		oflags |= FASYNC;
245 	/*
246 	 * Sets/clears the SS_ASYNC flag based on the presence/absence
247 	 * of the FASYNC flag passed to fcntl(F_SETFL).
248 	 * This exists solely for BSD fcntl() FASYNC compatibility.
249 	 */
250 	if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) {
251 		int async = nflags & FASYNC;
252 		int32_t rv;
253 
254 		/*
255 		 * For non-TPI sockets all we have to do is set/remove the
256 		 * SS_ASYNC bit, but for TPI it is more involved. For that
257 		 * reason we delegate the job to the protocol's ioctl handler.
258 		 */
259 		error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL,
260 		    cr, &rv);
261 	}
262 	return (error);
263 }
264 
265 
266 /*
267  * Get the made up attributes for the vnode.
268  * 4.3BSD returns the current time for all the timestamps.
269  * 4.4BSD returns 0 for all the timestamps.
270  * Here we use the access and modified times recorded in the sonode.
271  *
272  * Just like in BSD there is not effect on the underlying file system node
273  * bound to an AF_UNIX pathname.
274  *
275  * When sockmod has been popped this will act just like a stream. Since
276  * a socket is always a clone there is no need to inspect the attributes
277  * of the "realvp".
278  */
279 /* ARGSUSED */
280 int
281 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags,
282     struct cred *cr, caller_context_t *ct)
283 {
284 	dev_t		fsid;
285 	struct sonode 	*so;
286 	static int	sonode_shift = 0;
287 
288 	/*
289 	 * Calculate the amount of bitshift to a sonode pointer which will
290 	 * still keep it unique.  See below.
291 	 */
292 	if (sonode_shift == 0)
293 		sonode_shift = highbit(sizeof (struct sonode));
294 	ASSERT(sonode_shift > 0);
295 
296 	so = VTOSO(vp);
297 	fsid = sockdev;
298 
299 	if (so->so_version == SOV_STREAM) {
300 		/*
301 		 * The imaginary "sockmod" has been popped - act
302 		 * as a stream
303 		 */
304 		vap->va_type = VCHR;
305 		vap->va_mode = 0;
306 	} else {
307 		vap->va_type = vp->v_type;
308 		vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|
309 		    S_IROTH|S_IWOTH;
310 	}
311 	vap->va_uid = vap->va_gid = 0;
312 	vap->va_fsid = fsid;
313 	/*
314 	 * If the va_nodeid is > MAX_USHORT, then i386 stats might fail.
315 	 * So we shift down the sonode pointer to try and get the most
316 	 * uniqueness into 16-bits.
317 	 */
318 	vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF;
319 	vap->va_nlink = 0;
320 	vap->va_size = 0;
321 
322 	/*
323 	 * We need to zero out the va_rdev to avoid some fstats getting
324 	 * EOVERFLOW.  This also mimics SunOS 4.x and BSD behavior.
325 	 */
326 	vap->va_rdev = (dev_t)0;
327 	vap->va_blksize = MAXBSIZE;
328 	vap->va_nblocks = btod(vap->va_size);
329 
330 	if (!SOCK_IS_NONSTR(so)) {
331 		sotpi_info_t *sti = SOTOTPI(so);
332 
333 		mutex_enter(&so->so_lock);
334 		vap->va_atime.tv_sec = sti->sti_atime;
335 		vap->va_mtime.tv_sec = sti->sti_mtime;
336 		vap->va_ctime.tv_sec = sti->sti_ctime;
337 		mutex_exit(&so->so_lock);
338 	} else {
339 		vap->va_atime.tv_sec = 0;
340 		vap->va_mtime.tv_sec = 0;
341 		vap->va_ctime.tv_sec = 0;
342 	}
343 
344 	vap->va_atime.tv_nsec = 0;
345 	vap->va_mtime.tv_nsec = 0;
346 	vap->va_ctime.tv_nsec = 0;
347 	vap->va_seq = 0;
348 
349 	return (0);
350 }
351 
352 /*
353  * Set attributes.
354  * Just like in BSD there is not effect on the underlying file system node
355  * bound to an AF_UNIX pathname.
356  *
357  * When sockmod has been popped this will act just like a stream. Since
358  * a socket is always a clone there is no need to modify the attributes
359  * of the "realvp".
360  */
361 /* ARGSUSED */
362 int
363 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags,
364     struct cred *cr, caller_context_t *ct)
365 {
366 	struct sonode *so = VTOSO(vp);
367 
368 	/*
369 	 * If times were changed, and we have a STREAMS socket, then update
370 	 * the sonode.
371 	 */
372 	if (!SOCK_IS_NONSTR(so)) {
373 		sotpi_info_t *sti = SOTOTPI(so);
374 
375 		mutex_enter(&so->so_lock);
376 		if (vap->va_mask & AT_ATIME)
377 			sti->sti_atime = vap->va_atime.tv_sec;
378 		if (vap->va_mask & AT_MTIME) {
379 			sti->sti_mtime = vap->va_mtime.tv_sec;
380 			sti->sti_ctime = gethrestime_sec();
381 		}
382 		mutex_exit(&so->so_lock);
383 	}
384 
385 	return (0);
386 }
387 
388 /*
389  * Check if user is allowed to access vp. For non-STREAMS based sockets,
390  * there might not be a device attached to the file system. So for those
391  * types of sockets there are no permissions to check.
392  *
393  * XXX Should there be some other mechanism to check access rights?
394  */
395 /*ARGSUSED*/
396 int
397 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr,
398     caller_context_t *ct)
399 {
400 	struct sonode *so = VTOSO(vp);
401 
402 	if (!SOCK_IS_NONSTR(so)) {
403 		ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL);
404 		return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode,
405 		    mode, flags, cr, NULL));
406 	}
407 	return (0);
408 }
409 
410 /*
411  * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
412  * This code does the same to be compatible and also to not give an
413  * application the impression that the data has actually been "synced"
414  * to the other end of the connection.
415  */
416 /* ARGSUSED */
417 int
418 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr,
419     caller_context_t *ct)
420 {
421 	return (EINVAL);
422 }
423 
424 /*ARGSUSED*/
425 static void
426 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
427 {
428 	struct sonode *so = VTOSO(vp);
429 
430 	ASSERT(vp->v_type == VSOCK);
431 
432 	mutex_enter(&vp->v_lock);
433 	/*
434 	 * If no one has reclaimed the vnode, remove from the
435 	 * cache now.
436 	 */
437 	if (vp->v_count < 1)
438 		cmn_err(CE_PANIC, "socket_inactive: Bad v_count");
439 
440 	VN_RELE_LOCKED(vp);
441 	if (vp->v_count != 0) {
442 		mutex_exit(&vp->v_lock);
443 		return;
444 	}
445 	mutex_exit(&vp->v_lock);
446 
447 
448 	ASSERT(!vn_has_cached_data(vp));
449 
450 	/* socket specfic clean-up */
451 	socket_destroy_internal(so, cr);
452 }
453 
454 /* ARGSUSED */
455 int
456 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
457 {
458 	return (EINVAL);
459 }
460 
461 /*
462  * Sockets are not seekable.
463  * (and there is a bug to fix STREAMS to make them fail this as well).
464  */
465 /*ARGSUSED*/
466 int
467 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
468     caller_context_t *ct)
469 {
470 	return (ESPIPE);
471 }
472 
473 /*ARGSUSED*/
474 static int
475 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
476     struct pollhead **phpp, caller_context_t *ct)
477 {
478 	struct sonode *so = VTOSO(vp);
479 
480 	ASSERT(vp->v_type == VSOCK);
481 
482 	return (socket_poll(so, events, anyyet, reventsp, phpp));
483 }
484