1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2017 Sebastian Wiedenroth
25 */
26
27#include <sys/types.h>
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <sys/sysmacros.h>
31#include <sys/debug.h>
32#include <sys/cmn_err.h>
33#include <sys/vfs.h>
34#include <sys/policy.h>
35#include <sys/modctl.h>
36
37#include <sys/sunddi.h>
38
39#include <sys/strsun.h>
40#include <sys/stropts.h>
41#include <sys/strsubr.h>
42#include <sys/socket.h>
43#include <sys/socketvar.h>
44#include <sys/uio.h>
45
46#include <inet/ipclassifier.h>
47#include <fs/sockfs/sockcommon.h>
48#include <fs/sockfs/sockfilter_impl.h>
49#include <fs/sockfs/nl7c.h>
50#include <fs/sockfs/socktpi.h>
51#include <fs/sockfs/sodirect.h>
52#include <inet/ip.h>
53
54extern int xnet_skip_checks, xnet_check_print, xnet_truncate_print;
55
/*
 * Common socket access functions.
 *
 * Instead of accessing the sonode switch directly (i.e., SOP_xxx()),
 * callers should use the corresponding socket_xxx() function.
 */
62
63/*
64 * Try to create a new sonode of the requested <family, type, protocol>.
65 */
66/* ARGSUSED */
67struct sonode *
68socket_create(int family, int type, int protocol, char *devpath, char *mod,
69    int flags, int version, struct cred *cr, int *errorp)
70{
71	struct sonode *so;
72	struct sockparams *sp = NULL;
73	int saved_error;
74
75	/*
76	 * Look for a sockparams entry that match the given criteria.
77	 * solookup() returns with the entry held.
78	 */
79	*errorp = solookup(family, type, protocol, &sp);
80	saved_error = *errorp;
81	if (sp == NULL) {
82		int kmflags = (flags == SOCKET_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
83		/*
84		 * There is no matching sockparams entry. An ephemeral entry is
85		 * created if the caller specifies a device or a socket module.
86		 */
87		if (devpath != NULL) {
88			saved_error = 0;
89			sp = sockparams_hold_ephemeral_bydev(family, type,
90			    protocol, devpath, kmflags, errorp);
91		} else if (mod != NULL) {
92			saved_error = 0;
93			sp = sockparams_hold_ephemeral_bymod(family, type,
94			    protocol, mod, kmflags, errorp);
95		} else {
96			*errorp = solookup(family, type, 0, &sp);
97		}
98
99		if (sp == NULL) {
100			if (saved_error && (*errorp == EPROTONOSUPPORT ||
101			    *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT))
102				*errorp = saved_error;
103			return (NULL);
104		}
105	}
106
107	ASSERT(sp->sp_smod_info != NULL);
108	ASSERT(flags == SOCKET_SLEEP || flags == SOCKET_NOSLEEP);
109	sp->sp_stats.sps_ncreate.value.ui64++;
110	so = sp->sp_smod_info->smod_sock_create_func(sp, family, type,
111	    protocol, version, flags, errorp, cr);
112	if (so == NULL) {
113		SOCKPARAMS_DEC_REF(sp);
114	} else {
115		if ((*errorp = SOP_INIT(so, NULL, cr, flags)) == 0) {
116			/* Cannot fail, only bumps so_count */
117			(void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL);
118		} else {
119			if (saved_error && (*errorp == EPROTONOSUPPORT ||
120			    *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT))
121				*errorp = saved_error;
122			socket_destroy(so);
123			so = NULL;
124		}
125	}
126	return (so);
127}
128
129struct sonode *
130socket_newconn(struct sonode *parent, sock_lower_handle_t lh,
131    sock_downcalls_t *dc, int flags, int *errorp)
132{
133	struct sonode *so;
134	struct sockparams *sp;
135	struct cred *cr;
136
137	if ((cr = CRED()) == NULL)
138		cr = kcred;
139
140	sp = parent->so_sockparams;
141	ASSERT(sp != NULL);
142
143	sp->sp_stats.sps_ncreate.value.ui64++;
144	so = sp->sp_smod_info->smod_sock_create_func(sp, parent->so_family,
145	    parent->so_type, parent->so_protocol, parent->so_version, flags,
146	    errorp, cr);
147	if (so != NULL) {
148		SOCKPARAMS_INC_REF(sp);
149
150		so->so_proto_handle = lh;
151		so->so_downcalls = dc;
152		/*
153		 * This function may be called in interrupt context, and CRED()
154		 * will be NULL. In this case, pass in kcred.
155		 */
156		if ((*errorp = SOP_INIT(so, parent, cr, flags)) == 0) {
157			/* Cannot fail, only bumps so_count */
158			(void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL);
159		} else  {
160			socket_destroy(so);
161			so = NULL;
162		}
163	}
164
165	return (so);
166}
167
168/*
169 * Bind local endpoint.
170 */
171int
172socket_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
173    int flags, cred_t *cr)
174{
175	return (SOP_BIND(so, name, namelen, flags, cr));
176}
177
178/*
179 * Turn socket into a listen socket.
180 */
181int
182socket_listen(struct sonode *so, int backlog, cred_t *cr)
183{
184	if (backlog < 0) {
185		backlog = 0;
186	}
187
188	/*
189	 * Use the same qlimit as in BSD. BSD checks the qlimit
190	 * before queuing the next connection implying that a
191	 * listen(sock, 0) allows one connection to be queued.
192	 * BSD also uses 1.5 times the requested backlog.
193	 *
194	 * XNS Issue 4 required a strict interpretation of the backlog.
195	 * This has been waived subsequently for Issue 4 and the change
196	 * incorporated in XNS Issue 5. So we aren't required to do
197	 * anything special for XPG apps.
198	 */
199	if (backlog >= (INT_MAX - 1) / 3)
200		backlog = INT_MAX;
201	else
202		backlog = backlog * 3 / 2 + 1;
203
204	return (SOP_LISTEN(so, backlog, cr));
205}
206
207/*
208 * Accept incoming connection.
209 */
210int
211socket_accept(struct sonode *lso, int fflag, cred_t *cr, struct sonode **nsop)
212{
213	return (SOP_ACCEPT(lso, fflag, cr, nsop));
214}
215
216/*
217 * Active open.
218 */
219int
220socket_connect(struct sonode *so, struct sockaddr *name,
221    socklen_t namelen, int fflag, int flags, cred_t *cr)
222{
223	int error;
224
225	/*
226	 * Handle a connect to a name parameter of type AF_UNSPEC like a
227	 * connect to a null address. This is the portable method to
228	 * unconnect a socket.
229	 */
230	if ((namelen >= sizeof (sa_family_t)) &&
231	    (name->sa_family == AF_UNSPEC)) {
232		name = NULL;
233		namelen = 0;
234	}
235
236	error = SOP_CONNECT(so, name, namelen, fflag, flags, cr);
237
238	if (error == EHOSTUNREACH && flags & _SOCONNECT_XPG4_2) {
239		/*
240		 * X/Open specification contains a requirement that
241		 * ENETUNREACH be returned but does not require
242		 * EHOSTUNREACH. In order to keep the test suite
243		 * happy we mess with the errno here.
244		 */
245		error = ENETUNREACH;
246	}
247
248	return (error);
249}
250
251/*
252 * Get address of remote node.
253 */
254int
255socket_getpeername(struct sonode *so, struct sockaddr *addr,
256    socklen_t *addrlen, boolean_t accept, cred_t *cr)
257{
258	ASSERT(*addrlen > 0);
259	return (SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
260
261}
262
263/*
264 * Get local address.
265 */
266int
267socket_getsockname(struct sonode *so, struct sockaddr *addr,
268    socklen_t *addrlen, cred_t *cr)
269{
270	return (SOP_GETSOCKNAME(so, addr, addrlen, cr));
271
272}
273
274/*
275 * Called from shutdown().
276 */
277int
278socket_shutdown(struct sonode *so, int how, cred_t *cr)
279{
280	return (SOP_SHUTDOWN(so, how, cr));
281}
282
283/*
284 * Get socket options.
285 */
286/*ARGSUSED*/
287int
288socket_getsockopt(struct sonode *so, int level, int option_name,
289    void *optval, socklen_t *optlenp, int flags, cred_t *cr)
290{
291	return (SOP_GETSOCKOPT(so, level, option_name, optval,
292	    optlenp, flags, cr));
293}
294
295/*
296 * Set socket options
297 */
298int
299socket_setsockopt(struct sonode *so, int level, int option_name,
300    const void *optval, t_uscalar_t optlen, cred_t *cr)
301{
302	int val = 1;
303	/* Caller allocates aligned optval, or passes null */
304	ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0);
305	/* If optval is null optlen is 0, and vice-versa */
306	ASSERT(optval != NULL || optlen == 0);
307	ASSERT(optlen != 0 || optval == NULL);
308
309	if (optval == NULL && optlen == 0)
310		optval = &val;
311
312	return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
313}
314
315int
316socket_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
317    cred_t *cr)
318{
319	int error = 0;
320	ssize_t orig_resid = uiop->uio_resid;
321
322	/*
323	 * Do not bypass the cache if we are doing a local (AF_UNIX) write.
324	 */
325	if (so->so_family == AF_UNIX)
326		uiop->uio_extflg |= UIO_COPY_CACHED;
327	else
328		uiop->uio_extflg &= ~UIO_COPY_CACHED;
329
330	error = SOP_SENDMSG(so, msg, uiop, cr);
331	switch (error) {
332	default:
333		break;
334	case EINTR:
335	case ENOMEM:
336	/* EAGAIN is EWOULDBLOCK */
337	case EWOULDBLOCK:
338		/* We did a partial send */
339		if (uiop->uio_resid != orig_resid)
340			error = 0;
341		break;
342	case EPIPE:
343		if (((so->so_mode & SM_KERNEL) == 0) &&
344		    ((msg->msg_flags & MSG_NOSIGNAL) == 0)) {
345			tsignal(curthread, SIGPIPE);
346		}
347		break;
348	}
349
350	return (error);
351}
352
353int
354socket_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
355    struct cred *cr, mblk_t **mpp)
356{
357	int error = 0;
358
359	error = SOP_SENDMBLK(so, msg, fflag, cr, mpp);
360	if (error == EPIPE) {
361		tsignal(curthread, SIGPIPE);
362	}
363	return (error);
364}
365
366int
367socket_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
368    cred_t *cr)
369{
370	int error;
371	ssize_t orig_resid = uiop->uio_resid;
372
373	/*
374	 * Do not bypass the cache when reading data, as the application
375	 * is likely to access the data shortly.
376	 */
377	uiop->uio_extflg |= UIO_COPY_CACHED;
378
379	error = SOP_RECVMSG(so, msg, uiop, cr);
380
381	switch (error) {
382	case EINTR:
383	/* EAGAIN is EWOULDBLOCK */
384	case EWOULDBLOCK:
385		/* We did a partial read */
386		if (uiop->uio_resid != orig_resid)
387			error = 0;
388		break;
389	default:
390		break;
391	}
392	return (error);
393}
394
/*
 * Forward an ioctl to the sonode's ioctl operation and return its result.
 */
int
socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error;

	error = SOP_IOCTL(so, cmd, arg, mode, cr, rvalp);
	return (error);
}
401
/*
 * Forward a poll request to the sonode's poll operation and return its
 * result.
 */
int
socket_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int error;

	error = SOP_POLL(so, events, anyyet, reventsp, phpp);
	return (error);
}
408
/*
 * Close the socket by calling VOP_CLOSE() on the sonode's vnode with a
 * count of 1 and an offset of 0; returns the result of that call.
 */
int
socket_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	error = VOP_CLOSE(SOTOV(so), flag, 1, 0, cr, NULL);
	return (error);
}
414
415int
416socket_close_internal(struct sonode *so, int flag, cred_t *cr)
417{
418	ASSERT(so->so_count == 0);
419
420	return (SOP_CLOSE(so, flag, cr));
421}
422
/*
 * Destroy a sonode through its vnode: the vnode is first passed to
 * vn_invalid() and then the caller's hold on it is released with VN_RELE().
 */
void
socket_destroy(struct sonode *so)
{
	vn_invalid(SOTOV(so));

	/* Release the hold; `so' must not be used by the caller afterwards. */
	VN_RELE(SOTOV(so));
}
429
430/* ARGSUSED */
431void
432socket_destroy_internal(struct sonode *so, cred_t *cr)
433{
434	struct sockparams *sp = so->so_sockparams;
435	ASSERT(so->so_count == 0 && sp != NULL);
436
437	sp->sp_smod_info->smod_sock_destroy_func(so);
438
439	SOCKPARAMS_DEC_REF(sp);
440}
441
442/*
443 * TODO Once the common vnode ops is available, then the vnops argument
444 * should be removed.
445 */
446/*ARGSUSED*/
447int
448sonode_constructor(void *buf, void *cdrarg, int kmflags)
449{
450	struct sonode *so = buf;
451	struct vnode *vp;
452
453	vp = so->so_vnode = vn_alloc(kmflags);
454	if (vp == NULL) {
455		return (-1);
456	}
457	vp->v_data = so;
458	vn_setops(vp, socket_vnodeops);
459
460	so->so_priv 		= NULL;
461	so->so_oobmsg		= NULL;
462
463	so->so_proto_handle	= NULL;
464
465	so->so_peercred 	= NULL;
466
467	so->so_rcv_queued	= 0;
468	so->so_rcv_q_head 	= NULL;
469	so->so_rcv_q_last_head 	= NULL;
470	so->so_rcv_head		= NULL;
471	so->so_rcv_last_head	= NULL;
472	so->so_rcv_wanted	= 0;
473	so->so_rcv_timer_interval = SOCKET_NO_RCVTIMER;
474	so->so_rcv_timer_tid	= 0;
475	so->so_rcv_thresh	= 0;
476
477	list_create(&so->so_acceptq_list, sizeof (struct sonode),
478	    offsetof(struct sonode, so_acceptq_node));
479	list_create(&so->so_acceptq_defer, sizeof (struct sonode),
480	    offsetof(struct sonode, so_acceptq_node));
481	list_link_init(&so->so_acceptq_node);
482	so->so_acceptq_len	= 0;
483	so->so_backlog		= 0;
484	so->so_listener		= NULL;
485
486	so->so_snd_qfull	= B_FALSE;
487
488	so->so_filter_active	= 0;
489	so->so_filter_tx	= 0;
490	so->so_filter_defertime = 0;
491	so->so_filter_top	= NULL;
492	so->so_filter_bottom	= NULL;
493
494	mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL);
495	mutex_init(&so->so_acceptq_lock, NULL, MUTEX_DEFAULT, NULL);
496	rw_init(&so->so_fallback_rwlock, NULL, RW_DEFAULT, NULL);
497	cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL);
498	cv_init(&so->so_single_cv, NULL, CV_DEFAULT, NULL);
499	cv_init(&so->so_read_cv, NULL, CV_DEFAULT, NULL);
500
501	cv_init(&so->so_acceptq_cv, NULL, CV_DEFAULT, NULL);
502	cv_init(&so->so_snd_cv, NULL, CV_DEFAULT, NULL);
503	cv_init(&so->so_rcv_cv, NULL, CV_DEFAULT, NULL);
504	cv_init(&so->so_copy_cv, NULL, CV_DEFAULT, NULL);
505	cv_init(&so->so_closing_cv, NULL, CV_DEFAULT, NULL);
506
507	return (0);
508}
509
510/*ARGSUSED*/
511void
512sonode_destructor(void *buf, void *cdrarg)
513{
514	struct sonode *so = buf;
515	struct vnode *vp = SOTOV(so);
516
517	ASSERT(so->so_priv == NULL);
518	ASSERT(so->so_peercred == NULL);
519
520	ASSERT(so->so_oobmsg == NULL);
521
522	ASSERT(so->so_rcv_q_head == NULL);
523
524	list_destroy(&so->so_acceptq_list);
525	list_destroy(&so->so_acceptq_defer);
526	ASSERT(!list_link_active(&so->so_acceptq_node));
527	ASSERT(so->so_listener == NULL);
528
529	ASSERT(so->so_filter_active == 0);
530	ASSERT(so->so_filter_tx == 0);
531	ASSERT(so->so_filter_top == NULL);
532	ASSERT(so->so_filter_bottom == NULL);
533
534	ASSERT(vp->v_data == so);
535	ASSERT(vn_matchops(vp, socket_vnodeops));
536
537	vn_free(vp);
538
539	mutex_destroy(&so->so_lock);
540	mutex_destroy(&so->so_acceptq_lock);
541	rw_destroy(&so->so_fallback_rwlock);
542
543	cv_destroy(&so->so_state_cv);
544	cv_destroy(&so->so_single_cv);
545	cv_destroy(&so->so_read_cv);
546	cv_destroy(&so->so_acceptq_cv);
547	cv_destroy(&so->so_snd_cv);
548	cv_destroy(&so->so_rcv_cv);
549	cv_destroy(&so->so_closing_cv);
550}
551
552void
553sonode_init(struct sonode *so, struct sockparams *sp, int family,
554    int type, int protocol, sonodeops_t *sops)
555{
556	vnode_t *vp;
557
558	vp = SOTOV(so);
559
560	so->so_flag	= 0;
561
562	so->so_state	= 0;
563	so->so_mode	= 0;
564
565	so->so_count	= 0;
566
567	so->so_family	= family;
568	so->so_type	= type;
569	so->so_protocol	= protocol;
570
571	SOCK_CONNID_INIT(so->so_proto_connid);
572
573	so->so_options	= 0;
574	so->so_linger.l_onoff   = 0;
575	so->so_linger.l_linger = 0;
576	so->so_sndbuf	= 0;
577	so->so_error	= 0;
578	so->so_rcvtimeo	= 0;
579	so->so_sndtimeo = 0;
580	so->so_xpg_rcvbuf = 0;
581
582	ASSERT(so->so_oobmsg == NULL);
583	so->so_oobmark	= 0;
584	so->so_pgrp	= 0;
585
586	ASSERT(so->so_peercred == NULL);
587
588	so->so_zoneid = getzoneid();
589
590	so->so_sockparams = sp;
591
592	so->so_ops = sops;
593
594	so->so_not_str = (sops != &sotpi_sonodeops);
595
596	so->so_proto_handle = NULL;
597
598	so->so_downcalls = NULL;
599
600	so->so_copyflag = 0;
601
602	vn_reinit(vp);
603	vp->v_vfsp	= rootvfs;
604	vp->v_type	= VSOCK;
605	vp->v_rdev	= sockdev;
606
607	so->so_snd_qfull = B_FALSE;
608	so->so_minpsz = 0;
609
610	so->so_rcv_wakeup = B_FALSE;
611	so->so_snd_wakeup = B_FALSE;
612	so->so_flowctrld = B_FALSE;
613
614	so->so_pollev = 0;
615	bzero(&so->so_poll_list, sizeof (so->so_poll_list));
616	bzero(&so->so_proto_props, sizeof (struct sock_proto_props));
617
618	bzero(&(so->so_ksock_callbacks), sizeof (ksocket_callbacks_t));
619	so->so_ksock_cb_arg = NULL;
620
621	so->so_max_addr_len = sizeof (struct sockaddr_storage);
622
623	so->so_direct = NULL;
624
625	vn_exists(vp);
626}
627
628void
629sonode_fini(struct sonode *so)
630{
631	vnode_t *vp;
632
633	ASSERT(so->so_count == 0);
634
635	if (so->so_rcv_timer_tid) {
636		ASSERT(MUTEX_NOT_HELD(&so->so_lock));
637		(void) untimeout(so->so_rcv_timer_tid);
638		so->so_rcv_timer_tid = 0;
639	}
640
641	if (so->so_poll_list.ph_list != NULL) {
642		pollwakeup(&so->so_poll_list, POLLERR);
643		pollhead_clean(&so->so_poll_list);
644	}
645
646	if (so->so_direct != NULL)
647		sod_sock_fini(so);
648
649	vp = SOTOV(so);
650	vn_invalid(vp);
651
652	if (so->so_peercred != NULL) {
653		crfree(so->so_peercred);
654		so->so_peercred = NULL;
655	}
656	/* Detach and destroy filters */
657	if (so->so_filter_top != NULL)
658		sof_sonode_cleanup(so);
659
660	ASSERT(list_is_empty(&so->so_acceptq_list));
661	ASSERT(list_is_empty(&so->so_acceptq_defer));
662	ASSERT(!list_link_active(&so->so_acceptq_node));
663
664	ASSERT(so->so_rcv_queued == 0);
665	ASSERT(so->so_rcv_q_head == NULL);
666	ASSERT(so->so_rcv_q_last_head == NULL);
667	ASSERT(so->so_rcv_head == NULL);
668	ASSERT(so->so_rcv_last_head == NULL);
669}
670