10f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
20f1702cYu Xiangning<Eric.Yu@Sun.COM> * CDDL HEADER START
30f1702cYu Xiangning<Eric.Yu@Sun.COM> *
40f1702cYu Xiangning<Eric.Yu@Sun.COM> * The contents of this file are subject to the terms of the
50f1702cYu Xiangning<Eric.Yu@Sun.COM> * Common Development and Distribution License (the "License").
60f1702cYu Xiangning<Eric.Yu@Sun.COM> * You may not use this file except in compliance with the License.
70f1702cYu Xiangning<Eric.Yu@Sun.COM> *
80f1702cYu Xiangning<Eric.Yu@Sun.COM> * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90f1702cYu Xiangning<Eric.Yu@Sun.COM> * or http://www.opensolaris.org/os/licensing.
100f1702cYu Xiangning<Eric.Yu@Sun.COM> * See the License for the specific language governing permissions
110f1702cYu Xiangning<Eric.Yu@Sun.COM> * and limitations under the License.
120f1702cYu Xiangning<Eric.Yu@Sun.COM> *
130f1702cYu Xiangning<Eric.Yu@Sun.COM> * When distributing Covered Code, include this CDDL HEADER in each
140f1702cYu Xiangning<Eric.Yu@Sun.COM> * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150f1702cYu Xiangning<Eric.Yu@Sun.COM> * If applicable, add the following below this CDDL HEADER, with the
160f1702cYu Xiangning<Eric.Yu@Sun.COM> * fields enclosed by brackets "[]" replaced with your own identifying
170f1702cYu Xiangning<Eric.Yu@Sun.COM> * information: Portions Copyright [yyyy] [name of copyright owner]
180f1702cYu Xiangning<Eric.Yu@Sun.COM> *
190f1702cYu Xiangning<Eric.Yu@Sun.COM> * CDDL HEADER END
200f1702cYu Xiangning<Eric.Yu@Sun.COM> */
210f1702cYu Xiangning<Eric.Yu@Sun.COM>
220f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
233e95bd4Anders Persson * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24f67b7b8Sebastian Wiedenroth * Copyright 2017 Sebastian Wiedenroth
250f1702cYu Xiangning<Eric.Yu@Sun.COM> */
260f1702cYu Xiangning<Eric.Yu@Sun.COM>
270f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/types.h>
280f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/param.h>
290f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/systm.h>
300f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/sysmacros.h>
310f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/debug.h>
320f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/cmn_err.h>
330f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/vfs.h>
340f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/policy.h>
350f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/modctl.h>
360f1702cYu Xiangning<Eric.Yu@Sun.COM>
370f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/sunddi.h>
380f1702cYu Xiangning<Eric.Yu@Sun.COM>
390f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/strsun.h>
400f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/stropts.h>
410f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/strsubr.h>
420f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/socket.h>
430f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/socketvar.h>
440f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/uio.h>
450f1702cYu Xiangning<Eric.Yu@Sun.COM>
460f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <inet/ipclassifier.h>
470f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <fs/sockfs/sockcommon.h>
483e95bd4Anders Persson#include <fs/sockfs/sockfilter_impl.h>
490f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <fs/sockfs/nl7c.h>
50d36be52Rao Shoaib#include <fs/sockfs/socktpi.h>
51bbc000eAnders Persson#include <fs/sockfs/sodirect.h>
520f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <inet/ip.h>
530f1702cYu Xiangning<Eric.Yu@Sun.COM>
540f1702cYu Xiangning<Eric.Yu@Sun.COM>extern int xnet_skip_checks, xnet_check_print, xnet_truncate_print;
550f1702cYu Xiangning<Eric.Yu@Sun.COM>
560f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
570f1702cYu Xiangning<Eric.Yu@Sun.COM> * Common socket access functions.
580f1702cYu Xiangning<Eric.Yu@Sun.COM> *
590f1702cYu Xiangning<Eric.Yu@Sun.COM> * Instead of accessing the sonode switch directly (i.e., SOP_xxx()),
600f1702cYu Xiangning<Eric.Yu@Sun.COM> * the socket_xxx() function should be used.
610f1702cYu Xiangning<Eric.Yu@Sun.COM> */
620f1702cYu Xiangning<Eric.Yu@Sun.COM>
630f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
640f1702cYu Xiangning<Eric.Yu@Sun.COM> * Try to create a new sonode of the requested <family, type, protocol>.
650f1702cYu Xiangning<Eric.Yu@Sun.COM> */
660f1702cYu Xiangning<Eric.Yu@Sun.COM>/* ARGSUSED */
670f1702cYu Xiangning<Eric.Yu@Sun.COM>struct sonode *
680f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_create(int family, int type, int protocol, char *devpath, char *mod,
690f1702cYu Xiangning<Eric.Yu@Sun.COM>    int flags, int version, struct cred *cr, int *errorp)
700f1702cYu Xiangning<Eric.Yu@Sun.COM>{
710f1702cYu Xiangning<Eric.Yu@Sun.COM>	struct sonode *so;
720f1702cYu Xiangning<Eric.Yu@Sun.COM>	struct sockparams *sp = NULL;
7322238f7shenjian	int saved_error;
740f1702cYu Xiangning<Eric.Yu@Sun.COM>
750f1702cYu Xiangning<Eric.Yu@Sun.COM>	/*
760f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * Look for a sockparams entry that match the given criteria.
770f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * solookup() returns with the entry held.
780f1702cYu Xiangning<Eric.Yu@Sun.COM>	 */
790f1702cYu Xiangning<Eric.Yu@Sun.COM>	*errorp = solookup(family, type, protocol, &sp);
8022238f7shenjian	saved_error = *errorp;
810f1702cYu Xiangning<Eric.Yu@Sun.COM>	if (sp == NULL) {
820f1702cYu Xiangning<Eric.Yu@Sun.COM>		int kmflags = (flags == SOCKET_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
830f1702cYu Xiangning<Eric.Yu@Sun.COM>		/*
840f1702cYu Xiangning<Eric.Yu@Sun.COM>		 * There is no matching sockparams entry. An ephemeral entry is
850f1702cYu Xiangning<Eric.Yu@Sun.COM>		 * created if the caller specifies a device or a socket module.
860f1702cYu Xiangning<Eric.Yu@Sun.COM>		 */
870f1702cYu Xiangning<Eric.Yu@Sun.COM>		if (devpath != NULL) {
8822238f7shenjian			saved_error = 0;
890f1702cYu Xiangning<Eric.Yu@Sun.COM>			sp = sockparams_hold_ephemeral_bydev(family, type,
900f1702cYu Xiangning<Eric.Yu@Sun.COM>			    protocol, devpath, kmflags, errorp);
910f1702cYu Xiangning<Eric.Yu@Sun.COM>		} else if (mod != NULL) {
9222238f7shenjian			saved_error = 0;
930f1702cYu Xiangning<Eric.Yu@Sun.COM>			sp = sockparams_hold_ephemeral_bymod(family, type,
940f1702cYu Xiangning<Eric.Yu@Sun.COM>			    protocol, mod, kmflags, errorp);
950f1702cYu Xiangning<Eric.Yu@Sun.COM>		} else {
9622238f7shenjian			*errorp = solookup(family, type, 0, &sp);
970f1702cYu Xiangning<Eric.Yu@Sun.COM>		}
980f1702cYu Xiangning<Eric.Yu@Sun.COM>
9922238f7shenjian		if (sp == NULL) {
10022238f7shenjian			if (saved_error && (*errorp == EPROTONOSUPPORT ||
10122238f7shenjian			    *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT))
10222238f7shenjian				*errorp = saved_error;
1030f1702cYu Xiangning<Eric.Yu@Sun.COM>			return (NULL);
10422238f7shenjian		}
1050f1702cYu Xiangning<Eric.Yu@Sun.COM>	}
1060f1702cYu Xiangning<Eric.Yu@Sun.COM>
1070f1702cYu Xiangning<Eric.Yu@Sun.COM>	ASSERT(sp->sp_smod_info != NULL);
1080f1702cYu Xiangning<Eric.Yu@Sun.COM>	ASSERT(flags == SOCKET_SLEEP || flags == SOCKET_NOSLEEP);
1097d64f41Anders Persson	sp->sp_stats.sps_ncreate.value.ui64++;
1100f1702cYu Xiangning<Eric.Yu@Sun.COM>	so = sp->sp_smod_info->smod_sock_create_func(sp, family, type,
1110f1702cYu Xiangning<Eric.Yu@Sun.COM>	    protocol, version, flags, errorp, cr);
1120f1702cYu Xiangning<Eric.Yu@Sun.COM>	if (so == NULL) {
1130f1702cYu Xiangning<Eric.Yu@Sun.COM>		SOCKPARAMS_DEC_REF(sp);
1140f1702cYu Xiangning<Eric.Yu@Sun.COM>	} else {
1150f1702cYu Xiangning<Eric.Yu@Sun.COM>		if ((*errorp = SOP_INIT(so, NULL, cr, flags)) == 0) {
1160f1702cYu Xiangning<Eric.Yu@Sun.COM>			/* Cannot fail, only bumps so_count */
1170f1702cYu Xiangning<Eric.Yu@Sun.COM>			(void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL);
1180f1702cYu Xiangning<Eric.Yu@Sun.COM>		} else {
11922238f7shenjian			if (saved_error && (*errorp == EPROTONOSUPPORT ||
12022238f7shenjian			    *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT))
12122238f7shenjian				*errorp = saved_error;
1220f1702cYu Xiangning<Eric.Yu@Sun.COM>			socket_destroy(so);
1230f1702cYu Xiangning<Eric.Yu@Sun.COM>			so = NULL;
1240f1702cYu Xiangning<Eric.Yu@Sun.COM>		}
1250f1702cYu Xiangning<Eric.Yu@Sun.COM>	}
1260f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (so);
1270f1702cYu Xiangning<Eric.Yu@Sun.COM>}
1280f1702cYu Xiangning<Eric.Yu@Sun.COM>
1290f1702cYu Xiangning<Eric.Yu@Sun.COM>struct sonode *
1300f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_newconn(struct sonode *parent, sock_lower_handle_t lh,
1310f1702cYu Xiangning<Eric.Yu@Sun.COM>    sock_downcalls_t *dc, int flags, int *errorp)
1320f1702cYu Xiangning<Eric.Yu@Sun.COM>{
1330f1702cYu Xiangning<Eric.Yu@Sun.COM>	struct sonode *so;
1340f1702cYu Xiangning<Eric.Yu@Sun.COM>	struct sockparams *sp;
1350f1702cYu Xiangning<Eric.Yu@Sun.COM>	struct cred *cr;
1360f1702cYu Xiangning<Eric.Yu@Sun.COM>
1370f1702cYu Xiangning<Eric.Yu@Sun.COM>	if ((cr = CRED()) == NULL)
1380f1702cYu Xiangning<Eric.Yu@Sun.COM>		cr = kcred;
1390f1702cYu Xiangning<Eric.Yu@Sun.COM>
1400f1702cYu Xiangning<Eric.Yu@Sun.COM>	sp = parent->so_sockparams;
1410f1702cYu Xiangning<Eric.Yu@Sun.COM>	ASSERT(sp != NULL);
1420f1702cYu Xiangning<Eric.Yu@Sun.COM>
1437d64f41Anders Persson	sp->sp_stats.sps_ncreate.value.ui64++;
1440f1702cYu Xiangning<Eric.Yu@Sun.COM>	so = sp->sp_smod_info->smod_sock_create_func(sp, parent->so_family,
1450f1702cYu Xiangning<Eric.Yu@Sun.COM>	    parent->so_type, parent->so_protocol, parent->so_version, flags,
1460f1702cYu Xiangning<Eric.Yu@Sun.COM>	    errorp, cr);
1470f1702cYu Xiangning<Eric.Yu@Sun.COM>	if (so != NULL) {
1480f1702cYu Xiangning<Eric.Yu@Sun.COM>		SOCKPARAMS_INC_REF(sp);
1490f1702cYu Xiangning<Eric.Yu@Sun.COM>
1500f1702cYu Xiangning<Eric.Yu@Sun.COM>		so->so_proto_handle = lh;
1510f1702cYu Xiangning<Eric.Yu@Sun.COM>		so->so_downcalls = dc;
1520f1702cYu Xiangning<Eric.Yu@Sun.COM>		/*
1530f1702cYu Xiangning<Eric.Yu@Sun.COM>		 * This function may be called in interrupt context, and CRED()
1540f1702cYu Xiangning<Eric.Yu@Sun.COM>		 * will be NULL. In this case, pass in kcred.
1550f1702cYu Xiangning<Eric.Yu@Sun.COM>		 */
1560f1702cYu Xiangning<Eric.Yu@Sun.COM>		if ((*errorp = SOP_INIT(so, parent, cr, flags)) == 0) {
1570f1702cYu Xiangning<Eric.Yu@Sun.COM>			/* Cannot fail, only bumps so_count */
1580f1702cYu Xiangning<Eric.Yu@Sun.COM>			(void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL);
1590f1702cYu Xiangning<Eric.Yu@Sun.COM>		} else  {
1600f1702cYu Xiangning<Eric.Yu@Sun.COM>			socket_destroy(so);
1610f1702cYu Xiangning<Eric.Yu@Sun.COM>			so = NULL;
1620f1702cYu Xiangning<Eric.Yu@Sun.COM>		}
1630f1702cYu Xiangning<Eric.Yu@Sun.COM>	}
1640f1702cYu Xiangning<Eric.Yu@Sun.COM>
1650f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (so);
1660f1702cYu Xiangning<Eric.Yu@Sun.COM>}
1670f1702cYu Xiangning<Eric.Yu@Sun.COM>
1680f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
1690f1702cYu Xiangning<Eric.Yu@Sun.COM> * Bind local endpoint.
1700f1702cYu Xiangning<Eric.Yu@Sun.COM> */
1710f1702cYu Xiangning<Eric.Yu@Sun.COM>int
1720f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
1730f1702cYu Xiangning<Eric.Yu@Sun.COM>    int flags, cred_t *cr)
1740f1702cYu Xiangning<Eric.Yu@Sun.COM>{
1750f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (SOP_BIND(so, name, namelen, flags, cr));
1760f1702cYu Xiangning<Eric.Yu@Sun.COM>}
1770f1702cYu Xiangning<Eric.Yu@Sun.COM>
1780f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
1790f1702cYu Xiangning<Eric.Yu@Sun.COM> * Turn socket into a listen socket.
1800f1702cYu Xiangning<Eric.Yu@Sun.COM> */
1810f1702cYu Xiangning<Eric.Yu@Sun.COM>int
1820f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_listen(struct sonode *so, int backlog, cred_t *cr)
1830f1702cYu Xiangning<Eric.Yu@Sun.COM>{
1840f1702cYu Xiangning<Eric.Yu@Sun.COM>	if (backlog < 0) {
1850f1702cYu Xiangning<Eric.Yu@Sun.COM>		backlog = 0;
1860f1702cYu Xiangning<Eric.Yu@Sun.COM>	}
1870f1702cYu Xiangning<Eric.Yu@Sun.COM>
1880f1702cYu Xiangning<Eric.Yu@Sun.COM>	/*
1890f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * Use the same qlimit as in BSD. BSD checks the qlimit
1900f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * before queuing the next connection implying that a
1910f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * listen(sock, 0) allows one connection to be queued.
1920f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * BSD also uses 1.5 times the requested backlog.
1930f1702cYu Xiangning<Eric.Yu@Sun.COM>	 *
1940f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * XNS Issue 4 required a strict interpretation of the backlog.
1950f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * This has been waived subsequently for Issue 4 and the change
1960f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * incorporated in XNS Issue 5. So we aren't required to do
1970f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * anything special for XPG apps.
1980f1702cYu Xiangning<Eric.Yu@Sun.COM>	 */
1990f1702cYu Xiangning<Eric.Yu@Sun.COM>	if (backlog >= (INT_MAX - 1) / 3)
2000f1702cYu Xiangning<Eric.Yu@Sun.COM>		backlog = INT_MAX;
2010f1702cYu Xiangning<Eric.Yu@Sun.COM>	else
2020f1702cYu Xiangning<Eric.Yu@Sun.COM>		backlog = backlog * 3 / 2 + 1;
2030f1702cYu Xiangning<Eric.Yu@Sun.COM>
2040f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (SOP_LISTEN(so, backlog, cr));
2050f1702cYu Xiangning<Eric.Yu@Sun.COM>}
2060f1702cYu Xiangning<Eric.Yu@Sun.COM>
2070f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
2080f1702cYu Xiangning<Eric.Yu@Sun.COM> * Accept incoming connection.
2090f1702cYu Xiangning<Eric.Yu@Sun.COM> */
2100f1702cYu Xiangning<Eric.Yu@Sun.COM>int
2110f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_accept(struct sonode *lso, int fflag, cred_t *cr, struct sonode **nsop)
2120f1702cYu Xiangning<Eric.Yu@Sun.COM>{
2130f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (SOP_ACCEPT(lso, fflag, cr, nsop));
2140f1702cYu Xiangning<Eric.Yu@Sun.COM>}
2150f1702cYu Xiangning<Eric.Yu@Sun.COM>
2160f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
2170f1702cYu Xiangning<Eric.Yu@Sun.COM> * Active open.
2180f1702cYu Xiangning<Eric.Yu@Sun.COM> */
2190f1702cYu Xiangning<Eric.Yu@Sun.COM>int
2203e95bd4Anders Perssonsocket_connect(struct sonode *so, struct sockaddr *name,
2210f1702cYu Xiangning<Eric.Yu@Sun.COM>    socklen_t namelen, int fflag, int flags, cred_t *cr)
2220f1702cYu Xiangning<Eric.Yu@Sun.COM>{
2230f1702cYu Xiangning<Eric.Yu@Sun.COM>	int error;
2240f1702cYu Xiangning<Eric.Yu@Sun.COM>
2250f1702cYu Xiangning<Eric.Yu@Sun.COM>	/*
2260f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * Handle a connect to a name parameter of type AF_UNSPEC like a
2270f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * connect to a null address. This is the portable method to
2280f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * unconnect a socket.
2290f1702cYu Xiangning<Eric.Yu@Sun.COM>	 */
2300f1702cYu Xiangning<Eric.Yu@Sun.COM>	if ((namelen >= sizeof (sa_family_t)) &&
2310f1702cYu Xiangning<Eric.Yu@Sun.COM>	    (name->sa_family == AF_UNSPEC)) {
2320f1702cYu Xiangning<Eric.Yu@Sun.COM>		name = NULL;
2330f1702cYu Xiangning<Eric.Yu@Sun.COM>		namelen = 0;
2340f1702cYu Xiangning<Eric.Yu@Sun.COM>	}
2350f1702cYu Xiangning<Eric.Yu@Sun.COM>
2360f1702cYu Xiangning<Eric.Yu@Sun.COM>	error = SOP_CONNECT(so, name, namelen, fflag, flags, cr);
2370f1702cYu Xiangning<Eric.Yu@Sun.COM>
2380f1702cYu Xiangning<Eric.Yu@Sun.COM>	if (error == EHOSTUNREACH && flags & _SOCONNECT_XPG4_2) {
2390f1702cYu Xiangning<Eric.Yu@Sun.COM>		/*
2400f1702cYu Xiangning<Eric.Yu@Sun.COM>		 * X/Open specification contains a requirement that
2410f1702cYu Xiangning<Eric.Yu@Sun.COM>		 * ENETUNREACH be returned but does not require
2420f1702cYu Xiangning<Eric.Yu@Sun.COM>		 * EHOSTUNREACH. In order to keep the test suite
2430f1702cYu Xiangning<Eric.Yu@Sun.COM>		 * happy we mess with the errno here.
2440f1702cYu Xiangning<Eric.Yu@Sun.COM>		 */
2450f1702cYu Xiangning<Eric.Yu@Sun.COM>		error = ENETUNREACH;
2460f1702cYu Xiangning<Eric.Yu@Sun.COM>	}
2470f1702cYu Xiangning<Eric.Yu@Sun.COM>
2480f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (error);
2490f1702cYu Xiangning<Eric.Yu@Sun.COM>}
2500f1702cYu Xiangning<Eric.Yu@Sun.COM>
2510f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
2520f1702cYu Xiangning<Eric.Yu@Sun.COM> * Get address of remote node.
2530f1702cYu Xiangning<Eric.Yu@Sun.COM> */
2540f1702cYu Xiangning<Eric.Yu@Sun.COM>int
2550f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_getpeername(struct sonode *so, struct sockaddr *addr,
2560f1702cYu Xiangning<Eric.Yu@Sun.COM>    socklen_t *addrlen, boolean_t accept, cred_t *cr)
2570f1702cYu Xiangning<Eric.Yu@Sun.COM>{
2580f1702cYu Xiangning<Eric.Yu@Sun.COM>	ASSERT(*addrlen > 0);
2590f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
2600f1702cYu Xiangning<Eric.Yu@Sun.COM>
2610f1702cYu Xiangning<Eric.Yu@Sun.COM>}
2620f1702cYu Xiangning<Eric.Yu@Sun.COM>
2630f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
2640f1702cYu Xiangning<Eric.Yu@Sun.COM> * Get local address.
2650f1702cYu Xiangning<Eric.Yu@Sun.COM> */
2660f1702cYu Xiangning<Eric.Yu@Sun.COM>int
2670f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_getsockname(struct sonode *so, struct sockaddr *addr,
2680f1702cYu Xiangning<Eric.Yu@Sun.COM>    socklen_t *addrlen, cred_t *cr)
2690f1702cYu Xiangning<Eric.Yu@Sun.COM>{
2700f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (SOP_GETSOCKNAME(so, addr, addrlen, cr));
2710f1702cYu Xiangning<Eric.Yu@Sun.COM>
2720f1702cYu Xiangning<Eric.Yu@Sun.COM>}
2730f1702cYu Xiangning<Eric.Yu@Sun.COM>
2740f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
2750f1702cYu Xiangning<Eric.Yu@Sun.COM> * Called from shutdown().
2760f1702cYu Xiangning<Eric.Yu@Sun.COM> */
2770f1702cYu Xiangning<Eric.Yu@Sun.COM>int
2780f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_shutdown(struct sonode *so, int how, cred_t *cr)
2790f1702cYu Xiangning<Eric.Yu@Sun.COM>{
2800f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (SOP_SHUTDOWN(so, how, cr));
2810f1702cYu Xiangning<Eric.Yu@Sun.COM>}
2820f1702cYu Xiangning<Eric.Yu@Sun.COM>
2830f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
2840f1702cYu Xiangning<Eric.Yu@Sun.COM> * Get socket options.
2850f1702cYu Xiangning<Eric.Yu@Sun.COM> */
2860f1702cYu Xiangning<Eric.Yu@Sun.COM>/*ARGSUSED*/
2870f1702cYu Xiangning<Eric.Yu@Sun.COM>int
2880f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_getsockopt(struct sonode *so, int level, int option_name,
2890f1702cYu Xiangning<Eric.Yu@Sun.COM>    void *optval, socklen_t *optlenp, int flags, cred_t *cr)
2900f1702cYu Xiangning<Eric.Yu@Sun.COM>{
2910f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (SOP_GETSOCKOPT(so, level, option_name, optval,
2920f1702cYu Xiangning<Eric.Yu@Sun.COM>	    optlenp, flags, cr));
2930f1702cYu Xiangning<Eric.Yu@Sun.COM>}
2940f1702cYu Xiangning<Eric.Yu@Sun.COM>
2950f1702cYu Xiangning<Eric.Yu@Sun.COM>/*
2960f1702cYu Xiangning<Eric.Yu@Sun.COM> * Set socket options
2970f1702cYu Xiangning<Eric.Yu@Sun.COM> */
2980f1702cYu Xiangning<Eric.Yu@Sun.COM>int
2990f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_setsockopt(struct sonode *so, int level, int option_name,
3000f1702cYu Xiangning<Eric.Yu@Sun.COM>    const void *optval, t_uscalar_t optlen, cred_t *cr)
3010f1702cYu Xiangning<Eric.Yu@Sun.COM>{
30222238f7shenjian	int val = 1;
3030f1702cYu Xiangning<Eric.Yu@Sun.COM>	/* Caller allocates aligned optval, or passes null */
3040f1702cYu Xiangning<Eric.Yu@Sun.COM>	ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0);
3050f1702cYu Xiangning<Eric.Yu@Sun.COM>	/* If optval is null optlen is 0, and vice-versa */
3060f1702cYu Xiangning<Eric.Yu@Sun.COM>	ASSERT(optval != NULL || optlen == 0);
3070f1702cYu Xiangning<Eric.Yu@Sun.COM>	ASSERT(optlen != 0 || optval == NULL);
3080f1702cYu Xiangning<Eric.Yu@Sun.COM>
30922238f7shenjian	if (optval == NULL && optlen == 0)
31022238f7shenjian		optval = &val;
3110f1702cYu Xiangning<Eric.Yu@Sun.COM>
3120f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
3130f1702cYu Xiangning<Eric.Yu@Sun.COM>}
3140f1702cYu Xiangning<Eric.Yu@Sun.COM>
3150f1702cYu Xiangning<Eric.Yu@Sun.COM>int
3160f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
3170f1702cYu Xiangning<Eric.Yu@Sun.COM>    cred_t *cr)
3180f1702cYu Xiangning<Eric.Yu@Sun.COM>{
3190f1702cYu Xiangning<Eric.Yu@Sun.COM>	int error = 0;
3200f1702cYu Xiangning<Eric.Yu@Sun.COM>	ssize_t orig_resid = uiop->uio_resid;
3210f1702cYu Xiangning<Eric.Yu@Sun.COM>
3220f1702cYu Xiangning<Eric.Yu@Sun.COM>	/*
3230f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * Do not bypass the cache if we are doing a local (AF_UNIX) write.
3240f1702cYu Xiangning<Eric.Yu@Sun.COM>	 */
3250f1702cYu Xiangning<Eric.Yu@Sun.COM>	if (so->so_family == AF_UNIX)
3260f1702cYu Xiangning<Eric.Yu@Sun.COM>		uiop->uio_extflg |= UIO_COPY_CACHED;
3270f1702cYu Xiangning<Eric.Yu@Sun.COM>	else
3280f1702cYu Xiangning<Eric.Yu@Sun.COM>		uiop->uio_extflg &= ~UIO_COPY_CACHED;
3290f1702cYu Xiangning<Eric.Yu@Sun.COM>
3300f1702cYu Xiangning<Eric.Yu@Sun.COM>	error = SOP_SENDMSG(so, msg, uiop, cr);
3310f1702cYu Xiangning<Eric.Yu@Sun.COM>	switch (error) {
3320f1702cYu Xiangning<Eric.Yu@Sun.COM>	default:
3330f1702cYu Xiangning<Eric.Yu@Sun.COM>		break;
3340f1702cYu Xiangning<Eric.Yu@Sun.COM>	case EINTR:
3352640612Anders Persson	case ENOMEM:
33634dfe68shenjian	/* EAGAIN is EWOULDBLOCK */
3370f1702cYu Xiangning<Eric.Yu@Sun.COM>	case EWOULDBLOCK:
3380f1702cYu Xiangning<Eric.Yu@Sun.COM>		/* We did a partial send */
3390f1702cYu Xiangning<Eric.Yu@Sun.COM>		if (uiop->uio_resid != orig_resid)
3400f1702cYu Xiangning<Eric.Yu@Sun.COM>			error = 0;
3410f1702cYu Xiangning<Eric.Yu@Sun.COM>		break;
3420f1702cYu Xiangning<Eric.Yu@Sun.COM>	case EPIPE:
343f67b7b8Sebastian Wiedenroth		if (((so->so_mode & SM_KERNEL) == 0) &&
344f67b7b8Sebastian Wiedenroth		    ((msg->msg_flags & MSG_NOSIGNAL) == 0)) {
3450f1702cYu Xiangning<Eric.Yu@Sun.COM>			tsignal(curthread, SIGPIPE);
346f67b7b8Sebastian Wiedenroth		}
3470f1702cYu Xiangning<Eric.Yu@Sun.COM>		break;
3480f1702cYu Xiangning<Eric.Yu@Sun.COM>	}
3490f1702cYu Xiangning<Eric.Yu@Sun.COM>
3500f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (error);
3510f1702cYu Xiangning<Eric.Yu@Sun.COM>}
3520f1702cYu Xiangning<Eric.Yu@Sun.COM>
3530f1702cYu Xiangning<Eric.Yu@Sun.COM>int
3540f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
3550f1702cYu Xiangning<Eric.Yu@Sun.COM>    struct cred *cr, mblk_t **mpp)
3560f1702cYu Xiangning<Eric.Yu@Sun.COM>{
3570f1702cYu Xiangning<Eric.Yu@Sun.COM>	int error = 0;
3580f1702cYu Xiangning<Eric.Yu@Sun.COM>
3590f1702cYu Xiangning<Eric.Yu@Sun.COM>	error = SOP_SENDMBLK(so, msg, fflag, cr, mpp);
3600f1702cYu Xiangning<Eric.Yu@Sun.COM>	if (error == EPIPE) {
3610f1702cYu Xiangning<Eric.Yu@Sun.COM>		tsignal(curthread, SIGPIPE);
3620f1702cYu Xiangning<Eric.Yu@Sun.COM>	}
3630f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (error);
3640f1702cYu Xiangning<Eric.Yu@Sun.COM>}
3650f1702cYu Xiangning<Eric.Yu@Sun.COM>
3660f1702cYu Xiangning<Eric.Yu@Sun.COM>int
3670f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
3680f1702cYu Xiangning<Eric.Yu@Sun.COM>    cred_t *cr)
3690f1702cYu Xiangning<Eric.Yu@Sun.COM>{
3700f1702cYu Xiangning<Eric.Yu@Sun.COM>	int error;
3710f1702cYu Xiangning<Eric.Yu@Sun.COM>	ssize_t orig_resid = uiop->uio_resid;
3720f1702cYu Xiangning<Eric.Yu@Sun.COM>
3730f1702cYu Xiangning<Eric.Yu@Sun.COM>	/*
3740f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * Do not bypass the cache when reading data, as the application
3750f1702cYu Xiangning<Eric.Yu@Sun.COM>	 * is likely to access the data shortly.
3760f1702cYu Xiangning<Eric.Yu@Sun.COM>	 */
3770f1702cYu Xiangning<Eric.Yu@Sun.COM>	uiop->uio_extflg |= UIO_COPY_CACHED;
3780f1702cYu Xiangning<Eric.Yu@Sun.COM>
3790f1702cYu Xiangning<Eric.Yu@Sun.COM>	error = SOP_RECVMSG(so, msg, uiop, cr);
3800f1702cYu Xiangning<Eric.Yu@Sun.COM>
3810f1702cYu Xiangning<Eric.Yu@Sun.COM>	switch (error) {
3820f1702cYu Xiangning<Eric.Yu@Sun.COM>	case EINTR:
38334dfe68shenjian	/* EAGAIN is EWOULDBLOCK */
3840f1702cYu Xiangning<Eric.Yu@Sun.COM>	case EWOULDBLOCK:
3850f1702cYu Xiangning<Eric.Yu@Sun.COM>		/* We did a partial read */
3860f1702cYu Xiangning<Eric.Yu@Sun.COM>		if (uiop->uio_resid != orig_resid)
3870f1702cYu Xiangning<Eric.Yu@Sun.COM>			error = 0;
3880f1702cYu Xiangning<Eric.Yu@Sun.COM>		break;
3890f1702cYu Xiangning<Eric.Yu@Sun.COM>	default:
3900f1702cYu Xiangning<Eric.Yu@Sun.COM>		break;
3910f1702cYu Xiangning<Eric.Yu@Sun.COM>	}
3920f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (error);
3930f1702cYu Xiangning<Eric.Yu@Sun.COM>}
3940f1702cYu Xiangning<Eric.Yu@Sun.COM>
/*
 * Perform an ioctl on `so' by dispatching to SOP_IOCTL().  All arguments
 * are passed through unchanged; returns whatever SOP_IOCTL() returns.
 */
int
socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error;

	error = SOP_IOCTL(so, cmd, arg, mode, cr, rvalp);
	return (error);
}
4010f1702cYu Xiangning<Eric.Yu@Sun.COM>
/*
 * Poll `so' by dispatching to SOP_POLL().  All arguments are passed
 * through unchanged; returns whatever SOP_POLL() returns.
 */
int
socket_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int error;

	error = SOP_POLL(so, events, anyyet, reventsp, phpp);
	return (error);
}
4080f1702cYu Xiangning<Eric.Yu@Sun.COM>
4090f1702cYu Xiangning<Eric.Yu@Sun.COM>int
4100f1702cYu Xiangning<Eric.Yu@Sun.COM>socket_close(struct sonode *so, int flag, struct cred *cr)
4110f1702cYu Xiangning<Eric.Yu@Sun.COM>{
4120f1702cYu Xiangning<Eric.Yu@Sun.COM>	return (VOP