udp.c revision da14cebe459d3275048785f25bd869cb09b5307f
27c478bdstevel@tonic-gate * CDDL HEADER START
37c478bdstevel@tonic-gate *
47c478bdstevel@tonic-gate * The contents of this file are subject to the terms of the
59557906ja * Common Development and Distribution License (the "License").
69557906ja * You may not use this file except in compliance with the License.
77c478bdstevel@tonic-gate *
87c478bdstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bdstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bdstevel@tonic-gate * See the License for the specific language governing permissions
117c478bdstevel@tonic-gate * and limitations under the License.
127c478bdstevel@tonic-gate *
137c478bdstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bdstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bdstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bdstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bdstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bdstevel@tonic-gate *
197c478bdstevel@tonic-gate * CDDL HEADER END
207c478bdstevel@tonic-gate */
22aa92d85gt * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
237c478bdstevel@tonic-gate * Use is subject to license terms.
247c478bdstevel@tonic-gate */
257c478bdstevel@tonic-gate/* Copyright (c) 1990 Mentat Inc. */
277c478bdstevel@tonic-gate#include <sys/types.h>
287c478bdstevel@tonic-gate#include <sys/stream.h>
29ff550d0masputra#include <sys/dlpi.h>
30ff550d0masputra#include <sys/pattr.h>
317c478bdstevel@tonic-gate#include <sys/stropts.h>
327c478bdstevel@tonic-gate#include <sys/strlog.h>
337c478bdstevel@tonic-gate#include <sys/strsun.h>
34e4f35dbgt#include <sys/time.h>
357c478bdstevel@tonic-gate#define	_SUN_TPI_VERSION 2
367c478bdstevel@tonic-gate#include <sys/tihdr.h>
377c478bdstevel@tonic-gate#include <sys/timod.h>
387c478bdstevel@tonic-gate#include <sys/ddi.h>
397c478bdstevel@tonic-gate#include <sys/sunddi.h>
407c478bdstevel@tonic-gate#include <sys/strsubr.h>
417c478bdstevel@tonic-gate#include <sys/suntpi.h>
427c478bdstevel@tonic-gate#include <sys/xti_inet.h>
437c478bdstevel@tonic-gate#include <sys/cmn_err.h>
447c478bdstevel@tonic-gate#include <sys/kmem.h>
457c478bdstevel@tonic-gate#include <sys/policy.h>
467c478bdstevel@tonic-gate#include <sys/ucred.h>
477c478bdstevel@tonic-gate#include <sys/zone.h>
497c478bdstevel@tonic-gate#include <sys/socket.h>
50ff550d0masputra#include <sys/sockio.h>
517c478bdstevel@tonic-gate#include <sys/vtrace.h>
52381a2a9dr#include <sys/sdt.h>
537c478bdstevel@tonic-gate#include <sys/debug.h>
547c478bdstevel@tonic-gate#include <sys/isa_defs.h>
557c478bdstevel@tonic-gate#include <sys/random.h>
567c478bdstevel@tonic-gate#include <netinet/in.h>
577c478bdstevel@tonic-gate#include <netinet/ip6.h>
587c478bdstevel@tonic-gate#include <netinet/icmp6.h>
597c478bdstevel@tonic-gate#include <netinet/udp.h>
607c478bdstevel@tonic-gate#include <net/if.h>
61ff550d0masputra#include <net/route.h>
637c478bdstevel@tonic-gate#include <inet/common.h>
647c478bdstevel@tonic-gate#include <inet/ip.h>
65ff550d0masputra#include <inet/ip_impl.h>
667c478bdstevel@tonic-gate#include <inet/ip6.h>
677c478bdstevel@tonic-gate#include <inet/ip_ire.h>
68ff550d0masputra#include <inet/ip_if.h>
69ff550d0masputra#include <inet/ip_multi.h>
70c793af9sangeeta#include <inet/ip_ndp.h>
717c478bdstevel@tonic-gate#include <inet/mi.h>
727c478bdstevel@tonic-gate#include <inet/mib2.h>
737c478bdstevel@tonic-gate#include <inet/nd.h>
747c478bdstevel@tonic-gate#include <inet/optcom.h>
757c478bdstevel@tonic-gate#include <inet/snmpcom.h>
767c478bdstevel@tonic-gate#include <inet/kstatcom.h>
777c478bdstevel@tonic-gate#include <inet/udp_impl.h>
78ff550d0masputra#include <inet/ipclassifier.h>
79ff550d0masputra#include <inet/ipsec_impl.h>
80ff550d0masputra#include <inet/ipp_common.h>
81da14cebEric Cheng#include <sys/squeue_impl.h>
82b127ac4Philip Kirk#include <inet/ipnet.h>
85ff550d0masputra * The ipsec_info.h header file is here since it has the definition for the
867c478bdstevel@tonic-gate * M_CTL message types used by IP to convey information to the ULP. The
8745916cdjpk * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence.
887c478bdstevel@tonic-gate */
897c478bdstevel@tonic-gate#include <net/pfkeyv2.h>
907c478bdstevel@tonic-gate#include <inet/ipsec_info.h>
9245916cdjpk#include <sys/tsol/label.h>
9345916cdjpk#include <sys/tsol/tnet.h>
9445916cdjpk#include <rpc/pmap_prot.h>
977c478bdstevel@tonic-gate * Synchronization notes:
987c478bdstevel@tonic-gate *
99fc80c0dnordmark * UDP is MT and uses the usual kernel synchronization primitives. There are 2
100fc80c0dnordmark * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock.
101fc80c0dnordmark * We also use conn_lock when updating things that affect the IP classifier
102fc80c0dnordmark * lookup.
103fc80c0dnordmark * The lock order is udp_rwlock -> uf_lock and udp_rwlock -> conn_lock.
1047c478bdstevel@tonic-gate *
105fc80c0dnordmark * The fanout lock uf_lock:
106ff550d0masputra * When a UDP endpoint is bound to a local port, it is inserted into
1077c478bdstevel@tonic-gate * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
1087c478bdstevel@tonic-gate * The size of the array is controlled by the udp_bind_fanout_size variable.
1097c478bdstevel@tonic-gate * This variable can be changed in /etc/system if the default value is
110ff550d0masputra * not large enough.  Each bind hash bucket is protected by a per bucket
111ff550d0masputra * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
112fc80c0dnordmark * structure and a few other fields in the udp_t. A UDP endpoint is removed
113fc80c0dnordmark * from the bind hash list only when it is being unbound or being closed.
114fc80c0dnordmark * The per bucket lock also protects a UDP endpoint's state changes.
115ff550d0masputra *
116fc80c0dnordmark * The udp_rwlock:
117fc80c0dnordmark * This protects most of the other fields in the udp_t. The exact list of
118fc80c0dnordmark * fields which are protected by each of the above locks is documented in
119fc80c0dnordmark * the udp_t structure definition.
120ff550d0masputra *
121fc80c0dnordmark * Plumbing notes:
122fc80c0dnordmark * UDP is always a device driver. For compatibility with mibopen() code
123fc80c0dnordmark * it is possible to I_PUSH "udp", but that results in pushing a passthrough
124fc80c0dnordmark * dummy module.
125ff550d0masputra *
126fc80c0dnordmark * The above implies that we don't support any intermediate module to
127ff550d0masputra * reside in between /dev/ip and udp -- in fact, we never supported such a
128ff550d0masputra * scenario in the past as the inter-layer communication semantics have
129fc80c0dnordmark * always been private.
1307c478bdstevel@tonic-gate */
132f4b3ec6dh/* For /etc/system control */
/*
 * Requested number of buckets in the bind hash (fanout) array; it starts
 * at UDP_BIND_FANOUT_SIZE and is a plain global so that it can be
 * overridden before the fanout array is sized.
 */
1337c478bdstevel@tonic-gateuint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
/*
 * Text returned by udp_bind_hash_report() when a non-privileged caller
 * asks for a report again before the rate-limit interval has elapsed.
 */
1357c478bdstevel@tonic-gate#define	NDD_TOO_QUICK_MSG \
13645916cdjpk	"ndd get info rate too high for non-privileged users, try again " \
1377c478bdstevel@tonic-gate	"later.\n"
/*
 * Text returned by udp_bind_hash_report() when the ND_MAX_BUF_LEN report
 * buffer cannot be allocated.
 */
1387c478bdstevel@tonic-gate#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"
14045916cdjpk/* Option processing attrs */
/*
 * Scratch state handed to udp_unitdata_opt_process() (see its prototype,
 * which takes a udpattrs_t *).  Exactly one of the two packet-option
 * pointers is meaningful, selected by address family, so they share
 * storage in a union.
 */
14145916cdjpktypedef struct udpattrs_s {
14219a30e1rshoaib	union {
14319a30e1rshoaib		ip6_pkt_t	*udpattr_ipp6;	/* For V6 */
14419a30e1rshoaib		ip4_pkt_t 	*udpattr_ipp4;	/* For V4 */
14519a30e1rshoaib	} udpattr_ippu;
/*
 * Convenience aliases so that callers can write attrs->udpattr_ipp6 (or
 * udpattr_ipp4) without spelling out the intermediate union member.
 */
14619a30e1rshoaib#define	udpattr_ipp6 udpattr_ippu.udpattr_ipp6
14719a30e1rshoaib#define	udpattr_ipp4 udpattr_ippu.udpattr_ipp4
/*
 * NOTE(review): the roles of the next two fields are not shown in this
 * part of the file; presumably udpattr_mb is a message block associated
 * with the options being processed and udpattr_credset records whether a
 * credential was attached -- confirm against udp_unitdata_opt_process().
 */
14845916cdjpk	mblk_t		*udpattr_mb;
14945916cdjpk	boolean_t	udpattr_credset;
15045916cdjpk} udpattrs_t;
/*
 * Forward declarations for the file-local (static) routines of this file.
 * They are needed because the qinit tables and ND parameter handlers
 * defined further down take the addresses of several of these functions
 * before their definitions appear.
 */
1527c478bdstevel@tonic-gatestatic void	udp_addr_req(queue_t *q, mblk_t *mp);
1537c478bdstevel@tonic-gatestatic void	udp_bind(queue_t *q, mblk_t *mp);
1547c478bdstevel@tonic-gatestatic void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
1557c478bdstevel@tonic-gatestatic void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
156fc80c0dnordmarkstatic void	udp_bind_result(conn_t *, mblk_t *);
157fc80c0dnordmarkstatic void	udp_bind_ack(conn_t *, mblk_t *mp);
158fc80c0dnordmarkstatic void	udp_bind_error(conn_t *, mblk_t *mp);
159fc80c0dnordmarkstatic int	udp_build_hdrs(udp_t *udp);
1607c478bdstevel@tonic-gatestatic void	udp_capability_req(queue_t *q, mblk_t *mp);
1617c478bdstevel@tonic-gatestatic int	udp_close(queue_t *q);
1627c478bdstevel@tonic-gatestatic void	udp_connect(queue_t *q, mblk_t *mp);
1637c478bdstevel@tonic-gatestatic void	udp_disconnect(queue_t *q, mblk_t *mp);
1647c478bdstevel@tonic-gatestatic void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
1657c478bdstevel@tonic-gate		    int sys_error);
1667c478bdstevel@tonic-gatestatic void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
1677c478bdstevel@tonic-gate		    t_scalar_t tlierr, int unixerr);
/*
 * The (queue, mblk, [value,] caddr_t, cred) signatures below are the
 * get/set handler shapes also used by udp_bind_hash_report().
 */
1687c478bdstevel@tonic-gatestatic int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
1697c478bdstevel@tonic-gate		    cred_t *cr);
1707c478bdstevel@tonic-gatestatic int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
1717c478bdstevel@tonic-gate		    char *value, caddr_t cp, cred_t *cr);
1727c478bdstevel@tonic-gatestatic int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
1737c478bdstevel@tonic-gate		    char *value, caddr_t cp, cred_t *cr);
1747c478bdstevel@tonic-gatestatic void	udp_icmp_error(queue_t *q, mblk_t *mp);
1757c478bdstevel@tonic-gatestatic void	udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
1767c478bdstevel@tonic-gatestatic void	udp_info_req(queue_t *q, mblk_t *mp);
177fc80c0dnordmarkstatic void	udp_input(void *, mblk_t *, void *);
1787c478bdstevel@tonic-gatestatic mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
1797c478bdstevel@tonic-gate		    t_scalar_t addr_length);
180fc80c0dnordmarkstatic void	udp_lrput(queue_t *, mblk_t *);
181fc80c0dnordmarkstatic void	udp_lwput(queue_t *, mblk_t *);
/*
 * udp_openv4() and udp_openv6() are the entry points placed in the qinit
 * tables; udp_open() has the same arguments plus an isv6 flag.
 */
1827c478bdstevel@tonic-gatestatic int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
183fc80c0dnordmark		    cred_t *credp, boolean_t isv6);
184fc80c0dnordmarkstatic int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
185fc80c0dnordmark		    cred_t *credp);
186fc80c0dnordmarkstatic int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
1877c478bdstevel@tonic-gate		    cred_t *credp);
1887c478bdstevel@tonic-gatestatic  int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
18945916cdjpk		    int *errorp, udpattrs_t *udpattrs);
1907c478bdstevel@tonic-gatestatic boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
1917c478bdstevel@tonic-gatestatic int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
192f4b3ec6dhstatic boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
1937c478bdstevel@tonic-gatestatic int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
1947c478bdstevel@tonic-gate		    cred_t *cr);
1957c478bdstevel@tonic-gatestatic void	udp_report_item(mblk_t *mp, udp_t *udp);
196ff550d0masputrastatic int	udp_rinfop(queue_t *q, infod_t *dp);
197ff550d0masputrastatic int	udp_rrw(queue_t *q, struiod_t *dp);
1987c478bdstevel@tonic-gatestatic int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
1997c478bdstevel@tonic-gate		    cred_t *cr);
200da14cebEric Chengstatic void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp,
201da14cebEric Cheng		    ipha_t *ipha);
202ff550d0masputrastatic void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
203ff550d0masputra		    t_scalar_t destlen, t_scalar_t err);
2047c478bdstevel@tonic-gatestatic void	udp_unbind(queue_t *q, mblk_t *mp);
20545916cdjpkstatic in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
20645916cdjpk    boolean_t random);
207437220cdanmcdstatic mblk_t	*udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
208da14cebEric Cheng    int *, boolean_t);
209ff550d0masputrastatic mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
21045916cdjpk		    int *error);
2117c478bdstevel@tonic-gatestatic void	udp_wput_other(queue_t *q, mblk_t *mp);
2127c478bdstevel@tonic-gatestatic void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
213ff550d0masputrastatic size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
/* Init/fini pairs keyed by netstack id, for the stack state and its kstats. */
215f4b3ec6dhstatic void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
216f4b3ec6dhstatic void	udp_stack_fini(netstackid_t stackid, void *arg);
218f4b3ec6dhstatic void	*udp_kstat_init(netstackid_t stackid);
219f4b3ec6dhstatic void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
220f4b3ec6dhstatic void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
221f4b3ec6dhstatic void	udp_kstat2_fini(netstackid_t, kstat_t *);
2227c478bdstevel@tonic-gatestatic int	udp_kstat_update(kstat_t *kp, int rw);
/* Receive-side queueing helpers and the transmit routine. */
224ff550d0masputrastatic void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
225ff550d0masputra		    uint_t pkt_len);
226ff550d0masputrastatic void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
227fc80c0dnordmarkstatic void	udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);
/*
 * Default flow-control water marks.  udp_param_arr uses the HIWATER values
 * as the initial settings of udp_recv_hiwat and udp_xmit_hiwat, the
 * LOWATER values as the minimums those two tunables may be set to, and
 * UDP_XMIT_LOWATER additionally as the initial value of udp_xmit_lowat.
 * NOTE(review): units are presumably bytes -- confirm.
 */
229ff550d0masputra#define	UDP_RECV_HIWATER	(56 * 1024)
230ff550d0masputra#define	UDP_RECV_LOWATER	128
231ff550d0masputra#define	UDP_XMIT_HIWATER	(56 * 1024)
232ff550d0masputra#define	UDP_XMIT_LOWATER	1024
234fc80c0dnordmarkstatic struct module_info udp_mod_info =  {
239fc80c0dnordmark * Entry points for UDP as a device.
240fc80c0dnordmark * We have separate open functions for the /dev/udp and /dev/udp6 devices.
241fc80c0dnordmark */
/*
 * Read-side qinit tables.  The IPv4 and IPv6 variants differ only in the
 * open routine (udp_openv4 vs. udp_openv6); both share udp_close,
 * udp_mod_info, udp_rrw and udp_rinfop, and leave the first two slots NULL.
 * NOTE(review): slot meanings assume the usual struct qinit order (put,
 * service, open, close, admin, module_info, module_stat, rwp, infop,
 * struiot) -- confirm against <sys/stream.h>.
 */
242fc80c0dnordmarkstatic struct qinit udp_rinitv4 = {
243fc80c0dnordmark	NULL, NULL, udp_openv4, udp_close, NULL,
244fc80c0dnordmark	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
247fc80c0dnordmarkstatic struct qinit udp_rinitv6 = {
248fc80c0dnordmark	NULL, NULL, udp_openv6, udp_close, NULL,
249fc80c0dnordmark	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
/*
 * Write-side qinit, shared by the IPv4 and IPv6 streamtabs.  It installs
 * udp_wput and ip_wsrv (both cast to pfi_t) in the first two slots and has
 * no open or close routine of its own.
 */
252ff550d0masputrastatic struct qinit udp_winit = {
253a9737benordmark	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL,
254fc80c0dnordmark	&udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE
258fc80c0dnordmark * UDP needs to handle I_LINK and I_PLINK since ifconfig
259fc80c0dnordmark * likes to use it as a place to hang the various streams.
260fc80c0dnordmark */
/*
 * qinit tables used as the third and fourth members of both streamtabs.
 * Only the leading members are initialized (udp_lrput or udp_lwput,
 * udp_openv4, udp_close, udp_mod_info); the remaining members are left to
 * default initialization.
 * NOTE(review): presumably these serve the lower (linked) side of the
 * multiplexor created by I_LINK/I_PLINK -- confirm.
 */
261fc80c0dnordmarkstatic struct qinit udp_lrinit = {
262fc80c0dnordmark	(pfi_t)udp_lrput, NULL, udp_openv4, udp_close, NULL,
263fc80c0dnordmark	&udp_mod_info
266fc80c0dnordmarkstatic struct qinit udp_lwinit = {
267fc80c0dnordmark	(pfi_t)udp_lwput, NULL, udp_openv4, udp_close, NULL,
268fc80c0dnordmark	&udp_mod_info
/*
 * The two exported streamtabs.  They are identical except for the first
 * (read-side) qinit, which selects the IPv4 or IPv6 open routine; the
 * write-side and the two udp_l* tables are shared.
 */
271fc80c0dnordmark/* For AF_INET aka /dev/udp */
272fc80c0dnordmarkstruct streamtab udpinfov4 = {
273fc80c0dnordmark	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
276fc80c0dnordmark/* For AF_INET6 aka /dev/udp6 */
277fc80c0dnordmarkstruct streamtab udpinfov6 = {
278fc80c0dnordmark	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
/*
 * All-zero socket addresses.  They have static storage and no initializer,
 * so every member starts out zero; udp_bind() copies them by structure
 * assignment to clear an address before filling in the family.
 */
2817c478bdstevel@tonic-gatestatic	sin_t	sin_null;	/* Zero address for quick clears */
2827c478bdstevel@tonic-gatestatic	sin6_t	sin6_null;	/* Zero address for quick clears */
2867c478bdstevel@tonic-gate/* Default structure copied into T_INFO_ACK messages */
/*
 * T_INFO_ACK template for IPv4 endpoints.  The initializers are positional
 * and follow the member order named in the per-line comments.
 */
2877c478bdstevel@tonic-gatestatic struct T_info_ack udp_g_t_info_ack_ipv4 = {
2887c478bdstevel@tonic-gate	T_INFO_ACK,
2897c478bdstevel@tonic-gate	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
2907c478bdstevel@tonic-gate	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
2917c478bdstevel@tonic-gate	T_INVALID,	/* CDATA_size. udp does not support connect data. */
2927c478bdstevel@tonic-gate	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
2937c478bdstevel@tonic-gate	sizeof (sin_t),	/* ADDR_size. */
2947c478bdstevel@tonic-gate	0,		/* OPT_size - not initialized here */
2957c478bdstevel@tonic-gate	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
2967c478bdstevel@tonic-gate	T_CLTS,		/* SERV_type.  udp supports connection-less. */
2977c478bdstevel@tonic-gate	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
2987c478bdstevel@tonic-gate	(XPG4_1|SENDZERO) /* PROVIDER_flag */
/*
 * Largest UDP payload that fits in one IPv6 packet: the maximum IP packet
 * size less the UDP header and the fixed IPv6 header.
 */
301ff550d0masputra#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
/*
 * T_INFO_ACK template for IPv6 endpoints.  It mirrors the IPv4 template
 * except for the IPv6 payload limit and the sin6_t address size.
 */
3037c478bdstevel@tonic-gatestatic	struct T_info_ack udp_g_t_info_ack_ipv6 = {
3047c478bdstevel@tonic-gate	T_INFO_ACK,
3057c478bdstevel@tonic-gate	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
3067c478bdstevel@tonic-gate	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
3077c478bdstevel@tonic-gate	T_INVALID,	/* CDATA_size. udp does not support connect data. */
3087c478bdstevel@tonic-gate	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
3097c478bdstevel@tonic-gate	sizeof (sin6_t), /* ADDR_size. */
3107c478bdstevel@tonic-gate	0,		/* OPT_size - not initialized here */
3117c478bdstevel@tonic-gate	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
3127c478bdstevel@tonic-gate	T_CLTS,		/* SERV_type.  udp supports connection-less. */
3137c478bdstevel@tonic-gate	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
3147c478bdstevel@tonic-gate	(XPG4_1|SENDZERO) /* PROVIDER_flag */
3177c478bdstevel@tonic-gate/* largest UDP port number */
/* Used as the upper bound of the anonymous-port tunables in udp_param_arr. */
3187c478bdstevel@tonic-gate#define	UDP_MAX_PORT	65535
321f4b3ec6dh * Table of ND variables supported by udp.  These are loaded into us_nd
3227c478bdstevel@tonic-gate * in udp_open.
3237c478bdstevel@tonic-gate * All of these are alterable, within the min/max values given, at run time.
3247c478bdstevel@tonic-gate */
/*
 * Each row is { minimum, maximum, initial value, name }, in the column
 * order given by the header comment inside the table.  The water-mark rows
 * take their bounds and defaults from the UDP_XMIT_* and UDP_RECV_* macros,
 * and the anonymous-port rows are capped at UDP_MAX_PORT.
 * udp_ndd_get_info_interval is the value udp_bind_hash_report() multiplies
 * by 1000 before converting to ticks, i.e. it is kept in milliseconds.
 */
325ff550d0masputra/* BEGIN CSTYLED */
326ff550d0masputraudpparam_t udp_param_arr[] = {
327ff550d0masputra /*min		max		value		name */
328ff550d0masputra { 0L,		256,		32,		"udp_wroff_extra" },
329ff550d0masputra { 1L,		255,		255,		"udp_ipv4_ttl" },
330ff550d0masputra { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
331ff550d0masputra { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
332ff550d0masputra { 0,		1,		1,		"udp_do_checksum" },
333ff550d0masputra { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
334ff550d0masputra { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
335ff550d0masputra { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
336ff550d0masputra { 0,		     (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
337ff550d0masputra { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
338ff550d0masputra { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
339ff550d0masputra { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
341ff550d0masputra/* END CSTYLED */
343f4b3ec6dh/* Settable in /etc/system */
3447c478bdstevel@tonic-gate/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
3457c478bdstevel@tonic-gateuint32_t udp_random_anon_port = 1;
3487c478bdstevel@tonic-gate * Hook functions to enable cluster networking.
3497c478bdstevel@tonic-gate * On non-clustered systems these vectors must always be NULL
3507c478bdstevel@tonic-gate */
/*
 * Optional bind/unbind notification hooks; both start out NULL.  Each takes
 * the protocol, the address family, a pointer to the local address bytes
 * and the local port.  Note that the protocol argument is declared uchar_t
 * for cl_inet_bind but uint8_t for cl_inet_unbind.
 */
3527c478bdstevel@tonic-gatevoid (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
353ff550d0masputra    uint8_t *laddrp, in_port_t lport) = NULL;
3547c478bdstevel@tonic-gatevoid (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
355ff550d0masputra    uint8_t *laddrp, in_port_t lport) = NULL;
/* Shorthand for a pointer to the union of all TPI primitive layouts. */
357ff550d0masputratypedef union T_primitives *t_primp_t;
36045916cdjpk * Return the next anonymous port in the privileged port range for
3617c478bdstevel@tonic-gate * bind checking.
36245916cdjpk *
36345916cdjpk * Trusted Extension (TX) notes: TX allows administrator to mark or
36445916cdjpk * reserve ports as Multilevel ports (MLP). MLP has special function
36545916cdjpk * on TX systems. Once a port is made MLP, it's not available as
36645916cdjpk * ordinary port. This creates "holes" in the port name space. It
36745916cdjpk * may be necessary to skip the "holes" to find a suitable anon port.
3687c478bdstevel@tonic-gate */
/*
 * udp_get_next_priv_port: hand out the next candidate port below
 * IPPORT_RESERVED, walking downward from IPPORT_RESERVED - 1.
 *
 * udp - endpoint whose stack (udp_us) supplies us_min_anonpriv_port and
 *       whose conn_cred supplies the zone passed to tsol_next_port().
 *
 * Returns the candidate port and post-decrements the cursor for the next
 * call, or 0 if the cursor has to be reset a second time within one call.
 */
3697c478bdstevel@tonic-gatestatic in_port_t
37045916cdjpkudp_get_next_priv_port(udp_t *udp)
/*
 * The cursor is function-static: it persists across calls and is shared by
 * every caller.  No lock is taken in this function.
 */
3727c478bdstevel@tonic-gate	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
37345916cdjpk	in_port_t nextport;
37445916cdjpk	boolean_t restart = B_FALSE;
375f4b3ec6dh	udp_stack_t *us = udp->udp_us;
/*
 * Wrap the cursor back to the top of the range when it has left
 * [us_min_anonpriv_port, IPPORT_RESERVED).  "restart" allows one wrap per
 * call; a second one means no port was found and 0 is returned.
 * NOTE(review): the "retry" label targeted by the goto below is not visible
 * in this listing; presumably it sits immediately before this check --
 * confirm.
 */
378f4b3ec6dh	if (next_priv_port < us->us_min_anonpriv_port ||
37945916cdjpk	    next_priv_port >= IPPORT_RESERVED) {
3807c478bdstevel@tonic-gate		next_priv_port = IPPORT_RESERVED - 1;
38145916cdjpk		if (restart)
38245916cdjpk			return (0);
38345916cdjpk		restart = B_TRUE;
38445916cdjpk	}
/*
 * On a labeled system tsol_next_port() may return a replacement port; a
 * non-zero result becomes the new cursor and control goes back to "retry".
 */
38645916cdjpk	if (is_system_labeled() &&
38745916cdjpk	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
38845916cdjpk	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
38945916cdjpk		next_priv_port = nextport;
39045916cdjpk		goto retry;
3917c478bdstevel@tonic-gate	}
3937c478bdstevel@tonic-gate	return (next_priv_port--);
3967c478bdstevel@tonic-gate/* UDP bind hash report triggered via the Named Dispatch mechanism. */
3977c478bdstevel@tonic-gate/* ARGSUSED */
/*
 * udp_bind_hash_report: append a listing of the bind hash to the reply.
 *
 * q  - queue of the requesting stream; its conn selects the UDP stack and
 *      the zone used for filtering.
 * mp - reply message; the report text is written into mp->b_cont.
 * cp - not referenced by this function.
 * cr - caller credential; NULL or a failing secpolicy_ip_config() check
 *      subjects the caller to rate limiting.
 *
 * Always returns 0; failures are reported as text in the reply instead.
 */
3987c478bdstevel@tonic-gatestatic int
3997c478bdstevel@tonic-gateudp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
4017c478bdstevel@tonic-gate	udp_fanout_t	*udpf;
4027c478bdstevel@tonic-gate	int		i;
4037c478bdstevel@tonic-gate	zoneid_t	zoneid;
404ff550d0masputra	conn_t		*connp;
405ff550d0masputra	udp_t		*udp;
406f4b3ec6dh	udp_stack_t	*us;
/*
 * "udp" first names the requesting endpoint (used only to reach the stack)
 * and is later reused as the cursor over each bucket's chain.
 */
408ff550d0masputra	connp = Q_TO_CONN(q);
409ff550d0masputra	udp = connp->conn_udp;
410f4b3ec6dh	us = udp->udp_us;
4127c478bdstevel@tonic-gate	/* Refer to comments in udp_status_report(). */
/*
 * Rate limit for non-privileged callers: refuse if less than
 * us_ndd_get_info_interval (scaled by 1000 and converted to ticks) has
 * passed since the last completed report.
 */
413f4b3ec6dh	if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) {
414f4b3ec6dh		if (ddi_get_lbolt() - us->us_last_ndd_get_info_time <
415f4b3ec6dh		    drv_usectohz(us->us_ndd_get_info_interval * 1000)) {
4167c478bdstevel@tonic-gate			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
4177c478bdstevel@tonic-gate			return (0);
4187c478bdstevel@tonic-gate		}
4197c478bdstevel@tonic-gate	}
4207c478bdstevel@tonic-gate	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
4217c478bdstevel@tonic-gate		/* The following may work even if we cannot get a large buf. */
4227c478bdstevel@tonic-gate		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
4237c478bdstevel@tonic-gate		return (0);
4247c478bdstevel@tonic-gate	}
4267c478bdstevel@tonic-gate	(void) mi_mpprintf(mp,
4277c478bdstevel@tonic-gate	    "UDP     " MI_COL_HDRPAD_STR
4287c478bdstevel@tonic-gate	/*   12345678[89ABCDEF] */
4297c478bdstevel@tonic-gate	    " zone lport src addr        dest addr       port  state");
4307c478bdstevel@tonic-gate	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */
/*
 * A caller in the global zone sees every endpoint; any other caller sees
 * only the endpoints whose conn_zoneid matches its own.
 */
432ff550d0masputra	zoneid = connp->conn_zoneid;
/* Walk every bucket, holding that bucket's uf_lock while its chain is read. */
434f4b3ec6dh	for (i = 0; i < us->us_bind_fanout_size; i++) {
435f4b3ec6dh		udpf = &us->us_bind_fanout[i];
4367c478bdstevel@tonic-gate		mutex_enter(&udpf->uf_lock);
4387c478bdstevel@tonic-gate		/* Print the hash index. */
4397c478bdstevel@tonic-gate		udp = udpf->uf_udp;
4407c478bdstevel@tonic-gate		if (zoneid != GLOBAL_ZONEID) {
4417c478bdstevel@tonic-gate			/* skip to first entry in this zone; might be none */
4427c478bdstevel@tonic-gate			while (udp != NULL &&
443ff550d0masputra			    udp->udp_connp->conn_zoneid != zoneid)
4447c478bdstevel@tonic-gate				udp = udp->udp_bind_hash;
4457c478bdstevel@tonic-gate		}
/* The bucket index is emitted only when the bucket has a visible entry. */
4467c478bdstevel@tonic-gate		if (udp != NULL) {
4477c478bdstevel@tonic-gate			uint_t print_len, buf_len;
/*
 * Format the index directly into the space left in the report buffer.  If
 * the text did not fit, advance the write pointer only as far as the end
 * of the buffer.
 */
4497c478bdstevel@tonic-gate			buf_len = mp->b_cont->b_datap->db_lim -
4507c478bdstevel@tonic-gate			    mp->b_cont->b_wptr;
4517c478bdstevel@tonic-gate			print_len = snprintf((char *)mp->b_cont->b_wptr,
4527c478bdstevel@tonic-gate			    buf_len, "%d\n", i);
4537c478bdstevel@tonic-gate			if (print_len < buf_len) {
4547c478bdstevel@tonic-gate				mp->b_cont->b_wptr += print_len;
4557c478bdstevel@tonic-gate			} else {
4567c478bdstevel@tonic-gate				mp->b_cont->b_wptr += buf_len;
4577c478bdstevel@tonic-gate			}
4587c478bdstevel@tonic-gate			for (; udp != NULL; udp = udp->udp_bind_hash) {
4597c478bdstevel@tonic-gate				if (zoneid == GLOBAL_ZONEID ||
460ff550d0masputra				    zoneid == udp->udp_connp->conn_zoneid)
4617c478bdstevel@tonic-gate					udp_report_item(mp->b_cont, udp);
4627c478bdstevel@tonic-gate			}
4637c478bdstevel@tonic-gate		}
4647c478bdstevel@tonic-gate		mutex_exit(&udpf->uf_lock);
4657c478bdstevel@tonic-gate	}
/*
 * The timestamp is refreshed only here, so the two early returns above do
 * not restart the rate-limit interval.
 */
466f4b3ec6dh	us->us_last_ndd_get_info_time = ddi_get_lbolt();
4677c478bdstevel@tonic-gate	return (0);
4717c478bdstevel@tonic-gate * Hash list removal routine for udp_t structures.
4727c478bdstevel@tonic-gate */
/*
 * udp_bind_hash_remove: unlink an endpoint from its bind hash bucket.
 *
 * udp               - endpoint to remove; a NULL udp_ptpbhn means it is
 *                     not on any list and the call is a no-op.
 * caller_holds_lock - B_TRUE if the caller already holds the bucket's
 *                     uf_lock; otherwise the lock is taken and dropped
 *                     here.
 */
4737c478bdstevel@tonic-gatestatic void
4747c478bdstevel@tonic-gateudp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
4767c478bdstevel@tonic-gate	udp_t	*udpnext;
4777c478bdstevel@tonic-gate	kmutex_t *lockp;
478f4b3ec6dh	udp_stack_t *us = udp->udp_us;
/* Quick check made without the lock; it is repeated below under the lock. */
4807c478bdstevel@tonic-gate	if (udp->udp_ptpbhn == NULL)
4817c478bdstevel@tonic-gate		return;
4837c478bdstevel@tonic-gate	/*
4847c478bdstevel@tonic-gate	 * Extract the lock pointer in case there are concurrent
4857c478bdstevel@tonic-gate	 * hash_remove's for this instance.
4867c478bdstevel@tonic-gate	 */
4877c478bdstevel@tonic-gate	ASSERT(udp->udp_port != 0);
/*
 * The bucket is found by hashing udp_port.  lockp is assigned and used only
 * on the !caller_holds_lock path.
 */
4887c478bdstevel@tonic-gate	if (!caller_holds_lock) {
489f4b3ec6dh		lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
490437220cdanmcd		    us->us_bind_fanout_size)].uf_lock;
4917c478bdstevel@tonic-gate		ASSERT(lockp != NULL);
4927c478bdstevel@tonic-gate		mutex_enter(lockp);
4937c478bdstevel@tonic-gate	}
/*
 * udp_ptpbhn points at whichever pointer currently refers to this endpoint
 * (the bucket head or the previous entry's udp_bind_hash).  Unlinking hands
 * that back-pointer to the successor, if there is one, and then stores the
 * successor through it.
 */
4947c478bdstevel@tonic-gate	if (udp->udp_ptpbhn != NULL) {
4957c478bdstevel@tonic-gate		udpnext = udp->udp_bind_hash;
4967c478bdstevel@tonic-gate		if (udpnext != NULL) {
4977c478bdstevel@tonic-gate			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
4987c478bdstevel@tonic-gate			udp->udp_bind_hash = NULL;
4997c478bdstevel@tonic-gate		}
5007c478bdstevel@tonic-gate		*udp->udp_ptpbhn = udpnext;
5017c478bdstevel@tonic-gate		udp->udp_ptpbhn = NULL;
5027c478bdstevel@tonic-gate	}
5037c478bdstevel@tonic-gate	if (!caller_holds_lock) {
5047c478bdstevel@tonic-gate		mutex_exit(lockp);
5057c478bdstevel@tonic-gate	}
/*
 * udp_bind_hash_insert: link an endpoint into a bind hash bucket.
 *
 * uf  - destination bucket; the caller must hold uf->uf_lock.
 * udp - endpoint to insert; it must not already be on a list
 *       (udp_ptpbhn == NULL).
 *
 * The endpoint normally goes at the head of the chain.  The exception is an
 * endpoint bound to INADDR_ANY entering a chain whose head is bound to a
 * specific address: it is placed after the leading run of specific-address
 * entries, in front of the first INADDR_ANY entry or at the tail.
 */
5087c478bdstevel@tonic-gatestatic void
5097c478bdstevel@tonic-gateudp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
/*
 * udpp tracks the pointer slot the new endpoint will be stored through;
 * udpnext is the entry that will follow it (NULL at the tail).
 */
5117c478bdstevel@tonic-gate	udp_t	**udpp;
5127c478bdstevel@tonic-gate	udp_t	*udpnext;
5147c478bdstevel@tonic-gate	ASSERT(MUTEX_HELD(&uf->uf_lock));
515fc80c0dnordmark	ASSERT(udp->udp_ptpbhn == NULL);
5167c478bdstevel@tonic-gate	udpp = &uf->uf_udp;
5177c478bdstevel@tonic-gate	udpnext = udpp[0];
5187c478bdstevel@tonic-gate	if (udpnext != NULL) {
5197c478bdstevel@tonic-gate		/*
5207c478bdstevel@tonic-gate		 * If the new udp bound to the INADDR_ANY address
5217c478bdstevel@tonic-gate		 * and the first one in the list is not bound to
5227c478bdstevel@tonic-gate		 * INADDR_ANY we skip all entries until we find the
5237c478bdstevel@tonic-gate		 * first one bound to INADDR_ANY.
5247c478bdstevel@tonic-gate		 * This makes sure that applications binding to a
5257c478bdstevel@tonic-gate		 * specific address get preference over those binding to
5267c478bdstevel@tonic-gate		 * INADDR_ANY.
5277c478bdstevel@tonic-gate		 */
5287c478bdstevel@tonic-gate		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
5297c478bdstevel@tonic-gate		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
5307c478bdstevel@tonic-gate			while ((udpnext = udpp[0]) != NULL &&
5317c478bdstevel@tonic-gate			    !V6_OR_V4_INADDR_ANY(
5327c478bdstevel@tonic-gate			    udpnext->udp_bound_v6src)) {
5337c478bdstevel@tonic-gate				udpp = &(udpnext->udp_bind_hash);
5347c478bdstevel@tonic-gate			}
5357c478bdstevel@tonic-gate			if (udpnext != NULL)
5367c478bdstevel@tonic-gate				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
5377c478bdstevel@tonic-gate		} else {
5387c478bdstevel@tonic-gate			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
5397c478bdstevel@tonic-gate		}
5407c478bdstevel@tonic-gate	}
/*
 * Splice in: the successor's back-pointer was redirected above, so only the
 * new endpoint's own links and the chosen slot remain to be set.
 */
5417c478bdstevel@tonic-gate	udp->udp_bind_hash = udpnext;
5427c478bdstevel@tonic-gate	udp->udp_ptpbhn = udpp;
5437c478bdstevel@tonic-gate	udpp[0] = udp;
5477c478bdstevel@tonic-gate * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
5487c478bdstevel@tonic-gate * passed to udp_wput.
5497c478bdstevel@tonic-gate * It associates a port number and local address with the stream.
5507c478bdstevel@tonic-gate * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
5517c478bdstevel@tonic-gate * protocol type (IPPROTO_UDP) placed in the message following the address.
5527c478bdstevel@tonic-gate * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
5537c478bdstevel@tonic-gate * (Called as writer.)
5547c478bdstevel@tonic-gate *
5557c478bdstevel@tonic-gate * Note that UDP over IPv4 and IPv6 sockets can use the same port number
5567c478bdstevel@tonic-gate * without setting SO_REUSEADDR. This is needed so that they
5577c478bdstevel@tonic-gate * can be viewed as two independent transport protocols.
5587c478bdstevel@tonic-gate * However, anonymous ports are allocated from the same range to avoid
559f4b3ec6dh * duplicating the us->us_next_port_to_try.
5607c478bdstevel@tonic-gate */
5617c478bdstevel@tonic-gatestatic void
5627c478bdstevel@tonic-gateudp_bind(queue_t *q, mblk_t *mp)
5647c478bdstevel@tonic-gate	sin_t		*sin;
5657c478bdstevel@tonic-gate	sin6_t		*sin6;
5667c478bdstevel@tonic-gate	mblk_t		*mp1;
5677c478bdstevel@tonic-gate	in_port_t	port;		/* Host byte order */
5687c478bdstevel@tonic-gate	in_port_t	requested_port;	/* Host byte order */
5697c478bdstevel@tonic-gate	struct T_bind_req *tbr;
5707c478bdstevel@tonic-gate	int		count;
5717c478bdstevel@tonic-gate	in6_addr_t	v6src;
5727c478bdstevel@tonic-gate	boolean_t	bind_to_req_port_only;
5737c478bdstevel@tonic-gate	int		loopmax;
5747c478bdstevel@tonic-gate	udp_fanout_t	*udpf;
5757c478bdstevel@tonic-gate	in_port_t	lport;		/* Network byte order */
5767c478bdstevel@tonic-gate	zoneid_t	zoneid;
577ff550d0masputra	conn_t		*connp;
578ff550d0masputra	udp_t		*udp;
57945916cdjpk	boolean_t	is_inaddr_any;
58045916cdjpk	mlp_type_t	addrtype, mlptype;
581f4b3ec6dh	udp_stack_t	*us;
583ff550d0masputra	connp = Q_TO_CONN(q);
584ff550d0masputra	udp = connp->conn_udp;
585f4b3ec6dh	us = udp->udp_us;
5867c478bdstevel@tonic-gate	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
5877c478bdstevel@tonic-gate		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
5887c478bdstevel@tonic-gate		    "udp_bind: bad req, len %u",
5897c478bdstevel@tonic-gate		    (uint_t)(mp->b_wptr - mp->b_rptr));
5907c478bdstevel@tonic-gate		udp_err_ack(q, mp, TPROTO, 0);
5917c478bdstevel@tonic-gate		return;
5927c478bdstevel@tonic-gate	}
5937c478bdstevel@tonic-gate	if (udp->udp_state != TS_UNBND) {
5947c478bdstevel@tonic-gate		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
5957c478bdstevel@tonic-gate		    "udp_bind: bad state, %u", udp->udp_state);
5967c478bdstevel@tonic-gate		udp_err_ack(q, mp, TOUTSTATE, 0);
5977c478bdstevel@tonic-gate		return;
5987c478bdstevel@tonic-gate	}
5997c478bdstevel@tonic-gate	/*
6007c478bdstevel@tonic-gate	 * Reallocate the message to make sure we have enough room for an
6017c478bdstevel@tonic-gate	 * address and the protocol type.
6027c478bdstevel@tonic-gate	 */
6037c478bdstevel@tonic-gate	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
6047c478bdstevel@tonic-gate	if (!mp1) {
6057c478bdstevel@tonic-gate		udp_err_ack(q, mp, TSYSERR, ENOMEM);
6067c478bdstevel@tonic-gate		return;
6077c478bdstevel@tonic-gate	}
6097c478bdstevel@tonic-gate	mp = mp1;
6107c478bdstevel@tonic-gate	tbr = (struct T_bind_req *)mp->b_rptr;
6117c478bdstevel@tonic-gate	switch (tbr->ADDR_length) {
6127c478bdstevel@tonic-gate	case 0:			/* Request for a generic port */
6137c478bdstevel@tonic-gate		tbr->ADDR_offset = sizeof (struct T_bind_req);
6147c478bdstevel@tonic-gate		if (udp->udp_family == AF_INET) {
6157c478bdstevel@tonic-gate			tbr->ADDR_length = sizeof (sin_t);
6167c478bdstevel@tonic-gate			sin = (sin_t *)&tbr[1];
6177c478bdstevel@tonic-gate			*sin = sin_null;
6187c478bdstevel@tonic-gate			sin->sin_family = AF_INET;
6197c478bdstevel@tonic-gate			mp->b_wptr = (uchar_t *)&sin[1];
6207c478bdstevel@tonic-gate		} else {
6217c478bdstevel@tonic-gate			ASSERT(udp->udp_family == AF_INET6);
6227c478bdstevel@tonic-gate			tbr->ADDR_length = sizeof (sin6_t);
6237c478bdstevel@tonic-gate			sin6 = (sin6_t *)&tbr[1];
6247c478bdstevel@tonic-gate			*sin6 = sin6_null;
6257c478bdstevel@tonic-gate			sin6->sin6_family = AF_INET6;
6267c478bdstevel@tonic-gate			mp->b_wptr = (uchar_t *)&sin6[1];
6277c478bdstevel@tonic-gate		}
6287c478bdstevel@tonic-gate		port = 0;
6297c478bdstevel@tonic-gate		break;
6317c478bdstevel@tonic-gate	case sizeof (sin_t):	/* Complete IPv4 address */
6327c478bdstevel@tonic-gate		sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
6337c478bdstevel@tonic-gate		    sizeof (sin_t));
6347c478bdstevel@tonic-gate		if (sin == NULL || !OK_32PTR((char *)sin)) {
6357c478bdstevel@tonic-gate			udp_err_ack(q, mp, TSYSERR, EINVAL);
6367c478bdstevel@tonic-gate			return;
6377c478bdstevel@tonic-gate		}
6387c478bdstevel@tonic-gate		if (udp->udp_family != AF_INET ||
6397c478bdstevel@tonic-gate		    sin->sin_family != AF_INET) {
6407c478bdstevel@tonic-gate			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
6417c478bdstevel@tonic-gate			return;
6427c478bdstevel@tonic-gate		}
6437c478bdstevel@tonic-gate		port = ntohs(sin->sin_port);
6447c478bdstevel@tonic-gate		break;
6467c478bdstevel@tonic-gate	case sizeof (sin6_t):	/* complete IPv6 address */
6477c478bdstevel@tonic-gate		sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
6487c478bdstevel@tonic-gate		    sizeof (sin6_t));
6497c478bdstevel@tonic-gate		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
6507c478bdstevel@tonic-gate			udp_err_ack(q, mp, TSYSERR, EINVAL);
6517c478bdstevel@tonic-gate			return;
6527c478bdstevel@tonic-gate		}
6537c478bdstevel@tonic-gate		if (udp->udp_family != AF_INET6 ||
6547c478bdstevel@tonic-gate		    sin6->sin6_family != AF_INET6) {
6557c478bdstevel@tonic-gate			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
6567c478bdstevel@tonic-gate			return;
6577c478bdstevel@tonic-gate		}
6587c478bdstevel@tonic-gate		port = ntohs(sin6->sin6_port);
6597c478bdstevel@tonic-gate		break;
6617c478bdstevel@tonic-gate	default:		/* Invalid request */
6627c478bdstevel@tonic-gate		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
6637c478bdstevel@tonic-gate		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
6647c478bdstevel@tonic-gate		udp_err_ack(q, mp, TBADADDR, 0);
6657c478bdstevel@tonic-gate		return;
6667c478bdstevel@tonic-gate	}
6687c478bdstevel@tonic-gate	requested_port = port;
6707c478bdstevel@tonic-gate	if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ)
6717c478bdstevel@tonic-gate		bind_to_req_port_only = B_FALSE;
6727c478bdstevel@tonic-gate	else			/* T_BIND_REQ and requested_port != 0 */
6737c478bdstevel@tonic-gate		bind_to_req_port_only = B_TRUE;
6757c478bdstevel@tonic-gate	if (requested_port == 0) {
6767c478bdstevel@tonic-gate		/*
6777c478bdstevel@tonic-gate		 * If the application passed in zero for the port number, it
6787c478bdstevel@tonic-gate		 * doesn't care which port number we bind to. Get one in the
6797c478bdstevel@tonic-gate		 * valid range.
6807c478bdstevel@tonic-gate		 */
6817c478bdstevel@tonic-gate		if (udp->udp_anon_priv_bind) {
68245916cdjpk			port = udp_get_next_priv_port(udp);
6837c478bdstevel@tonic-gate		} else {
68445916cdjpk			port = udp_update_next_port(udp,
685f4b3ec6dh			    us->us_next_port_to_try, B_TRUE);
6867c478bdstevel@tonic-gate		}
6877c478bdstevel@tonic-gate	} else {
6887c478bdstevel@tonic-gate		/*
6897c478bdstevel@tonic-gate		 * If the port is in the well-known privileged range,
6907c478bdstevel@tonic-gate		 * make sure the caller was privileged.
6917c478bdstevel@tonic-gate		 */
6927c478bdstevel@tonic-gate		int i;
6937c478bdstevel@tonic-gate		boolean_t priv = B_FALSE;
695f4b3ec6dh		if (port < us->us_smallest_nonpriv_port) {
6967c478bdstevel@tonic-gate			priv = B_TRUE;
6977c478bdstevel@tonic-gate		} else {
698f4b3ec6dh			for (i = 0; i < us->us_num_epriv_ports; i++) {
699f4b3ec6dh				if (port == us->us_epriv_ports[i]) {
7007c478bdstevel@tonic-gate					priv = B_TRUE;
7017c478bdstevel@tonic-gate					break;
7027c478bdstevel@tonic-gate				}
7037c478bdstevel@tonic-gate			}
7047c478bdstevel@tonic-gate		}
7067c478bdstevel@tonic-gate		if (priv) {
707ff550d0masputra			cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);
709ddf7fe9casper			if (secpolicy_net_privaddr(cr, port,
710ddf7fe9casper			    IPPROTO_UDP) != 0) {
7117c478bdstevel@tonic-gate				udp_err_ack(q, mp, TACCES, 0);
7127c478bdstevel@tonic-gate				return;
7137c478bdstevel@tonic-gate			}
7147c478bdstevel@tonic-gate		}
7157c478bdstevel@tonic-gate	}
71745916cdjpk	if (port == 0) {
71845916cdjpk		udp_err_ack(q, mp, TNOADDR, 0);
71945916cdjpk		return;
72045916cdjpk	}
7227c478bdstevel@tonic-gate	/*
723fc80c0dnordmark	 * The state must be TS_UNBND. TPI mandates that users must send
724fc80c0dnordmark	 * TPI primitives only 1 at a time and wait for the response before
725fc80c0dnordmark	 * sending the next primitive.
726fc80c0dnordmark	 */
727fc80c0dnordmark	rw_enter(&udp->udp_rwlock, RW_WRITER);
728fc80c0dnordmark	if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
729fc80c0dnordmark		rw_exit(&udp->udp_rwlock);
730fc80c0dnordmark		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
731fc80c0dnordmark		    "udp_bind: bad state, %u", udp->udp_state);
732fc80c0dnordmark		udp_err_ack(q, mp, TOUTSTATE, 0);
733fc80c0dnordmark		return;
734fc80c0dnordmark	}
735fc80c0dnordmark	udp->udp_pending_op = tbr->PRIM_type;
736fc80c0dnordmark	/*
7377c478bdstevel@tonic-gate	 * Copy the source address into our udp structure. This address
7387c478bdstevel@tonic-gate	 * may still be zero; if so, IP will fill in the correct address
739fc80c0dnordmark	 * each time an outbound packet is passed to it. Since the udp is
740fc80c0dnordmark	 * not yet in the bind hash list, we don't grab the uf_lock to
741fc80c0dnordmark	 * change udp_ipversion
7427c478bdstevel@tonic-gate	 */
7437c478bdstevel@tonic-gate	if (udp->udp_family == AF_INET) {
7447c478bdstevel@tonic-gate		ASSERT(sin != NULL);
7457c478bdstevel@tonic-gate		ASSERT(udp->udp_ipversion == IPV4_VERSION);
7467c478bdstevel@tonic-gate		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
7477c478bdstevel@tonic-gate		    udp->udp_ip_snd_options_len;
7487c478bdstevel@tonic-gate		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
7497c478bdstevel@tonic-gate	} else {
7507c478bdstevel@tonic-gate		ASSERT(sin6 != NULL);
7517c478bdstevel@tonic-gate		v6src = sin6->sin6_addr;
7527c478bdstevel@tonic-gate		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
753fc80c0dnordmark			/*
754fc80c0dnordmark			 * no need to hold the uf_lock to set the udp_ipversion
755fc80c0dnordmark			 * since we are not yet in the fanout list
756fc80c0dnordmark			 */
7577c478bdstevel@tonic-gate			udp->udp_ipversion = IPV4_VERSION;
7587c478bdstevel@tonic-gate			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
7597c478bdstevel@tonic-gate			    UDPH_SIZE + udp->udp_ip_snd_options_len;
7607c478bdstevel@tonic-gate		} else {
7617c478bdstevel@tonic-gate			udp->udp_ipversion = IPV6_VERSION;
7627c478bdstevel@tonic-gate			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
7637c478bdstevel@tonic-gate		}
7647c478bdstevel@tonic-gate	}
7667c478bdstevel@tonic-gate	/*
7677c478bdstevel@tonic-gate	 * If udp_reuseaddr is not set, then we have to make sure that
7687c478bdstevel@tonic-gate	 * the IP address and port number the application requested
7697c478bdstevel@tonic-gate	 * (or we selected for the application) is not being used by
7707c478bdstevel@tonic-gate	 * another stream.  If another stream is already using the
7717c478bdstevel@tonic-gate	 * requested IP address and port, the behavior depends on
7727c478bdstevel@tonic-gate	 * "bind_to_req_port_only". If set the bind fails; otherwise we
7737c478bdstevel@tonic-gate	 * search for an unused port to bind to the stream.
7747c478bdstevel@tonic-gate	 *
7757c478bdstevel@tonic-gate	 * As per the BSD semantics, as modified by the Deering multicast
7767c478bdstevel@tonic-gate	 * changes, if udp_reuseaddr is set, then we allow multiple binds
7777c478bdstevel@tonic-gate	 * to the same port independent of the local IP address.
7787c478bdstevel@tonic-gate	 *
7797c478bdstevel@tonic-gate	 * This is slightly different than in SunOS 4.X which did not
7807c478bdstevel@tonic-gate	 * support IP multicast. Note that the change implemented by the
7817c478bdstevel@tonic-gate	 * Deering multicast code affects all binds - not only binding
7827c478bdstevel@tonic-gate	 * to IP multicast addresses.
7837c478bdstevel@tonic-gate	 *
7847c478bdstevel@tonic-gate	 * Note that when binding to port zero we ignore SO_REUSEADDR in
7857c478bdstevel@tonic-gate	 * order to guarantee a unique port.
7867c478bdstevel@tonic-gate	 */
7887c478bdstevel@tonic-gate	count = 0;
7897c478bdstevel@tonic-gate	if (udp->udp_anon_priv_bind) {
790f4b3ec6dh		/*
791f4b3ec6dh		 * loopmax = (IPPORT_RESERVED-1) -
792f4b3ec6dh		 *    us->us_min_anonpriv_port + 1
793f4b3ec6dh		 */
794f4b3ec6dh		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
7957c478bdstevel@tonic-gate	} else {
796f4b3ec6dh		loopmax = us->us_largest_anon_port -
797f4b3ec6dh		    us->us_smallest_anon_port + 1;
7987c478bdstevel@tonic-gate	}
80045916cdjpk	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
801ff550d0masputra	zoneid = connp->conn_zoneid;
8037c478bdstevel@tonic-gate	for (;;) {
8047c478bdstevel@tonic-gate		udp_t		*udp1;
8057c478bdstevel@tonic-gate		boolean_t	found_exclbind = B_FALSE;
8077c478bdstevel@tonic-gate		/*
8087c478bdstevel@tonic-gate		 * Walk through the list of udp streams bound to
8097c478bdstevel@tonic-gate		 * requested port with the same IP address.
8107c478bdstevel@tonic-gate		 */
8117c478bdstevel@tonic-gate		lport = htons(port);
812f4b3ec6dh		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
813f4b3ec6dh		    us->us_bind_fanout_size)];
8147c478bdstevel@tonic-gate		mutex_enter(&udpf->uf_lock);
8157c478bdstevel@tonic-gate		for (udp1 = udpf->uf_udp; udp1 != NULL;
8167c478bdstevel@tonic-gate		    udp1 = udp1->udp_bind_hash) {
81745916cdjpk			if (lport != udp1->udp_port)
81845916cdjpk				continue;
82045916cdjpk			/*
82145916cdjpk			 * On a labeled system, we must treat bindings to ports
82245916cdjpk			 * on shared IP addresses by sockets with MAC exemption
82345916cdjpk			 * privilege as being in all zones, as there's
82445916cdjpk			 * otherwise no way to identify the right receiver.
82545916cdjpk			 */
82655e77fark			if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) ||
82755e77fark			    IPCL_ZONE_MATCH(connp,
82855e77fark			    udp1->udp_connp->conn_zoneid)) &&
829222c5bckp			    !connp->conn_mac_exempt && \
830222c5bckp			    !udp1->udp_connp->conn_mac_exempt)
8317c478bdstevel@tonic-gate				continue;
8337c478bdstevel@tonic-gate			/*
8347c478bdstevel@tonic-gate			 * If UDP_EXCLBIND is set for either the bound or
8357c478bdstevel@tonic-gate			 * binding endpoint, the semantics of bind
8367c478bdstevel@tonic-gate			 * is changed according to the following chart.
8377c478bdstevel@tonic-gate			 *
8387c478bdstevel@tonic-gate			 * spec = specified address (v4 or v6)
8397c478bdstevel@tonic-gate			 * unspec = unspecified address (v4 or v6)
8407c478bdstevel@tonic-gate			 * A = specified addresses are different for endpoints
8417c478bdstevel@tonic-gate			 *
8427c478bdstevel@tonic-gate			 * bound	bind to		allowed?
8437c478bdstevel@tonic-gate			 * -------------------------------------
8447c478bdstevel@tonic-gate			 * unspec	unspec		no
8457c478bdstevel@tonic-gate			 * unspec	spec		no
8467c478bdstevel@tonic-gate			 * spec		unspec		no
8477c478bdstevel@tonic-gate			 * spec		spec		yes if A
84845916cdjpk			 *
84945916cdjpk			 * For labeled systems, SO_MAC_EXEMPT behaves the same
85045916cdjpk			 * as UDP_EXCLBIND, except that zoneid is ignored.
8517c478bdstevel@tonic-gate			 */
85245916cdjpk			if (udp1->udp_exclbind || udp->udp_exclbind ||
853222c5bckp			    udp1->udp_connp->conn_mac_exempt ||
854222c5bckp			    connp->conn_mac_exempt) {
8557c478bdstevel@tonic-gate				if (V6_OR_V4_INADDR_ANY(
8567c478bdstevel@tonic-gate				    udp1->udp_bound_v6src) ||
8577c478bdstevel@tonic-gate				    is_inaddr_any ||
8587c478bdstevel@tonic-gate				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
8597c478bdstevel@tonic-gate				    &v6src)) {
8607c478bdstevel@tonic-gate					found_exclbind = B_TRUE;
8617c478bdstevel@tonic-gate					break;
8627c478bdstevel@tonic-gate				}
8637c478bdstevel@tonic-gate				continue;
8647c478bdstevel@tonic-gate			}
8667c478bdstevel@tonic-gate			/*
8677c478bdstevel@tonic-gate			 * Check ipversion to allow IPv4 and IPv6 sockets to
868738d543dduvall			 * have disjoint port number spaces.
8697c478bdstevel@tonic-gate			 */
870f952bcdse			if (udp->udp_ipversion != udp1->udp_ipversion) {
872f952bcdse				/*
873f952bcdse				 * On the first time through the loop, if the
874f952bcdse				 * user intentionally specified a
875f952bcdse				 * particular port number, then ignore any
876f952bcdse				 * bindings of the other protocol that may
877f952bcdse				 * conflict. This allows the user to bind IPv6
878f952bcdse				 * alone and get both v4 and v6, or bind both
879f952bcdse				 * and get each separately. On subsequent
880f952bcdse				 * times through the loop, we're checking a
881f952bcdse				 * port that we chose (not the user) and thus
882f952bcdse				 * we do not allow casual duplicate bindings.
883f952bcdse				 */
884f952bcdse				if (count == 0 && requested_port != 0)
885f952bcdse					continue;
886f952bcdse			}
8887c478bdstevel@tonic-gate			/*
8897c478bdstevel@tonic-gate			 * No difference depending on SO_REUSEADDR.
8907c478bdstevel@tonic-gate			 *
8917c478bdstevel@tonic-gate			 * If existing port is bound to a
8927c478bdstevel@tonic-gate			 * non-wildcard IP address and
8937c478bdstevel@tonic-gate			 * the requesting stream is bound to
8947c478bdstevel@tonic-gate			 * a distinct IP address
8957c478bdstevel@tonic-gate			 * (non-wildcard, also), keep going.
8967c478bdstevel@tonic-gate			 */
8977c478bdstevel@tonic-gate			if (!is_inaddr_any &&
8987c478bdstevel@tonic-gate			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
8997c478bdstevel@tonic-gate			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
9007c478bdstevel@tonic-gate			    &v6src)) {
9017c478bdstevel@tonic-gate				continue;
9027c478bdstevel@tonic-gate			}
9037c478bdstevel@tonic-gate			break;
9047c478bdstevel@tonic-gate		}
9067c478bdstevel@tonic-gate		if (!found_exclbind &&
9077c478bdstevel@tonic-gate		    (udp->udp_reuseaddr && requested_port != 0)) {
9087c478bdstevel@tonic-gate			break;
9097c478bdstevel@tonic-gate		}
9117c478bdstevel@tonic-gate		if (udp1 == NULL) {
9127c478bdstevel@tonic-gate			/*
9137c478bdstevel@tonic-gate			 * No other stream has this IP address
9147c478bdstevel@tonic-gate			 * and port number. We can use it.
9157c478bdstevel@tonic-gate			 */
9167c478bdstevel@tonic-gate			break;
9177c478bdstevel@tonic-gate		}
9187c478bdstevel@tonic-gate		mutex_exit(&udpf->uf_lock);
9197c478bdstevel@tonic-gate		if (bind_to_req_port_only) {
9207c478bdstevel@tonic-gate			/*
9217c478bdstevel@tonic-gate			 * We get here only when the requested port
9227c478bdstevel@tonic-gate			 * is bound (and only on the first iteration
9237c478bdstevel@tonic-gate			 * of the for() loop).
9247c478bdstevel@tonic-gate			 *
9257c478bdstevel@tonic-gate			 * The semantics of this bind request
9267c478bdstevel@tonic-gate			 * require it to fail so we return from
9277c478bdstevel@tonic-gate			 * the routine (and exit the loop).
9287c478bdstevel@tonic-gate			 *
9297c478bdstevel@tonic-gate			 */
930fc80c0dnordmark			udp->udp_pending_op = -1;
931fc80c0dnordmark			rw_exit(&udp->udp_rwlock);
9327c478bdstevel@tonic-gate			udp_err_ack(q, mp, TADDRBUSY, 0);
9337c478bdstevel@tonic-gate			return;
9347c478bdstevel@tonic-gate		}
9367c478bdstevel@tonic-gate		if (udp->udp_anon_priv_bind) {
93745916cdjpk			port = udp_get_next_priv_port(udp);
9387c478bdstevel@tonic-gate		} else {
9397c478bdstevel@tonic-gate			if ((count == 0) && (requested_port != 0)) {
9407c478bdstevel@tonic-gate				/*
9417c478bdstevel@tonic-gate				 * If the application wants us to find
9427c478bdstevel@tonic-gate				 * a port, get one to start with. Set
9437c478bdstevel@tonic-gate				 * requested_port to 0, so that we will
944f4b3ec6dh				 * update us->us_next_port_to_try below.
9457c478bdstevel@tonic-gate				 */
94645916cdjpk				port = udp_update_next_port(udp,
947f4b3ec6dh				    us->us_next_port_to_try, B_TRUE);
9487c478bdstevel@tonic-gate				requested_port = 0;
9497c478bdstevel@tonic-gate			} else {
95045916cdjpk				port = udp_update_next_port(udp, port + 1,
95145916cdjpk				    B_FALSE);
9527c478bdstevel@tonic-gate			}
9537c478bdstevel@tonic-gate		}
95545916cdjpk		if (port == 0 || ++count >= loopmax) {
9567c478bdstevel@tonic-gate			/*
9577c478bdstevel@tonic-gate			 * We've tried every possible port number and
9587c478bdstevel@tonic-gate			 * there are none available, so send an error
9597c478bdstevel@tonic-gate			 * to the user.
9607c478bdstevel@tonic-gate			 */
961fc80c0dnordmark			udp->udp_pending_op = -1;
962fc80c0dnordmark			rw_exit(&udp->udp_rwlock);
9637c478bdstevel@tonic-gate			udp_err_ack(q, mp, TNOADDR, 0);
9647c478bdstevel@tonic-gate			return;
9657c478bdstevel@tonic-gate		}
9667c478bdstevel@tonic-gate	}
9687c478bdstevel@tonic-gate	/*
9697c478bdstevel@tonic-gate	 * Copy the source address into our udp structure.  This address
9707c478bdstevel@tonic-gate	 * may still be zero; if so, ip will fill in the correct address
9717c478bdstevel@tonic-gate	 * each time an outbound packet is passed to it.
972fc80c0dnordmark	 * If we are binding to a broadcast or multicast address then
973fc80c0dnordmark	 * udp_bind_ack will clear the source address when it receives
974fc80c0dnordmark	 * the T_BIND_ACK.
9757c478bdstevel@tonic-gate	 */
9767c478bdstevel@tonic-gate	udp->udp_v6src = udp->udp_bound_v6src = v6src;
9777c478bdstevel@tonic-gate	udp->udp_port = lport;
9787c478bdstevel@tonic-gate	/*
9797c478bdstevel@tonic-gate	 * Now reset the next anonymous port if the application requested
9807c478bdstevel@tonic-gate	 * an anonymous port, or we handed out the next anonymous port.
9817c478bdstevel@tonic-gate	 */
9827c478bdstevel@tonic-gate	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
983f4b3ec6dh		us->us_next_port_to_try = port + 1;
9847c478bdstevel@tonic-gate	}
9867c478bdstevel@tonic-gate	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
9877c478bdstevel@tonic-gate	if (udp->udp_family == AF_INET) {
9887c478bdstevel@tonic-gate		sin->sin_port = udp->udp_port;
9897c478bdstevel@tonic-gate	} else {
9907c478bdstevel@tonic-gate		int error;
9927c478bdstevel@tonic-gate		sin6->sin6_port = udp->udp_port;
9937c478bdstevel@tonic-gate		/* Rebuild the header template */
994fc80c0dnordmark		error = udp_build_hdrs(udp);
9957c478bdstevel@tonic-gate		if (error != 0) {
996fc80c0dnordmark			udp->udp_pending_op = -1;
997fc80c0dnordmark			rw_exit(&udp->udp_rwlock);
9987c478bdstevel@tonic-gate			mutex_exit(&udpf->uf_lock);
9997c478bdstevel@tonic-gate			udp_err_ack(q, mp, TSYSERR, error);
10007c478bdstevel@tonic-gate			return;
10017c478bdstevel@tonic-gate		}
10027c478bdstevel@tonic-gate	}
10037c478bdstevel@tonic-gate	udp->udp_state = TS_IDLE;
10047c478bdstevel@tonic-gate	udp_bind_hash_insert(udpf, udp);
10057c478bdstevel@tonic-gate	mutex_exit(&udpf->uf_lock);
1006fc80c0dnordmark	rw_exit(&udp->udp_rwlock);
10087c478bdstevel@tonic-gate	if (cl_inet_bind) {
10097c478bdstevel@tonic-gate		/*
10107c478bdstevel@tonic-gate		 * Running in cluster mode - register bind information
10117c478bdstevel@tonic-gate		 */
10127c478bdstevel@tonic-gate		if (udp->udp_ipversion == IPV4_VERSION) {
10137c478bdstevel@tonic-gate			(*cl_inet_bind)(IPPROTO_UDP, AF_INET,
10147c478bdstevel@tonic-gate			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
10157c478bdstevel@tonic-gate			    (in_port_t)udp->udp_port);
10167c478bdstevel@tonic-gate		} else {
10177c478bdstevel@tonic-gate			(*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
10187c478bdstevel@tonic-gate			    (uint8_t *)&(udp->udp_v6src),
10197c478bdstevel@tonic-gate			    (in_port_t)udp->udp_port);
10207c478bdstevel@tonic-gate		}
10227c478bdstevel@tonic-gate	}
102445916cdjpk	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
102545916cdjpk	if (is_system_labeled() && (!connp->conn_anon_port ||
102645916cdjpk	    connp->conn_anon_mlp)) {
102745916cdjpk		uint16_t mlpport;
102845916cdjpk		cred_t *cr = connp->conn_cred;
102945916cdjpk		zone_t *zone;
1031f4b3ec6dh		zone = crgetzone(cr);
103245916cdjpk		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
103345916cdjpk		    mlptSingle;
1034f4b3ec6dh		addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
1035fc80c0dnordmark		    &v6src, us->us_netstack->netstack_ip);
103645916cdjpk		if (addrtype == mlptSingle) {
1037fc80c0dnordmark			rw_enter(&udp->udp_rwlock, RW_WRITER);
1038fc80c0dnordmark			udp->udp_pending_op = -1;
1039fc80c0dnordmark			rw_exit(&udp->udp_rwlock);
104045916cdjpk			udp_err_ack(q, mp, TNOADDR, 0);
104145916cdjpk			connp->conn_anon_port = B_FALSE;
104245916cdjpk			connp->conn_mlp_type = mlptSingle;
104345916cdjpk			return;
104445916cdjpk		}
104545916cdjpk		mlpport = connp->conn_anon_port ? PMAPPORT : port;
104645916cdjpk		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
104745916cdjpk		    addrtype);
104845916cdjpk		if (mlptype != mlptSingle &&
104945916cdjpk		    (connp->conn_mlp_type == mlptSingle ||
105045916cdjpk		    secpolicy_net_bindmlp(cr) != 0)) {
105145916cdjpk			if (udp->udp_debug) {
105245916cdjpk				(void) strlog(UDP_MOD_ID, 0, 1,
105345916cdjpk				    SL_ERROR|SL_TRACE,
105445916cdjpk				    "udp_bind: no priv for multilevel port %d",
105545916cdjpk				    mlpport);
105645916cdjpk			}
1057fc80c0dnordmark			rw_enter(&udp->udp_rwlock, RW_WRITER);
1058fc80c0dnordmark			udp->udp_pending_op = -1;
1059fc80c0dnordmark			rw_exit(&udp->udp_rwlock);
106045916cdjpk			udp_err_ack(q, mp, TACCES, 0);
106145916cdjpk			connp->conn_anon_port = B_FALSE;
106245916cdjpk			connp->conn_mlp_type = mlptSingle;
106345916cdjpk			return;
106445916cdjpk		}
106645916cdjpk		/*
106745916cdjpk		 * If we're specifically binding a shared IP address and the
106845916cdjpk		 * port is MLP on shared addresses, then check to see if this
106945916cdjpk		 * zone actually owns the MLP.  Reject if not.
107045916cdjpk		 */
107145916cdjpk		if (mlptype == mlptShared && addrtype == mlptShared) {
1072f4b3ec6dh			/*
1073f4b3ec6dh			 * No need to handle exclusive-stack zones since
1074f4b3ec6dh			 * ALL_ZONES only applies to the shared stack.
1075f4b3ec6dh			 */
107645916cdjpk			zoneid_t mlpzone;
107845916cdjpk			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,