xref: /illumos-gate/usr/src/uts/common/inet/udp/udp.c (revision 6218f289)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
25  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
26  */
27 /* Copyright (c) 1990 Mentat Inc. */
28 
29 #include <sys/sysmacros.h>
30 #include <sys/types.h>
31 #include <sys/stream.h>
32 #include <sys/stropts.h>
33 #include <sys/strlog.h>
34 #include <sys/strsun.h>
35 #define	_SUN_TPI_VERSION 2
36 #include <sys/tihdr.h>
37 #include <sys/timod.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/strsubr.h>
41 #include <sys/suntpi.h>
42 #include <sys/xti_inet.h>
43 #include <sys/kmem.h>
44 #include <sys/cred_impl.h>
45 #include <sys/policy.h>
46 #include <sys/priv.h>
47 #include <sys/ucred.h>
48 #include <sys/zone.h>
49 
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sockio.h>
53 #include <sys/vtrace.h>
54 #include <sys/sdt.h>
55 #include <sys/debug.h>
56 #include <sys/isa_defs.h>
57 #include <sys/random.h>
58 #include <netinet/in.h>
59 #include <netinet/ip6.h>
60 #include <netinet/icmp6.h>
61 #include <netinet/udp.h>
62 
63 #include <inet/common.h>
64 #include <inet/ip.h>
65 #include <inet/ip_impl.h>
66 #include <inet/ipsec_impl.h>
67 #include <inet/ip6.h>
68 #include <inet/ip_ire.h>
69 #include <inet/ip_if.h>
70 #include <inet/ip_multi.h>
71 #include <inet/ip_ndp.h>
72 #include <inet/proto_set.h>
73 #include <inet/mib2.h>
74 #include <inet/optcom.h>
75 #include <inet/snmpcom.h>
76 #include <inet/kstatcom.h>
77 #include <inet/ipclassifier.h>
78 #include <sys/squeue_impl.h>
79 #include <inet/ipnet.h>
80 #include <sys/ethernet.h>
81 
82 #include <sys/tsol/label.h>
83 #include <sys/tsol/tnet.h>
84 #include <rpc/pmap_prot.h>
85 
86 #include <inet/udp_impl.h>
87 
88 /*
89  * Synchronization notes:
90  *
91  * UDP is MT and uses the usual kernel synchronization primitives. There are 2
92  * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
93  * protects the contents of the udp_t. uf_lock protects the address and the
94  * fanout information.
95  * The lock order is conn_lock -> uf_lock.
96  *
97  * The fanout lock uf_lock:
98  * When a UDP endpoint is bound to a local port, it is inserted into
99  * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
100  * The size of the array is controlled by the udp_bind_fanout_size variable.
101  * This variable can be changed in /etc/system if the default value is
102  * not large enough.  Each bind hash bucket is protected by a per bucket
103  * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
104  * structure and a few other fields in the udp_t. A UDP endpoint is removed
105  * from the bind hash list only when it is being unbound or being closed.
106  * The per bucket lock also protects a UDP endpoint's state changes.
107  *
108  * Plumbing notes:
109  * UDP is always a device driver. For compatibility with mibopen() code
110  * it is possible to I_PUSH "udp", but that results in pushing a passthrough
111  * dummy module.
112  *
113  * The above implies that we don't support any intermediate module to
114  * reside in between /dev/ip and udp -- in fact, we never supported such
115  * scenario in the past as the inter-layer communication semantics have
116  * always been private.
117  */
118 
/*
 * Number of buckets in the UDP bind hash (fanout) array.
 * Kept as a global so that it can be overridden from /etc/system when the
 * default is not large enough.
 */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
121 
122 static void	udp_addr_req(queue_t *q, mblk_t *mp);
123 static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
124 static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
125 static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
126 static int	udp_build_hdr_template(conn_t *, const in6_addr_t *,
127     const in6_addr_t *, in_port_t, uint32_t);
128 static void	udp_capability_req(queue_t *q, mblk_t *mp);
129 static int	udp_tpi_close(queue_t *q, int flags, cred_t *);
130 static void	udp_close_free(conn_t *);
131 static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
132 static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
133 static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
134     int sys_error);
135 static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
136     t_scalar_t tlierr, int sys_error);
137 static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
138 		    cred_t *cr);
139 static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
140 		    char *value, caddr_t cp, cred_t *cr);
141 static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
142 		    char *value, caddr_t cp, cred_t *cr);
143 static void	udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
144 static void	udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
145     ip_recv_attr_t *ira);
146 static void	udp_info_req(queue_t *q, mblk_t *mp);
147 static void	udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
148 static int	udp_lrput(queue_t *, mblk_t *);
149 static int	udp_lwput(queue_t *, mblk_t *);
150 static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
151 		    cred_t *credp, boolean_t isv6);
152 static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
153 		    cred_t *credp);
154 static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
155 		    cred_t *credp);
156 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
157 int		udp_opt_set(conn_t *connp, uint_t optset_context,
158 		    int level, int name, uint_t inlen,
159 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
160 		    void *thisdg_attrs, cred_t *cr);
161 int		udp_opt_get(conn_t *connp, int level, int name,
162 		    uchar_t *ptr);
163 static int	udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
164 		    pid_t pid);
165 static int	udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
166     pid_t pid, ip_xmit_attr_t *ixa);
167 static int	udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
168 		    sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
169 		    ip_xmit_attr_t *ixa);
170 static mblk_t	*udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
171     const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
172     int *);
173 static mblk_t	*udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
174     mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
175 static void	udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
176 static void	udp_ud_err_connected(conn_t *, t_scalar_t);
177 static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
178 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
179     boolean_t random);
180 static void	udp_wput_other(queue_t *q, mblk_t *mp);
181 static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
182 static int	udp_wput_fallback(queue_t *q, mblk_t *mp);
183 static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
184 
185 static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
186 static void	udp_stack_fini(netstackid_t stackid, void *arg);
187 
188 /* Common routines for TPI and socket module */
189 static void	udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
190 
191 /* Common routine for TPI and socket module */
192 static conn_t	*udp_do_open(cred_t *, boolean_t, int, int *);
193 static void	udp_do_close(conn_t *);
194 static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
195     boolean_t);
196 static int	udp_do_unbind(conn_t *);
197 
198 int		udp_getsockname(sock_lower_handle_t,
199     struct sockaddr *, socklen_t *, cred_t *);
200 int		udp_getpeername(sock_lower_handle_t,
201     struct sockaddr *, socklen_t *, cred_t *);
202 static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
203     cred_t *, pid_t);
204 
205 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
206 
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_newdst().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 *
 * "err" must be an lvalue; it is cleared first and then receives the value
 * returned by the hook.  "cp" is evaluated more than once, so it must not
 * have side effects.  The IPv4 or IPv6 flavour of the call is selected by
 * (cp)->conn_ipversion.
 */
#define	CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((cp)->conn_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET,		\
		    (uint8_t *)&((cp)->conn_laddr_v4),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)&(V4_PART_OF_V6(*(faddrp))),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET6,		\
		    (uint8_t *)&((cp)->conn_laddr_v6),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}
244 
/*
 * STREAMS module information shared by every UDP queue initializer below.
 * Carries the module id and name together with the default receive
 * high/low water marks (UDP_RECV_HIWATER / UDP_RECV_LOWATER).
 */
static struct module_info udp_mod_info =  {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/*
 * Entry points for UDP as a device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 * Both read sides share udp_tpi_close as their close routine.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
};

static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
};

/* Write side, shared by the IPv4 and IPv6 devices. */
static struct qinit udp_winit = {
	udp_wput, ip_wsrv, NULL, NULL, NULL, &udp_mod_info
};

/* UDP entry point during fallback */
struct qinit udp_fallback_sock_winit = {
	udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 * These two initializers are the lower (mux) read and write sides.
 */
static struct qinit udp_lrinit = {
	udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

static struct qinit udp_lwinit = {
	udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};
291 
/* Largest UDP payload that fits in an IPv4 packet with a simple IP header. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages (IPv4 endpoints) */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};
308 
/* Largest UDP payload that fits in an IPv6 packet with only the base header. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* Default structure copied into T_INFO_ACK messages (IPv6 endpoints) */
static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};
324 
325 /*
326  * UDP tunables related declarations. Definitions are in udp_tunables.c
327  */
328 extern mod_prop_info_t udp_propinfo_tbl[];
329 extern int udp_propinfo_count;
330 
/*
 * Settable in /etc/system.
 * If set to 0, pick ephemeral port sequentially; otherwise randomly.
 * The default (1) selects random ephemeral ports.
 */
uint32_t udp_random_anon_port = 1;
334 
/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 *
 * Both hooks take the netstack id, the protocol, the address family, a
 * pointer to the local address bytes, the local port and an opaque
 * argument.  They are defined here with a NULL initial value.
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
346 
347 typedef union T_primitives *t_primp_t;
348 
349 /*
350  * Return the next anonymous port in the privileged port range for
351  * bind checking.
352  *
353  * Trusted Extension (TX) notes: TX allows administrator to mark or
354  * reserve ports as Multilevel ports (MLP). MLP has special function
355  * on TX systems. Once a port is made MLP, it's not available as
356  * ordinary port. This creates "holes" in the port name space. It
357  * may be necessary to skip the "holes" find a suitable anon port.
358  */
359 static in_port_t
360 udp_get_next_priv_port(udp_t *udp)
361 {
362 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
363 	in_port_t nextport;
364 	boolean_t restart = B_FALSE;
365 	udp_stack_t *us = udp->udp_us;
366 
367 retry:
368 	if (next_priv_port < us->us_min_anonpriv_port ||
369 	    next_priv_port >= IPPORT_RESERVED) {
370 		next_priv_port = IPPORT_RESERVED - 1;
371 		if (restart)
372 			return (0);
373 		restart = B_TRUE;
374 	}
375 
376 	if (is_system_labeled() &&
377 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
378 	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
379 		next_priv_port = nextport;
380 		goto retry;
381 	}
382 
383 	return (next_priv_port--);
384 }
385 
386 /*
387  * Hash list removal routine for udp_t structures.
388  */
389 static void
390 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
391 {
392 	udp_t		*udpnext;
393 	kmutex_t	*lockp;
394 	udp_stack_t	*us = udp->udp_us;
395 	conn_t		*connp = udp->udp_connp;
396 
397 	if (udp->udp_ptpbhn == NULL)
398 		return;
399 
400 	/*
401 	 * Extract the lock pointer in case there are concurrent
402 	 * hash_remove's for this instance.
403 	 */
404 	ASSERT(connp->conn_lport != 0);
405 	if (!caller_holds_lock) {
406 		lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
407 		    us->us_bind_fanout_size)].uf_lock;
408 		ASSERT(lockp != NULL);
409 		mutex_enter(lockp);
410 	}
411 	if (udp->udp_ptpbhn != NULL) {
412 		udpnext = udp->udp_bind_hash;
413 		if (udpnext != NULL) {
414 			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
415 			udp->udp_bind_hash = NULL;
416 		}
417 		*udp->udp_ptpbhn = udpnext;
418 		udp->udp_ptpbhn = NULL;
419 	}
420 	if (!caller_holds_lock) {
421 		mutex_exit(lockp);
422 	}
423 }
424 
425 static void
426 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
427 {
428 	conn_t	*connp = udp->udp_connp;
429 	udp_t	**udpp;
430 	udp_t	*udpnext;
431 	conn_t	*connext;
432 
433 	ASSERT(MUTEX_HELD(&uf->uf_lock));
434 	ASSERT(udp->udp_ptpbhn == NULL);
435 	udpp = &uf->uf_udp;
436 	udpnext = udpp[0];
437 	if (udpnext != NULL) {
438 		/*
439 		 * If the new udp bound to the INADDR_ANY address
440 		 * and the first one in the list is not bound to
441 		 * INADDR_ANY we skip all entries until we find the
442 		 * first one bound to INADDR_ANY.
443 		 * This makes sure that applications binding to a
444 		 * specific address get preference over those binding to
445 		 * INADDR_ANY.
446 		 */
447 		connext = udpnext->udp_connp;
448 		if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
449 		    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
450 			while ((udpnext = udpp[0]) != NULL &&
451 			    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
452 				udpp = &(udpnext->udp_bind_hash);
453 			}
454 			if (udpnext != NULL)
455 				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
456 		} else {
457 			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
458 		}
459 	}
460 	udp->udp_bind_hash = udpnext;
461 	udp->udp_ptpbhn = udpp;
462 	udpp[0] = udp;
463 }
464 
465 /*
466  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
467  * passed to udp_wput.
468  * It associates a port number and local address with the stream.
469  * It calls IP to verify the local IP address, and calls IP to insert
470  * the conn_t in the fanout table.
471  * If everything is ok it then sends the T_BIND_ACK back up.
472  *
473  * Note that UDP over IPv4 and IPv6 sockets can use the same port number
474  * without setting SO_REUSEADDR. This is needed so that they
475  * can be viewed as two independent transport protocols.
476  * However, anonymouns ports are allocated from the same range to avoid
477  * duplicating the us->us_next_port_to_try.
478  */
479 static void
480 udp_tpi_bind(queue_t *q, mblk_t *mp)
481 {
482 	sin_t		*sin;
483 	sin6_t		*sin6;
484 	mblk_t		*mp1;
485 	struct T_bind_req *tbr;
486 	conn_t		*connp;
487 	udp_t		*udp;
488 	int		error;
489 	struct sockaddr	*sa;
490 	cred_t		*cr;
491 
492 	/*
493 	 * All Solaris components should pass a db_credp
494 	 * for this TPI message, hence we ASSERT.
495 	 * But in case there is some other M_PROTO that looks
496 	 * like a TPI message sent by some other kernel
497 	 * component, we check and return an error.
498 	 */
499 	cr = msg_getcred(mp, NULL);
500 	ASSERT(cr != NULL);
501 	if (cr == NULL) {
502 		udp_err_ack(q, mp, TSYSERR, EINVAL);
503 		return;
504 	}
505 
506 	connp = Q_TO_CONN(q);
507 	udp = connp->conn_udp;
508 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
509 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
510 		    "udp_bind: bad req, len %u",
511 		    (uint_t)(mp->b_wptr - mp->b_rptr));
512 		udp_err_ack(q, mp, TPROTO, 0);
513 		return;
514 	}
515 	if (udp->udp_state != TS_UNBND) {
516 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
517 		    "udp_bind: bad state, %u", udp->udp_state);
518 		udp_err_ack(q, mp, TOUTSTATE, 0);
519 		return;
520 	}
521 	/*
522 	 * Reallocate the message to make sure we have enough room for an
523 	 * address.
524 	 */
525 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
526 	if (mp1 == NULL) {
527 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
528 		return;
529 	}
530 
531 	mp = mp1;
532 
533 	/* Reset the message type in preparation for shipping it back. */
534 	DB_TYPE(mp) = M_PCPROTO;
535 
536 	tbr = (struct T_bind_req *)mp->b_rptr;
537 	switch (tbr->ADDR_length) {
538 	case 0:			/* Request for a generic port */
539 		tbr->ADDR_offset = sizeof (struct T_bind_req);
540 		if (connp->conn_family == AF_INET) {
541 			tbr->ADDR_length = sizeof (sin_t);
542 			sin = (sin_t *)&tbr[1];
543 			*sin = sin_null;
544 			sin->sin_family = AF_INET;
545 			mp->b_wptr = (uchar_t *)&sin[1];
546 			sa = (struct sockaddr *)sin;
547 		} else {
548 			ASSERT(connp->conn_family == AF_INET6);
549 			tbr->ADDR_length = sizeof (sin6_t);
550 			sin6 = (sin6_t *)&tbr[1];
551 			*sin6 = sin6_null;
552 			sin6->sin6_family = AF_INET6;
553 			mp->b_wptr = (uchar_t *)&sin6[1];
554 			sa = (struct sockaddr *)sin6;
555 		}
556 		break;
557 
558 	case sizeof (sin_t):	/* Complete IPv4 address */
559 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
560 		    sizeof (sin_t));
561 		if (sa == NULL || !OK_32PTR((char *)sa)) {
562 			udp_err_ack(q, mp, TSYSERR, EINVAL);
563 			return;
564 		}
565 		if (connp->conn_family != AF_INET ||
566 		    sa->sa_family != AF_INET) {
567 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
568 			return;
569 		}
570 		break;
571 
572 	case sizeof (sin6_t):	/* complete IPv6 address */
573 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
574 		    sizeof (sin6_t));
575 		if (sa == NULL || !OK_32PTR((char *)sa)) {
576 			udp_err_ack(q, mp, TSYSERR, EINVAL);
577 			return;
578 		}
579 		if (connp->conn_family != AF_INET6 ||
580 		    sa->sa_family != AF_INET6) {
581 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
582 			return;
583 		}
584 		break;
585 
586 	default:		/* Invalid request */
587 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
588 		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
589 		udp_err_ack(q, mp, TBADADDR, 0);
590 		return;
591 	}
592 
593 	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
594 	    tbr->PRIM_type != O_T_BIND_REQ);
595 
596 	if (error != 0) {
597 		if (error > 0) {
598 			udp_err_ack(q, mp, TSYSERR, error);
599 		} else {
600 			udp_err_ack(q, mp, -error, 0);
601 		}
602 	} else {
603 		tbr->PRIM_type = T_BIND_ACK;
604 		qreply(q, mp);
605 	}
606 }
607 
608 /*
609  * This routine handles each T_CONN_REQ message passed to udp.  It
610  * associates a default destination address with the stream.
611  *
612  * After various error checks are completed, udp_connect() lays
613  * the target address and port into the composite header template.
614  * Then we ask IP for information, including a source address if we didn't
615  * already have one. Finally we send up the T_OK_ACK reply message.
616  */
617 static void
618 udp_tpi_connect(queue_t *q, mblk_t *mp)
619 {
620 	conn_t	*connp = Q_TO_CONN(q);
621 	int	error;
622 	socklen_t	len;
623 	struct sockaddr		*sa;
624 	struct T_conn_req	*tcr;
625 	cred_t		*cr;
626 	pid_t		pid;
627 	/*
628 	 * All Solaris components should pass a db_credp
629 	 * for this TPI message, hence we ASSERT.
630 	 * But in case there is some other M_PROTO that looks
631 	 * like a TPI message sent by some other kernel
632 	 * component, we check and return an error.
633 	 */
634 	cr = msg_getcred(mp, &pid);
635 	ASSERT(cr != NULL);
636 	if (cr == NULL) {
637 		udp_err_ack(q, mp, TSYSERR, EINVAL);
638 		return;
639 	}
640 
641 	tcr = (struct T_conn_req *)mp->b_rptr;
642 
643 	/* A bit of sanity checking */
644 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
645 		udp_err_ack(q, mp, TPROTO, 0);
646 		return;
647 	}
648 
649 	if (tcr->OPT_length != 0) {
650 		udp_err_ack(q, mp, TBADOPT, 0);
651 		return;
652 	}
653 
654 	/*
655 	 * Determine packet type based on type of address passed in
656 	 * the request should contain an IPv4 or IPv6 address.
657 	 * Make sure that address family matches the type of
658 	 * family of the address passed down.
659 	 */
660 	len = tcr->DEST_length;
661 	switch (tcr->DEST_length) {
662 	default:
663 		udp_err_ack(q, mp, TBADADDR, 0);
664 		return;
665 
666 	case sizeof (sin_t):
667 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
668 		    sizeof (sin_t));
669 		break;
670 
671 	case sizeof (sin6_t):
672 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
673 		    sizeof (sin6_t));
674 		break;
675 	}
676 
677 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
678 	if (error != 0) {
679 		udp_err_ack(q, mp, TSYSERR, error);
680 		return;
681 	}
682 
683 	error = udp_do_connect(connp, sa, len, cr, pid);
684 	if (error != 0) {
685 		if (error < 0)
686 			udp_err_ack(q, mp, -error, 0);
687 		else
688 			udp_err_ack(q, mp, TSYSERR, error);
689 	} else {
690 		mblk_t	*mp1;
691 		/*
692 		 * We have to send a connection confirmation to
693 		 * keep TLI happy.
694 		 */
695 		if (connp->conn_family == AF_INET) {
696 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
697 			    sizeof (sin_t), NULL, 0);
698 		} else {
699 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
700 			    sizeof (sin6_t), NULL, 0);
701 		}
702 		if (mp1 == NULL) {
703 			udp_err_ack(q, mp, TSYSERR, ENOMEM);
704 			return;
705 		}
706 
707 		/*
708 		 * Send ok_ack for T_CONN_REQ
709 		 */
710 		mp = mi_tpi_ok_ack_alloc(mp);
711 		if (mp == NULL) {
712 			/* Unable to reuse the T_CONN_REQ for the ack. */
713 			udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
714 			return;
715 		}
716 
717 		putnext(connp->conn_rq, mp);
718 		putnext(connp->conn_rq, mp1);
719 	}
720 }
721 
722 /* ARGSUSED */
723 static int
724 udp_tpi_close(queue_t *q, int flags, cred_t *credp __unused)
725 {
726 	conn_t	*connp;
727 
728 	if (flags & SO_FALLBACK) {
729 		/*
730 		 * stream is being closed while in fallback
731 		 * simply free the resources that were allocated
732 		 */
733 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
734 		qprocsoff(q);
735 		goto done;
736 	}
737 
738 	connp = Q_TO_CONN(q);
739 	udp_do_close(connp);
740 done:
741 	q->q_ptr = WR(q)->q_ptr = NULL;
742 	return (0);
743 }
744 
745 static void
746 udp_close_free(conn_t *connp)
747 {
748 	udp_t *udp = connp->conn_udp;
749 
750 	/* If there are any options associated with the stream, free them. */
751 	if (udp->udp_recv_ipp.ipp_fields != 0)
752 		ip_pkt_free(&udp->udp_recv_ipp);
753 
754 	/*
755 	 * Clear any fields which the kmem_cache constructor clears.
756 	 * Only udp_connp needs to be preserved.
757 	 * TBD: We should make this more efficient to avoid clearing
758 	 * everything.
759 	 */
760 	ASSERT(udp->udp_connp == connp);
761 	bzero(udp, sizeof (udp_t));
762 	udp->udp_connp = connp;
763 }
764 
/*
 * Common disconnect routine: drop the default destination of a connected
 * endpoint and put it back into the bound (TS_IDLE) state.
 *
 * Returns -TOUTSTATE when the endpoint is not in TS_DATA_XFER, otherwise
 * the result of udp_build_hdr_template() if that fails, otherwise the
 * result of ip_laddr_fanout_insert().  Callers in this file treat a
 * negative value as a negated TLI error and a positive one as an errno.
 */
static int
udp_do_disconnect(conn_t *connp)
{
	udp_t	*udp;
	udp_fanout_t *udpf;
	udp_stack_t *us;
	int	error;

	udp = connp->conn_udp;
	us = udp->udp_us;
	mutex_enter(&connp->conn_lock);
	if (udp->udp_state != TS_DATA_XFER) {
		mutex_exit(&connp->conn_lock);
		return (-TOUTSTATE);
	}
	/* Address and state changes are made under the bind bucket lock. */
	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
	    us->us_bind_fanout_size)];
	mutex_enter(&udpf->uf_lock);
	/* Restore the source/local addresses from the bound address. */
	if (connp->conn_mcbc_bind)
		connp->conn_saddr_v6 = ipv6_all_zeros;
	else
		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
	/* Forget the peer. */
	connp->conn_faddr_v6 = ipv6_all_zeros;
	connp->conn_fport = 0;
	udp->udp_state = TS_IDLE;
	mutex_exit(&udpf->uf_lock);

	/* Remove any remnants of mapped address binding */
	if (connp->conn_family == AF_INET6)
		connp->conn_ipversion = IPV6_VERSION;

	/* Rebuild the header template while conn_lock is still held. */
	connp->conn_v6lastdst = ipv6_all_zeros;
	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	mutex_exit(&connp->conn_lock);
	if (error != 0)
		return (error);

	/*
	 * Tell IP to remove the full binding and revert
	 * to the local address binding.
	 */
	return (ip_laddr_fanout_insert(connp));
}
810 
811 static void
812 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
813 {
814 	conn_t	*connp = Q_TO_CONN(q);
815 	int	error;
816 
817 	/*
818 	 * Allocate the largest primitive we need to send back
819 	 * T_error_ack is > than T_ok_ack
820 	 */
821 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
822 	if (mp == NULL) {
823 		/* Unable to reuse the T_DISCON_REQ for the ack. */
824 		udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
825 		return;
826 	}
827 
828 	error = udp_do_disconnect(connp);
829 
830 	if (error != 0) {
831 		if (error < 0) {
832 			udp_err_ack(q, mp, -error, 0);
833 		} else {
834 			udp_err_ack(q, mp, TSYSERR, error);
835 		}
836 	} else {
837 		mp = mi_tpi_ok_ack_alloc(mp);
838 		ASSERT(mp != NULL);
839 		qreply(q, mp);
840 	}
841 }
842 
843 int
844 udp_disconnect(conn_t *connp)
845 {
846 	int error;
847 
848 	connp->conn_dgram_errind = B_FALSE;
849 	error = udp_do_disconnect(connp);
850 	if (error < 0)
851 		error = proto_tlitosyserr(-error);
852 
853 	return (error);
854 }
855 
856 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
857 static void
858 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
859 {
860 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
861 		qreply(q, mp);
862 }
863 
864 /* Shorthand to generate and send TPI error acks to our client */
865 static void
866 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
867     t_scalar_t t_error, int sys_error)
868 {
869 	struct T_error_ack	*teackp;
870 
871 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
872 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
873 		teackp = (struct T_error_ack *)mp->b_rptr;
874 		teackp->ERROR_prim = primitive;
875 		teackp->TLI_error = t_error;
876 		teackp->UNIX_error = sys_error;
877 		qreply(q, mp);
878 	}
879 }
880 
881 /* At minimum we need 4 bytes of UDP header */
882 #define	ICMP_MIN_UDP_HDR	4
883 
884 /*
885  * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
886  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
887  * Assumes that IP has pulled up everything up to and including the ICMP header.
888  */
/* ARGSUSED2 */
static void
udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t		*connp = (conn_t *)arg1;
	icmph_t		*icmph;
	ipha_t		*ipha;
	int		iph_hdr_length;
	udpha_t		*udpha;
	sin_t		sin;
	sin6_t		sin6;
	mblk_t		*mp1;
	int		error = 0;
	udp_t		*udp = connp->conn_udp;

	/*
	 * mp is consumed on every path out of this function: it is either
	 * handed to udp_icmp_error_ipv6() or released with freemsg().
	 */
	ipha = (ipha_t *)mp->b_rptr;

	ASSERT(OK_32PTR(mp->b_rptr));

	/* Anything that is not IPv4 is handled by the ICMPv6 routine. */
	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
		udp_icmp_error_ipv6(connp, mp, ira);
		return;
	}
	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);

	/* Skip past the outer IP and ICMP headers */
	ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
	iph_hdr_length = ira->ira_ip_hdr_length;
	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
	/* From here on ipha refers to the header quoted inside the ICMP. */
	ipha = (ipha_t *)&icmph[1];	/* Inner IP header */

	/* Skip past the inner IP and find the ULP header */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);

	/*
	 * Classify the message.  Only port/protocol unreachable set error;
	 * every other type/code leaves it at 0 and the message is dropped
	 * below.
	 */
	switch (icmph->icmph_type) {
	case ICMP_DEST_UNREACHABLE:
		switch (icmph->icmph_code) {
		case ICMP_FRAGMENTATION_NEEDED: {
			/*
			 * This block-local ipha shadows the function-level
			 * one; it is pointed at connp->conn_ht_iphc, not at
			 * the received packet.
			 */
			ipha_t		*ipha;
			ip_xmit_attr_t	*ixa;
			/*
			 * IP has already adjusted the path MTU.
			 * But we need to adjust DF for IPv4.
			 */
			if (connp->conn_ipversion != IPV4_VERSION)
				break;

			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL || ixa->ixa_ire == NULL) {
				/*
				 * Some other thread holds conn_ixa. We will
				 * redo this on the next ICMP too big.
				 */
				if (ixa != NULL)
					ixa_refrele(ixa);
				break;
			}
			(void) ip_get_pmtu(ixa);

			/*
			 * Mirror IXAF_PMTU_IPV4_DF into the DF bit of the
			 * IPv4 header stored at conn_ht_iphc, under
			 * conn_lock.
			 */
			mutex_enter(&connp->conn_lock);
			ipha = (ipha_t *)connp->conn_ht_iphc;
			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
				ipha->ipha_fragment_offset_and_flags |=
				    IPH_DF_HTONS;
			} else {
				ipha->ipha_fragment_offset_and_flags &=
				    ~IPH_DF_HTONS;
			}
			mutex_exit(&connp->conn_lock);
			ixa_refrele(ixa);
			break;
		}
		case ICMP_PORT_UNREACHABLE:
		case ICMP_PROTOCOL_UNREACHABLE:
			error = ECONNREFUSED;
			break;
		default:
			/* Transient errors */
			break;
		}
		break;
	default:
		/* Transient errors */
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!connp->conn_dgram_errind) {
		freemsg(mp);
		return;
	}

	/*
	 * Build the address the failed datagram was sent to (destination
	 * address and port of the quoted inner headers) in the form that
	 * matches the endpoint's address family, then report the error.
	 */
	switch (connp->conn_family) {
	case AF_INET:
		sin = sin_null;
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = ipha->ipha_dst;
		sin.sin_port = udpha->uha_dst_port;
		if (IPCL_IS_NONSTR(connp)) {
			/*
			 * Non-STREAMS socket.  In TS_DATA_XFER the error is
			 * passed up with su_set_error only when it refers to
			 * the connected peer (conn_lock is dropped before
			 * the upcall); a mismatch is ignored.  In any other
			 * state the error and address are saved in
			 * udp_delayed_error/udp_delayed_addr.
			 */
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER) {
				if (sin.sin_port == connp->conn_fport &&
				    sin.sin_addr.s_addr ==
				    connp->conn_faddr_v4) {
					mutex_exit(&connp->conn_lock);
					(*connp->conn_upcalls->su_set_error)
					    (connp->conn_upper_handle, error);
					goto done;
				}
			} else {
				udp->udp_delayed_error = error;
				*((sin_t *)&udp->udp_delayed_addr) = sin;
			}
			mutex_exit(&connp->conn_lock);
		} else {
			/*
			 * STREAMS endpoint: send a T_UDERROR_IND upstream.
			 * A NULL return from mi_tpi_uderror_ind means the
			 * indication is silently dropped.
			 */
			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
			    NULL, 0, error);
			if (mp1 != NULL)
				putnext(connp->conn_rq, mp1);
		}
		break;
	case AF_INET6:
		/*
		 * IPv4 ICMP on an AF_INET6 endpoint: the destination is
		 * reported as an IPv4-mapped IPv6 address.  The handling
		 * otherwise parallels the AF_INET case above.
		 */
		sin6 = sin6_null;
		sin6.sin6_family = AF_INET6;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
		sin6.sin6_port = udpha->uha_dst_port;
		if (IPCL_IS_NONSTR(connp)) {
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER) {
				if (sin6.sin6_port == connp->conn_fport &&
				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
				    &connp->conn_faddr_v6)) {
					mutex_exit(&connp->conn_lock);
					(*connp->conn_upcalls->su_set_error)
					    (connp->conn_upper_handle, error);
					goto done;
				}
			} else {
				udp->udp_delayed_error = error;
				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
			}
			mutex_exit(&connp->conn_lock);
		} else {
			mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
			    NULL, 0, error);
			if (mp1 != NULL)
				putnext(connp->conn_rq, mp1);
		}
		break;
	}
done:
	/* The original ICMP message is always released here. */
	freemsg(mp);
}
1051 
/*
 * udp_icmp_error_ipv6 is called by udp_icmp_input to process ICMP for IPv6.
 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
 * For ICMP6_PACKET_TOO_BIG it instead sends up an empty T_UNITDATA_IND with
 * an IPV6_PATHMTU ancillary item, if the application asked for one.
 * Assumes that IP has pulled up all the extension headers as well as the
 * ICMPv6 header.
 *
 * mp is consumed on every path.
 */
static void
udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
{
	icmp6_t		*icmp6;
	ip6_t		*ip6h, *outer_ip6h;
	uint16_t	iph_hdr_length;
	uint8_t		*nexthdrp;
	udpha_t		*udpha;
	sin6_t		sin6;
	mblk_t		*mp1;
	int		error = 0;
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;

	outer_ip6h = (ip6_t *)mp->b_rptr;
#ifdef DEBUG
	/*
	 * Debug-only cross-check that the outer header length computed from
	 * the packet agrees with what IP recorded in ira.
	 */
	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
	else
		iph_hdr_length = IPV6_HDR_LEN;
	ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
#endif
	/* Skip past the outer IP and ICMP headers */
	iph_hdr_length = ira->ira_ip_hdr_length;
	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];

	/* Skip past the inner IP and find the ULP header */
	ip6h = (ip6_t *)&icmp6[1];	/* Inner IP header */
	/* Drop the message if the inner header chain cannot be walked. */
	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
		freemsg(mp);
		return;
	}
	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);

	/*
	 * Classify the message.  error is set only for the cases treated as
	 * permanent; it stays 0 otherwise and the message is dropped below.
	 */
	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		switch (icmp6->icmp6_code) {
		case ICMP6_DST_UNREACH_NOPORT:
			error = ECONNREFUSED;
			break;
		case ICMP6_DST_UNREACH_ADMIN:
		case ICMP6_DST_UNREACH_NOROUTE:
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
		case ICMP6_DST_UNREACH_ADDR:
			/* Transient errors */
			break;
		default:
			break;
		}
		break;
	case ICMP6_PACKET_TOO_BIG: {
		struct T_unitdata_ind	*tudi;
		struct T_opthdr		*toh;
		size_t			udi_size;
		mblk_t			*newmp;
		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
		    sizeof (struct ip6_mtuinfo);
		/*
		 * Note: this pointer shadows the function-level sin6
		 * structure for the duration of this block.
		 */
		sin6_t			*sin6;
		struct ip6_mtuinfo	*mtuinfo;

		/*
		 * If the application has requested to receive path mtu
		 * information, send up an empty message containing an
		 * IPV6_PATHMTU ancillary data item.
		 */
		if (!connp->conn_ipv6_recvpathmtu)
			break;

		/*
		 * Layout of the new message:
		 *   T_unitdata_ind | sin6_t source | T_opthdr | ip6_mtuinfo
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
		    opt_length;
		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
			/* Falls out with error == 0, so mp is freed below. */
			UDPS_BUMP_MIB(us, udpInErrors);
			break;
		}

		/*
		 * newmp->b_cont is left to NULL on purpose.  This is an
		 * empty message containing only ancillary data.
		 */
		newmp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
		newmp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
		tudi->OPT_length = opt_length;

		/* Source address is the conn's foreign address; port is 0. */
		sin6 = (sin6_t *)&tudi[1];
		bzero(sin6, sizeof (sin6_t));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = connp->conn_faddr_v6;

		toh = (struct T_opthdr *)&sin6[1];
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_PATHMTU;
		toh->len = opt_length;
		toh->status = 0;

		/*
		 * The MTU info carries the destination of the quoted packet
		 * and the MTU field of the ICMPv6 message.
		 * NOTE(review): icmp6_mtu is copied without any byte-order
		 * conversion -- confirm which byte order consumers of
		 * ip6m_mtu expect.
		 */
		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
		/*
		 * We've consumed everything we need from the original
		 * message.  Free it, then send our empty message.
		 */
		freemsg(mp);
		udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
		return;
	}
	case ICMP6_TIME_EXCEEDED:
		/* Transient errors */
		break;
	case ICMP6_PARAM_PROB:
		/*
		 * If this corresponds to an ICMP_PROTOCOL_UNREACHABLE
		 * NOTE(review): icmp6_pptr is used as an offset without any
		 * byte-order conversion -- confirm this is intended.
		 */
		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
		    (uchar_t *)nexthdrp) {
			error = ECONNREFUSED;
			break;
		}
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!connp->conn_dgram_errind) {
		freemsg(mp);
		return;
	}

	/*
	 * The reported address is the destination (address, port, flow
	 * information) of the datagram quoted inside the ICMPv6 message.
	 */
	sin6 = sin6_null;
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = ip6h->ip6_dst;
	sin6.sin6_port = udpha->uha_dst_port;
	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;

	if (IPCL_IS_NONSTR(connp)) {
		/*
		 * Non-STREAMS socket.  In TS_DATA_XFER the error is passed
		 * up with su_set_error only when it refers to the connected
		 * peer (conn_lock is dropped before the upcall); a mismatch
		 * is ignored.  In any other state the error and address are
		 * saved in udp_delayed_error/udp_delayed_addr.
		 */
		mutex_enter(&connp->conn_lock);
		if (udp->udp_state == TS_DATA_XFER) {
			if (sin6.sin6_port == connp->conn_fport &&
			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
			    &connp->conn_faddr_v6)) {
				mutex_exit(&connp->conn_lock);
				(*connp->conn_upcalls->su_set_error)
				    (connp->conn_upper_handle, error);
				goto done;
			}
		} else {
			udp->udp_delayed_error = error;
			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
		}
		mutex_exit(&connp->conn_lock);
	} else {
		/*
		 * STREAMS endpoint: send a T_UDERROR_IND upstream.  A NULL
		 * return from mi_tpi_uderror_ind means the indication is
		 * silently dropped.
		 */
		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
		    NULL, 0, error);
		if (mp1 != NULL)
			putnext(connp->conn_rq, mp1);
	}
done:
	freemsg(mp);
}
1228 
1229 /*
1230  * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
1231  * The local address is filled in if endpoint is bound. The remote address
1232  * is filled in if remote address has been precified ("connected endpoint")
1233  * (The concept of connected CLTS sockets is alien to published TPI
1234  *  but we support it anyway).
1235  */
1236 static void
1237 udp_addr_req(queue_t *q, mblk_t *mp)
1238 {
1239 	struct sockaddr *sa;
1240 	mblk_t	*ackmp;
1241 	struct T_addr_ack *taa;
1242 	udp_t	*udp = Q_TO_UDP(q);
1243 	conn_t	*connp = udp->udp_connp;
1244 	uint_t	addrlen;
1245 
1246 	/* Make it large enough for worst case */
1247 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1248 	    2 * sizeof (sin6_t), 1);
1249 	if (ackmp == NULL) {
1250 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1251 		return;
1252 	}
1253 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1254 
1255 	bzero(taa, sizeof (struct T_addr_ack));
1256 	ackmp->b_wptr = (uchar_t *)&taa[1];
1257 
1258 	taa->PRIM_type = T_ADDR_ACK;
1259 	ackmp->b_datap->db_type = M_PCPROTO;
1260 
1261 	if (connp->conn_family == AF_INET)
1262 		addrlen = sizeof (sin_t);
1263 	else
1264 		addrlen = sizeof (sin6_t);
1265 
1266 	mutex_enter(&connp->conn_lock);
1267 	/*
1268 	 * Note: Following code assumes 32 bit alignment of basic
1269 	 * data structures like sin_t and struct T_addr_ack.
1270 	 */
1271 	if (udp->udp_state != TS_UNBND) {
1272 		/*
1273 		 * Fill in local address first
1274 		 */
1275 		taa->LOCADDR_offset = sizeof (*taa);
1276 		taa->LOCADDR_length = addrlen;
1277 		sa = (struct sockaddr *)&taa[1];
1278 		(void) conn_getsockname(connp, sa, &addrlen);
1279 		ackmp->b_wptr += addrlen;
1280 	}
1281 	if (udp->udp_state == TS_DATA_XFER) {
1282 		/*
1283 		 * connected, fill remote address too
1284 		 */
1285 		taa->REMADDR_length = addrlen;
1286 		/* assumed 32-bit alignment */
1287 		taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1288 		sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1289 		(void) conn_getpeername(connp, sa, &addrlen);
1290 		ackmp->b_wptr += addrlen;
1291 	}
1292 	mutex_exit(&connp->conn_lock);
1293 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1294 	qreply(q, ackmp);
1295 }
1296 
1297 static void
1298 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1299 {
1300 	conn_t		*connp = udp->udp_connp;
1301 
1302 	if (connp->conn_family == AF_INET) {
1303 		*tap = udp_g_t_info_ack_ipv4;
1304 	} else {
1305 		*tap = udp_g_t_info_ack_ipv6;
1306 	}
1307 	tap->CURRENT_state = udp->udp_state;
1308 	tap->OPT_size = udp_max_optsize;
1309 }
1310 
1311 static void
1312 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1313     t_uscalar_t cap_bits1)
1314 {
1315 	tcap->CAP_bits1 = 0;
1316 
1317 	if (cap_bits1 & TC1_INFO) {
1318 		udp_copy_info(&tcap->INFO_ack, udp);
1319 		tcap->CAP_bits1 |= TC1_INFO;
1320 	}
1321 }
1322 
1323 /*
1324  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1325  * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1326  * udp_g_t_info_ack.  The current state of the stream is copied from
1327  * udp_state.
1328  */
1329 static void
1330 udp_capability_req(queue_t *q, mblk_t *mp)
1331 {
1332 	t_uscalar_t		cap_bits1;
1333 	struct T_capability_ack	*tcap;
1334 	udp_t	*udp = Q_TO_UDP(q);
1335 
1336 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1337 
1338 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1339 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1340 	if (!mp)
1341 		return;
1342 
1343 	tcap = (struct T_capability_ack *)mp->b_rptr;
1344 	udp_do_capability_ack(udp, tcap, cap_bits1);
1345 
1346 	qreply(q, mp);
1347 }
1348 
1349 /*
1350  * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
1351  * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1352  * The current state of the stream is copied from udp_state.
1353  */
1354 static void
1355 udp_info_req(queue_t *q, mblk_t *mp)
1356 {
1357 	udp_t *udp = Q_TO_UDP(q);
1358 
1359 	/* Create a T_INFO_ACK message. */
1360 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1361 	    T_INFO_ACK);
1362 	if (!mp)
1363 		return;
1364 	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1365 	qreply(q, mp);
1366 }
1367 
1368 /* For /dev/udp aka AF_INET open */
1369 static int
1370 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1371 {
1372 	return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1373 }
1374 
1375 /* For /dev/udp6 aka AF_INET6 open */
1376 static int
1377 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1378 {
1379 	return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1380 }
1381 
1382 /*
1383  * This is the open routine for udp.  It allocates a udp_t structure for
1384  * the stream and, on the first open of the module, creates an ND table.
1385  */
1386 static int
1387 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1388     boolean_t isv6)
1389 {
1390 	udp_t		*udp;
1391 	conn_t		*connp;
1392 	dev_t		conn_dev;
1393 	vmem_t		*minor_arena;
1394 	int		err;
1395 
1396 	/* If the stream is already open, return immediately. */
1397 	if (q->q_ptr != NULL)
1398 		return (0);
1399 
1400 	if (sflag == MODOPEN)
1401 		return (EINVAL);
1402 
1403 	if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1404 	    ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1405 		minor_arena = ip_minor_arena_la;
1406 	} else {
1407 		/*
1408 		 * Either minor numbers in the large arena were exhausted
1409 		 * or a non socket application is doing the open.
1410 		 * Try to allocate from the small arena.
1411 		 */
1412 		if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1413 			return (EBUSY);
1414 
1415 		minor_arena = ip_minor_arena_sa;
1416 	}
1417 
1418 	if (flag & SO_FALLBACK) {
1419 		/*
1420 		 * Non streams socket needs a stream to fallback to
1421 		 */
1422 		RD(q)->q_ptr = (void *)conn_dev;
1423 		WR(q)->q_qinfo = &udp_fallback_sock_winit;
1424 		WR(q)->q_ptr = (void *)minor_arena;
1425 		qprocson(q);
1426 		return (0);
1427 	}
1428 
1429 	connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
1430 	if (connp == NULL) {
1431 		inet_minor_free(minor_arena, conn_dev);
1432 		return (err);
1433 	}
1434 	udp = connp->conn_udp;
1435 
1436 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1437 	connp->conn_dev = conn_dev;
1438 	connp->conn_minor_arena = minor_arena;
1439 
1440 	/*
1441 	 * Initialize the udp_t structure for this stream.
1442 	 */
1443 	q->q_ptr = connp;
1444 	WR(q)->q_ptr = connp;
1445 	connp->conn_rq = q;
1446 	connp->conn_wq = WR(q);
1447 
1448 	/*
1449 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
1450 	 * need to lock anything.
1451 	 */
1452 	ASSERT(connp->conn_proto == IPPROTO_UDP);
1453 	ASSERT(connp->conn_udp == udp);
1454 	ASSERT(udp->udp_connp == connp);
1455 
1456 	if (flag & SO_SOCKSTR) {
1457 		udp->udp_issocket = B_TRUE;
1458 	}
1459 
1460 	WR(q)->q_hiwat = connp->conn_sndbuf;
1461 	WR(q)->q_lowat = connp->conn_sndlowat;
1462 
1463 	qprocson(q);
1464 
1465 	/* Set the Stream head write offset and high watermark. */
1466 	(void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1467 	(void) proto_set_rx_hiwat(q, connp,
1468 	    udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
1469 
1470 	mutex_enter(&connp->conn_lock);
1471 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1472 	mutex_exit(&connp->conn_lock);
1473 	return (0);
1474 }
1475 
1476 /*
1477  * Which UDP options OK to set through T_UNITDATA_REQ...
1478  */
1479 /* ARGSUSED */
1480 static boolean_t
1481 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1482 {
1483 	return (B_TRUE);
1484 }
1485 
1486 /*
1487  * This routine gets default values of certain options whose default
1488  * values are maintained by protcol specific code
1489  */
1490 int
1491 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1492 {
1493 	udp_t		*udp = Q_TO_UDP(q);
1494 	udp_stack_t *us = udp->udp_us;
1495 	int *i1 = (int *)ptr;
1496 
1497 	switch (level) {
1498 	case IPPROTO_IP:
1499 		switch (name) {
1500 		case IP_MULTICAST_TTL:
1501 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1502 			return (sizeof (uchar_t));
1503 		case IP_MULTICAST_LOOP:
1504 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1505 			return (sizeof (uchar_t));
1506 		}
1507 		break;
1508 	case IPPROTO_IPV6:
1509 		switch (name) {
1510 		case IPV6_MULTICAST_HOPS:
1511 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1512 			return (sizeof (int));
1513 		case IPV6_MULTICAST_LOOP:
1514 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1515 			return (sizeof (int));
1516 		case IPV6_UNICAST_HOPS:
1517 			*i1 = us->us_ipv6_hoplimit;
1518 			return (sizeof (int));
1519 		}
1520 		break;
1521 	}
1522 	return (-1);
1523 }
1524 
1525 /*
1526  * This routine retrieves the current status of socket options.
1527  * It returns the size of the option retrieved, or -1.
1528  */
1529 int
1530 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
1531     uchar_t *ptr)
1532 {
1533 	int		*i1 = (int *)ptr;
1534 	udp_t		*udp = connp->conn_udp;
1535 	int		len;
1536 	conn_opt_arg_t	coas;
1537 	int		retval;
1538 
1539 	coas.coa_connp = connp;
1540 	coas.coa_ixa = connp->conn_ixa;
1541 	coas.coa_ipp = &connp->conn_xmit_ipp;
1542 	coas.coa_ancillary = B_FALSE;
1543 	coas.coa_changed = 0;
1544 
1545 	/*
1546 	 * We assume that the optcom framework has checked for the set
1547 	 * of levels and names that are supported, hence we don't worry
1548 	 * about rejecting based on that.
1549 	 * First check for UDP specific handling, then pass to common routine.
1550 	 */
1551 	switch (level) {
1552 	case IPPROTO_IP:
1553 		/*
1554 		 * Only allow IPv4 option processing on IPv4 sockets.
1555 		 */
1556 		if (connp->conn_family != AF_INET)
1557 			return (-1);
1558 
1559 		switch (name) {
1560 		case IP_OPTIONS:
1561 		case T_IP_OPTIONS:
1562 			mutex_enter(&connp->conn_lock);
1563 			if (!(udp->udp_recv_ipp.ipp_fields &
1564 			    IPPF_IPV4_OPTIONS)) {
1565 				mutex_exit(&connp->conn_lock);
1566 				return (0);
1567 			}
1568 
1569 			len = udp->udp_recv_ipp.ipp_ipv4_options_len;
1570 			ASSERT(len != 0);
1571 			bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
1572 			mutex_exit(&connp->conn_lock);
1573 			return (len);
1574 		}
1575 		break;
1576 	case IPPROTO_UDP:
1577 		switch (name) {
1578 		case UDP_NAT_T_ENDPOINT:
1579 			mutex_enter(&connp->conn_lock);
1580 			*i1 = udp->udp_nat_t_endpoint;
1581 			mutex_exit(&connp->conn_lock);
1582 			return (sizeof (int));
1583 		case UDP_RCVHDR:
1584 			mutex_enter(&connp->conn_lock);
1585 			*i1 = udp->udp_rcvhdr ? 1 : 0;
1586 			mutex_exit(&connp->conn_lock);
1587 			return (sizeof (int));
1588 		}
1589 	}
1590 	mutex_enter(&connp->conn_lock);
1591 	retval = conn_opt_get(&coas, level, name, ptr);
1592 	mutex_exit(&connp->conn_lock);
1593 	return (retval);
1594 }
1595 
1596 /*
1597  * This routine retrieves the current status of socket options.
1598  * It returns the size of the option retrieved, or -1.
1599  */
1600 int
1601 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1602 {
1603 	conn_t		*connp = Q_TO_CONN(q);
1604 	int		err;
1605 
1606 	err = udp_opt_get(connp, level, name, ptr);
1607 	return (err);
1608 }
1609 
1610 /*
1611  * This routine sets socket options.
1612  */
1613 int
1614 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1615     uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1616 {
1617 	conn_t		*connp = coa->coa_connp;
1618 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1619 	udp_t		*udp = connp->conn_udp;
1620 	udp_stack_t	*us = udp->udp_us;
1621 	int		*i1 = (int *)invalp;
1622 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1623 	int		error;
1624 
1625 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1626 	/*
1627 	 * First do UDP specific sanity checks and handle UDP specific
1628 	 * options. Note that some IPPROTO_UDP options are handled
1629 	 * by conn_opt_set.
1630 	 */
1631 	switch (level) {
1632 	case SOL_SOCKET:
1633 		switch (name) {
1634 		case SO_SNDBUF:
1635 			if (*i1 > us->us_max_buf) {
1636 				return (ENOBUFS);
1637 			}
1638 			break;
1639 		case SO_RCVBUF:
1640 			if (*i1 > us->us_max_buf) {
1641 				return (ENOBUFS);
1642 			}
1643 			break;
1644 
1645 		case SCM_UCRED: {
1646 			struct ucred_s *ucr;
1647 			cred_t *newcr;
1648 			ts_label_t *tsl;
1649 
1650 			/*
1651 			 * Only sockets that have proper privileges and are
1652 			 * bound to MLPs will have any other value here, so
1653 			 * this implicitly tests for privilege to set label.
1654 			 */
1655 			if (connp->conn_mlp_type == mlptSingle)
1656 				break;
1657 
1658 			ucr = (struct ucred_s *)invalp;
1659 			if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
1660 			    ucr->uc_labeloff < sizeof (*ucr) ||
1661 			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
1662 				return (EINVAL);
1663 			if (!checkonly) {
1664 				/*
1665 				 * Set ixa_tsl to the new label.
1666 				 * We assume that crgetzoneid doesn't change
1667 				 * as part of the SCM_UCRED.
1668 				 */
1669 				ASSERT(cr != NULL);
1670 				if ((tsl = crgetlabel(cr)) == NULL)
1671 					return (EINVAL);
1672 				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
1673 				    tsl->tsl_doi, KM_NOSLEEP);
1674 				if (newcr == NULL)
1675 					return (ENOSR);
1676 				ASSERT(newcr->cr_label != NULL);
1677 				/*
1678 				 * Move the hold on the cr_label to ixa_tsl by
1679 				 * setting cr_label to NULL. Then release newcr.
1680 				 */
1681 				ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
1682 				ixa->ixa_flags |= IXAF_UCRED_TSL;
1683 				newcr->cr_label = NULL;
1684 				crfree(newcr);
1685 				coa->coa_changed |= COA_HEADER_CHANGED;
1686 				coa->coa_changed |= COA_WROFF_CHANGED;
1687 			}
1688 			/* Fully handled this option. */
1689 			return (0);
1690 		}
1691 		}
1692 		break;
1693 	case IPPROTO_UDP:
1694 		switch (name) {
1695 		case UDP_NAT_T_ENDPOINT:
1696 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1697 				return (error);
1698 			}
1699 
1700 			/*
1701 			 * Use conn_family instead so we can avoid ambiguitites
1702 			 * with AF_INET6 sockets that may switch from IPv4
1703 			 * to IPv6.
1704 			 */
1705 			if (connp->conn_family != AF_INET) {
1706 				return (EAFNOSUPPORT);
1707 			}
1708 
1709 			if (!checkonly) {
1710 				mutex_enter(&connp->conn_lock);
1711 				udp->udp_nat_t_endpoint = onoff;
1712 				mutex_exit(&connp->conn_lock);
1713 				coa->coa_changed |= COA_HEADER_CHANGED;
1714 				coa->coa_changed |= COA_WROFF_CHANGED;
1715 			}
1716 			/* Fully handled this option. */
1717 			return (0);
1718 		case UDP_RCVHDR:
1719 			mutex_enter(&connp->conn_lock);
1720 			udp->udp_rcvhdr = onoff;
1721 			mutex_exit(&connp->conn_lock);
1722 			return (0);
1723 		}
1724 		break;
1725 	}
1726 	error = conn_opt_set(coa, level, name, inlen, invalp,
1727 	    checkonly, cr);
1728 	return (error);
1729 }
1730 
/*
 * This routine sets socket options.
 *
 *   connp          - the endpoint
 *   optset_context - one of the SETFN_* contexts; selects check-only
 *                    versus negotiate semantics
 *   level, name    - the option being set
 *   inlen, invalp  - length and location of the supplied value
 *   outlenp        - set to inlen on success and to 0 on failure
 *   outvalp        - receives a copy of the value on success (unless it
 *                    is the same buffer as invalp)
 *   thisdg_attrs   - when non-NULL, a conn_opt_arg_t describing options
 *                    from a T_UNITDATA_REQ; when NULL the option is applied
 *                    to the conn itself
 *   cr             - credentials of the requester
 *
 * The option itself is applied by udp_do_opt_set.  For non-ancillary
 * requests this routine then acts on the COA_*_CHANGED flags that were
 * reported back: re-caching the route, rebuilding the header template and
 * updating buffer limits and the write offset.
 *
 * Returns 0 on success or an errno value.
 */
int
udp_opt_set(conn_t *connp, uint_t optset_context, int level,
    int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
    uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
{
	udp_t		*udp = connp->conn_udp;
	int		err;
	conn_opt_arg_t	coas, *coa;
	boolean_t	checkonly;
	udp_stack_t	*us = udp->udp_us;

	switch (optset_context) {
	case SETFN_OPTCOM_CHECKONLY:
		checkonly = B_TRUE;
		/*
		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
		 * inlen != 0 implies value supplied and
		 *	we have to "pretend" to set it.
		 * inlen == 0 implies that there is no
		 *	value part in T_CHECK request and just validation
		 * done elsewhere should be enough, we just return here.
		 */
		if (inlen == 0) {
			*outlenp = 0;
			return (0);
		}
		break;
	case SETFN_OPTCOM_NEGOTIATE:
		checkonly = B_FALSE;
		break;
	case SETFN_UD_NEGOTIATE:
	case SETFN_CONN_NEGOTIATE:
		checkonly = B_FALSE;
		/*
		 * Negotiating local and "association-related" options
		 * through T_UNITDATA_REQ.
		 *
		 * Following routine can filter out ones we do not
		 * want to be "set" this way.
		 */
		if (!udp_opt_allow_udr_set(level, name)) {
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		/*
		 * We should never get here
		 */
		*outlenp = 0;
		return (EINVAL);
	}

	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));

	if (thisdg_attrs != NULL) {
		/* Options from T_UNITDATA_REQ */
		coa = (conn_opt_arg_t *)thisdg_attrs;
		ASSERT(coa->coa_connp == connp);
		ASSERT(coa->coa_ixa != NULL);
		ASSERT(coa->coa_ipp != NULL);
		ASSERT(coa->coa_ancillary);
	} else {
		/*
		 * Setting the option on the conn itself: build the argument
		 * block locally.  The ixa reference taken here is dropped
		 * either at errout or after the route handling below.
		 */
		coa = &coas;
		coas.coa_connp = connp;
		/* Get a reference on conn_ixa to prevent concurrent mods */
		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
		if (coas.coa_ixa == NULL) {
			*outlenp = 0;
			return (ENOMEM);
		}
		coas.coa_ipp = &connp->conn_xmit_ipp;
		coas.coa_ancillary = B_FALSE;
		coas.coa_changed = 0;
	}

	err = udp_do_opt_set(coa, level, name, inlen, invalp,
	    cr, checkonly);
	if (err != 0) {
		/*
		 * Common failure exit, also reached by goto from the
		 * IP_DHCPINIT_IF handling below.  The ixa reference is only
		 * ours to release when it was taken above (non-ancillary).
		 */
errout:
		if (!coa->coa_ancillary)
			ixa_refrele(coa->coa_ixa);
		*outlenp = 0;
		return (err);
	}
	/*
	 * Handle DHCPINIT here outside of lock
	 * NOTE(review): this block is not conditional on checkonly --
	 * confirm whether a check-only request can reach it.
	 */
	if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
		uint_t	ifindex;
		ill_t	*ill;

		/* An ifindex of 0 clears the association (ill == NULL). */
		ifindex = *(uint_t *)invalp;
		if (ifindex == 0) {
			ill = NULL;
		} else {
			ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
			    coa->coa_ixa->ixa_ipst);
			if (ill == NULL) {
				err = ENXIO;
				goto errout;
			}

			/*
			 * ill_lock is taken here and, on success, stays held
			 * until the conn has been updated below.
			 */
			mutex_enter(&ill->ill_lock);
			if (ill->ill_state_flags & ILL_CONDEMNED) {
				mutex_exit(&ill->ill_lock);
				ill_refrele(ill);
				err = ENXIO;
				goto errout;
			}
			if (IS_VNI(ill)) {
				mutex_exit(&ill->ill_lock);
				ill_refrele(ill);
				err = EINVAL;
				goto errout;
			}
		}
		mutex_enter(&connp->conn_lock);

		/* Undo any previous DHCPINIT association first. */
		if (connp->conn_dhcpinit_ill != NULL) {
			/*
			 * We've locked the conn so conn_cleanup_ill()
			 * cannot clear conn_dhcpinit_ill -- so it's
			 * safe to access the ill.
			 */
			ill_t *oill = connp->conn_dhcpinit_ill;

			ASSERT(oill->ill_dhcpinit != 0);
			atomic_dec_32(&oill->ill_dhcpinit);
			ill_set_inputfn(connp->conn_dhcpinit_ill);
			connp->conn_dhcpinit_ill = NULL;
		}

		if (ill != NULL) {
			/*
			 * Record the new ill, then drop the locks in the
			 * reverse order of acquisition and release the
			 * lookup reference.
			 */
			connp->conn_dhcpinit_ill = ill;
			atomic_inc_32(&ill->ill_dhcpinit);
			ill_set_inputfn(ill);
			mutex_exit(&connp->conn_lock);
			mutex_exit(&ill->ill_lock);
			ill_refrele(ill);
		} else {
			mutex_exit(&connp->conn_lock);
		}
	}

	/*
	 * Common case of OK return with outval same as inval.
	 */
	if (invalp != outvalp) {
		/* don't trust bcopy for identical src/dst */
		(void) bcopy(invalp, outvalp, inlen);
	}
	*outlenp = inlen;

	/*
	 * If this was not ancillary data, then we rebuild the headers,
	 * update the IRE/NCE, and IPsec as needed.
	 * Since the label depends on the destination we go through
	 * ip_set_destination first.
	 */
	if (coa->coa_ancillary) {
		return (0);
	}

	if (coa->coa_changed & COA_ROUTE_CHANGED) {
		in6_addr_t saddr, faddr, nexthop;
		in_port_t fport;

		/*
		 * We clear lastdst to make sure we pick up the change
		 * next time sending.
		 * If we are connected we re-cache the information.
		 * We ignore errors to preserve BSD behavior.
		 * Note that we don't redo IPsec policy lookup here
		 * since the final destination (or source) didn't change.
		 */
		mutex_enter(&connp->conn_lock);
		connp->conn_v6lastdst = ipv6_all_zeros;

		/* Snapshot the addresses under the lock; use them after. */
		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
		    &connp->conn_faddr_v6, &nexthop);
		saddr = connp->conn_saddr_v6;
		faddr = connp->conn_faddr_v6;
		fport = connp->conn_fport;
		mutex_exit(&connp->conn_lock);

		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
			(void) ip_attr_connect(connp, coa->coa_ixa,
			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
		}
	}

	/* Done with the transmit attributes; drop the reference taken above. */
	ixa_refrele(coa->coa_ixa);

	if (coa->coa_changed & COA_HEADER_CHANGED) {
		/*
		 * Rebuild the header template if we are connected.
		 * Otherwise clear conn_v6lastdst so we rebuild the header
		 * in the data path.
		 */
		mutex_enter(&connp->conn_lock);
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
			err = udp_build_hdr_template(connp,
			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
			    connp->conn_fport, connp->conn_flowinfo);
			if (err != 0) {
				/*
				 * Note that *outlenp and *outvalp were
				 * already filled in above on this path.
				 */
				mutex_exit(&connp->conn_lock);
				return (err);
			}
		} else {
			connp->conn_v6lastdst = ipv6_all_zeros;
		}
		mutex_exit(&connp->conn_lock);
	}
	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
		    connp->conn_rcvbuf);
	}
	/* The write queue's q_hiwat is only updated for STREAMS endpoints. */
	if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
	}
	if (coa->coa_changed & COA_WROFF_CHANGED) {
		/* Increase wroff if needed */
		uint_t wroff;

		/*
		 * Allocated header size plus the per-stack extra, plus room
		 * for one more 32-bit word on a NAT-T endpoint.  conn_wroff
		 * only ever grows here.
		 */
		mutex_enter(&connp->conn_lock);
		wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
		if (udp->udp_nat_t_endpoint)
			wroff += sizeof (uint32_t);
		if (wroff > connp->conn_wroff) {
			connp->conn_wroff = wroff;
			mutex_exit(&connp->conn_lock);
			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
		} else {
			mutex_exit(&connp->conn_lock);
		}
	}
	/* err is 0 here; every failure has returned earlier. */
	return (err);
}
1975 
1976 /* This routine sets socket options. */
1977 int
1978 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
1979     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
1980     void *thisdg_attrs, cred_t *cr)
1981 {
1982 	conn_t	*connp = Q_TO_CONN(q);
1983 	int error;
1984 
1985 	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
1986 	    outlenp, outvalp, thisdg_attrs, cr);
1987 	return (error);
1988 }
1989 
1990 /*
1991  * Setup IP and UDP headers.
1992  * Returns NULL on allocation failure, in which case data_mp is freed.
1993  */
1994 mblk_t *
1995 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
1996     const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
1997     uint32_t flowinfo, mblk_t *data_mp, int *errorp)
1998 {
1999 	mblk_t		*mp;
2000 	udpha_t		*udpha;
2001 	udp_stack_t	*us = connp->conn_netstack->netstack_udp;
2002 	uint_t		data_len;
2003 	uint32_t	cksum;
2004 	udp_t		*udp = connp->conn_udp;
2005 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
2006 	uint_t		ulp_hdr_len;
2007 
2008 	data_len = msgdsize(data_mp);
2009 	ulp_hdr_len = UDPH_SIZE;
2010 	if (insert_spi)
2011 		ulp_hdr_len += sizeof (uint32_t);
2012 
2013 	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2014 	    ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2015 	if (mp == NULL) {
2016 		ASSERT(*errorp != 0);
2017 		return (NULL);
2018 	}
2019 
2020 	data_len += ulp_hdr_len;
2021 	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2022 
2023 	udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2024 	udpha->uha_src_port = connp->conn_lport;
2025 	udpha->uha_dst_port = dstport;
2026 	udpha->uha_checksum = 0;
2027 	udpha->uha_length = htons(data_len);
2028 
2029 	/*
2030 	 * If there was a routing option/header then conn_prepend_hdr
2031 	 * has massaged it and placed the pseudo-header checksum difference
2032 	 * in the cksum argument.
2033 	 *
2034 	 * Setup header length and prepare for ULP checksum done in IP.
2035 	 *
2036 	 * We make it easy for IP to include our pseudo header
2037 	 * by putting our length in uha_checksum.
2038 	 * The IP source, destination, and length have already been set by
2039 	 * conn_prepend_hdr.
2040 	 */
2041 	cksum += data_len;
2042 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
2043 	ASSERT(cksum < 0x10000);
2044 
2045 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2046 		ipha_t	*ipha = (ipha_t *)mp->b_rptr;
2047 
2048 		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2049 
2050 		/* IP does the checksum if uha_checksum is non-zero */
2051 		if (us->us_do_checksum) {
2052 			if (cksum == 0)
2053 				udpha->uha_checksum = 0xffff;
2054 			else
2055 				udpha->uha_checksum = htons(cksum);
2056 		} else {
2057 			udpha->uha_checksum = 0;
2058 		}
2059 	} else {
2060 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2061 
2062 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
2063 		if (cksum == 0)
2064 			udpha->uha_checksum = 0xffff;
2065 		else
2066 			udpha->uha_checksum = htons(cksum);
2067 	}
2068 
2069 	/* Insert all-0s SPI now. */
2070 	if (insert_spi)
2071 		*((uint32_t *)(udpha + 1)) = 0;
2072 
2073 	return (mp);
2074 }
2075 
2076 static int
2077 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2078     const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
2079 {
2080 	udpha_t		*udpha;
2081 	int		error;
2082 
2083 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2084 	/*
2085 	 * We clear lastdst to make sure we don't use the lastdst path
2086 	 * next time sending since we might not have set v6dst yet.
2087 	 */
2088 	connp->conn_v6lastdst = ipv6_all_zeros;
2089 
2090 	error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
2091 	    flowinfo);
2092 	if (error != 0)
2093 		return (error);
2094 
2095 	/*
2096 	 * Any routing header/option has been massaged. The checksum difference
2097 	 * is stored in conn_sum.
2098 	 */
2099 	udpha = (udpha_t *)connp->conn_ht_ulp;
2100 	udpha->uha_src_port = connp->conn_lport;
2101 	udpha->uha_dst_port = dstport;
2102 	udpha->uha_checksum = 0;
2103 	udpha->uha_length = htons(UDPH_SIZE);	/* Filled in later */
2104 	return (0);
2105 }
2106 
2107 static mblk_t *
2108 udp_queue_fallback(udp_t *udp, mblk_t *mp)
2109 {
2110 	ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
2111 	if (IPCL_IS_NONSTR(udp->udp_connp)) {
2112 		/*
2113 		 * fallback has started but messages have not been moved yet
2114 		 */
2115 		if (udp->udp_fallback_queue_head == NULL) {
2116 			ASSERT(udp->udp_fallback_queue_tail == NULL);
2117 			udp->udp_fallback_queue_head = mp;
2118 			udp->udp_fallback_queue_tail = mp;
2119 		} else {
2120 			ASSERT(udp->udp_fallback_queue_tail != NULL);
2121 			udp->udp_fallback_queue_tail->b_next = mp;
2122 			udp->udp_fallback_queue_tail = mp;
2123 		}
2124 		return (NULL);
2125 	} else {
2126 		/*
2127 		 * Fallback completed, let the caller putnext() the mblk.
2128 		 */
2129 		return (mp);
2130 	}
2131 }
2132 
2133 /*
2134  * Deliver data to ULP. In case we have a socket, and it's falling back to
2135  * TPI, then we'll queue the mp for later processing.
2136  */
2137 static void
2138 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
2139 {
2140 	if (IPCL_IS_NONSTR(connp)) {
2141 		udp_t *udp = connp->conn_udp;
2142 		int error;
2143 
2144 		ASSERT(len == msgdsize(mp));
2145 		if ((*connp->conn_upcalls->su_recv)
2146 		    (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2147 			mutex_enter(&udp->udp_recv_lock);
2148 			if (error == ENOSPC) {
2149 				/*
2150 				 * let's confirm while holding the lock
2151 				 */
2152 				if ((*connp->conn_upcalls->su_recv)
2153 				    (connp->conn_upper_handle, NULL, 0, 0,
2154 				    &error, NULL) < 0) {
2155 					ASSERT(error == ENOSPC);
2156 					if (error == ENOSPC) {
2157 						connp->conn_flow_cntrld =
2158 						    B_TRUE;
2159 					}
2160 				}
2161 				mutex_exit(&udp->udp_recv_lock);
2162 			} else {
2163 				ASSERT(error == EOPNOTSUPP);
2164 				mp = udp_queue_fallback(udp, mp);
2165 				mutex_exit(&udp->udp_recv_lock);
2166 				if (mp != NULL)
2167 					putnext(connp->conn_rq, mp);
2168 			}
2169 		}
2170 		ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
2171 	} else {
2172 		if (is_system_labeled()) {
2173 			ASSERT(ira->ira_cred != NULL);
2174 			/*
2175 			 * Provide for protocols above UDP such as RPC
2176 			 * NOPID leaves db_cpid unchanged.
2177 			 */
2178 			mblk_setcred(mp, ira->ira_cred, NOPID);
2179 		}
2180 
2181 		putnext(connp->conn_rq, mp);
2182 	}
2183 }
2184 
2185 /*
2186  * This is the inbound data path.
2187  * IP has already pulled up the IP plus UDP headers and verified alignment
2188  * etc.
 *
 * arg1 is the conn_t the packet is delivered to, mp is the M_DATA message
 * whose b_rptr points at the IP header, arg2 is unused and ira holds the
 * receive attributes used here (packet length, IP header length, flags).
 *
 * The packet is freed and udpInErrors is bumped when the UDP length field
 * does not agree with the length derived from the IP header, or when an
 * allocation fails. Otherwise a T_UNITDATA_IND carrying the source address
 * (plus any ancillary data that has been asked for) is linked in front of
 * the data, udpHCInDatagrams is bumped and the result is passed to
 * udp_ulp_recv().
2189  */
2190 /* ARGSUSED2 */
2191 static void
2192 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2193 {
2194 	conn_t			*connp = (conn_t *)arg1;
2195 	struct T_unitdata_ind	*tudi;
2196 	uchar_t			*rptr;		/* Pointer to IP header */
2197 	int			hdr_length;	/* Length of IP+UDP headers */
2198 	int			udi_size;	/* Size of T_unitdata_ind */
2199 	int			pkt_len;
2200 	udp_t			*udp;
2201 	udpha_t			*udpha;
2202 	ip_pkt_t		ipps;
2203 	ip6_t			*ip6h;
2204 	mblk_t			*mp1;
2205 	uint32_t		udp_ipv4_options_len;
2206 	crb_t			recv_ancillary;
2207 	udp_stack_t		*us;
2208 
2209 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
2210 
2211 	udp = connp->conn_udp;
2212 	us = udp->udp_us;
2213 	rptr = mp->b_rptr;
2214 
2215 	ASSERT(DB_TYPE(mp) == M_DATA);
2216 	ASSERT(OK_32PTR(rptr));
2217 	ASSERT(ira->ira_pktlen == msgdsize(mp));
2218 	pkt_len = ira->ira_pktlen;
2219 
2220 	/*
2221 	 * Get a snapshot of these and allow other threads to change
2222 	 * them after that. We need the same recv_ancillary when determining
2223 	 * the size as when adding the ancillary data items.
2224 	 */
2225 	mutex_enter(&connp->conn_lock);
2226 	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
2227 	recv_ancillary = connp->conn_recv_ancillary;
2228 	mutex_exit(&connp->conn_lock);
2229 
	/* Until the length check below, hdr_length is the IP header only. */
2230 	hdr_length = ira->ira_ip_hdr_length;
2231 
2232 	/*
2233 	 * IP inspected the UDP header thus all of it must be in the mblk.
2234 	 * UDP length check is performed for IPv6 packets and IPv4 packets
2235 	 * to check if the size of the packet as specified
2236 	 * by the UDP header is the same as the length derived from the IP
2237 	 * header.
2238 	 */
2239 	udpha = (udpha_t *)(rptr + hdr_length);
2240 	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
2241 		goto tossit;
2242 
2243 	hdr_length += UDPH_SIZE;
2244 	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */
2245 
2246 	/* Initialize regardless of IP version */
2247 	ipps.ipp_fields = 0;
2248 
	/*
	 * Runs when this packet has IPv4 options or when options were
	 * recorded earlier (udp_ipv4_options_len snapshot above).
	 */
2249 	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
2250 	    udp_ipv4_options_len > 0) &&
2251 	    connp->conn_family == AF_INET) {
2252 		int	err;
2253 
2254 		/*
2255 		 * Record/update udp_recv_ipp with the lock
2256 		 * held. Not needed for AF_INET6 sockets
2257 		 * since they don't support a getsockopt of IP_OPTIONS.
2258 		 */
2259 		mutex_enter(&connp->conn_lock);
2260 		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
2261 		    B_TRUE);
2262 		if (err != 0) {
2263 			/* Allocation failed. Drop packet */
2264 			mutex_exit(&connp->conn_lock);
2265 			freemsg(mp);
2266 			UDPS_BUMP_MIB(us, udpInErrors);
2267 			return;
2268 		}
2269 		mutex_exit(&connp->conn_lock);
2270 	}
2271 
2272 	if (recv_ancillary.crb_all != 0) {
2273 		/*
2274 		 * Record packet information in the ip_pkt_t
2275 		 */
2276 		if (ira->ira_flags & IRAF_IS_IPV4) {
2277 			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
2278 			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2279 			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
2280 			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
2281 
2282 			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
2283 		} else {
2284 			uint8_t nexthdrp;
2285 
2286 			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
2287 			/*
2288 			 * IPv6 packets can only be received by applications
2289 			 * that are prepared to receive IPv6 addresses.
2290 			 * The IP fanout must ensure this.
2291 			 */
2292 			ASSERT(connp->conn_family == AF_INET6);
2293 
2294 			ip6h = (ip6_t *)rptr;
2295 
2296 			/* We don't care about the length, but need the ipp */
2297 			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
2298 			    &nexthdrp);
2299 			ASSERT(hdr_length == ira->ira_ip_hdr_length);
2300 			/* Restore */
2301 			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
2302 			ASSERT(nexthdrp == IPPROTO_UDP);
2303 		}
2304 	}
2305 
2306 	/*
2307 	 * This is the inbound data path.  Packets are passed upstream as
2308 	 * T_UNITDATA_IND messages.
2309 	 */
2310 	if (connp->conn_family == AF_INET) {
2311 		sin_t *sin;
2312 
2313 		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
2314 
2315 		/*
2316 		 * Normally only send up the source address.
2317 		 * If any ancillary data items are wanted we add those.
2318 		 */
2319 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
2320 		if (recv_ancillary.crb_all != 0) {
2321 			udi_size += conn_recvancillary_size(connp,
2322 			    recv_ancillary, ira, mp, &ipps);
2323 		}
2324 
2325 		/* Allocate a message block for the T_UNITDATA_IND structure. */
2326 		mp1 = allocb(udi_size, BPRI_MED);
2327 		if (mp1 == NULL) {
2328 			freemsg(mp);
2329 			UDPS_BUMP_MIB(us, udpInErrors);
2330 			return;
2331 		}
		/* The data message hangs off the new M_PROTO block. */
2332 		mp1->b_cont = mp;
2333 		mp1->b_datap->db_type = M_PROTO;
2334 		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2335 		mp1->b_wptr = (uchar_t *)tudi + udi_size;
2336 		tudi->PRIM_type = T_UNITDATA_IND;
2337 		tudi->SRC_length = sizeof (sin_t);
2338 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2339 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2340 		    sizeof (sin_t);
		/* What is left of udi_size is the room for ancillary data. */
2341 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
2342 		tudi->OPT_length = udi_size;
2343 		sin = (sin_t *)&tudi[1];
2344 		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
2345 		sin->sin_port =	udpha->uha_src_port;
2346 		sin->sin_family = connp->conn_family;
		/* Clear the eight bytes of sin_zero, four at a time. */
2347 		*(uint32_t *)&sin->sin_zero[0] = 0;
2348 		*(uint32_t *)&sin->sin_zero[4] = 0;
2349 
2350 		/*
2351 		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA,
2352 		 * IP_RECVTTL or IP_RECVTOS has been set.
2353 		 */
2354 		if (udi_size != 0) {
2355 			conn_recvancillary_add(connp, recv_ancillary, ira,
2356 			    &ipps, (uchar_t *)&sin[1], udi_size);
2357 		}
2358 	} else {
2359 		sin6_t *sin6;
2360 
2361 		/*
2362 		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
2363 		 *
2364 		 * Normally we only send up the address. If receiving of any
2365 		 * optional receive side information is enabled, we also send
2366 		 * that up as options.
2367 		 */
2368 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
2369 
2370 		if (recv_ancillary.crb_all != 0) {
2371 			udi_size += conn_recvancillary_size(connp,
2372 			    recv_ancillary, ira, mp, &ipps);
2373 		}
2374 
2375 		mp1 = allocb(udi_size, BPRI_MED);
2376 		if (mp1 == NULL) {
2377 			freemsg(mp);
2378 			UDPS_BUMP_MIB(us, udpInErrors);
2379 			return;
2380 		}
		/* The data message hangs off the new M_PROTO block. */
2381 		mp1->b_cont = mp;
2382 		mp1->b_datap->db_type = M_PROTO;
2383 		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2384 		mp1->b_wptr = (uchar_t *)tudi + udi_size;
2385 		tudi->PRIM_type = T_UNITDATA_IND;
2386 		tudi->SRC_length = sizeof (sin6_t);
2387 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2388 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2389 		    sizeof (sin6_t);
		/* What is left of udi_size is the room for ancillary data. */
2390 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
2391 		tudi->OPT_length = udi_size;
2392 		sin6 = (sin6_t *)&tudi[1];
2393 		if (ira->ira_flags & IRAF_IS_IPV4) {
2394 			in6_addr_t v6dst;
2395 
			/* IPv4 packet on an IPv6 socket: use mapped addresses. */
2396 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
2397 			    &sin6->sin6_addr);
2398 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
2399 			    &v6dst);
2400 			sin6->sin6_flowinfo = 0;
2401 			sin6->sin6_scope_id = 0;
2402 			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
2403 			    IPCL_ZONEID(connp), us->us_netstack);
2404 		} else {
2405 			ip6h = (ip6_t *)rptr;
2406 
2407 			sin6->sin6_addr = ip6h->ip6_src;
2408 			/* No sin6_flowinfo per API */
2409 			sin6->sin6_flowinfo = 0;
2410 			/* For link-scope pass up scope id */
2411 			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
2412 				sin6->sin6_scope_id = ira->ira_ruifindex;
2413 			else
2414 				sin6->sin6_scope_id = 0;
2415 			sin6->__sin6_src_id = ip_srcid_find_addr(
2416 			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
2417 			    us->us_netstack);
2418 		}
2419 		sin6->sin6_port = udpha->uha_src_port;
2420 		sin6->sin6_family = connp->conn_family;
2421 
2422 		if (udi_size != 0) {
2423 			conn_recvancillary_add(connp, recv_ancillary, ira,
2424 			    &ipps, (uchar_t *)&sin6[1], udi_size);
2425 		}
2426 	}
2427 
2428 	/*
2429 	 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
2430 	 * loopback traffic).
2431 	 */
2432 	DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
2433 	    void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);
2434 
2435 	/* Walk past the headers unless IP_RECVHDR was set. */
2436 	if (!udp->udp_rcvhdr) {
2437 		mp->b_rptr = rptr + hdr_length;
2438 		pkt_len -= hdr_length;
2439 	}
2440 
2441 	UDPS_BUMP_MIB(us, udpHCInDatagrams);
	/* pkt_len now matches the data that is left in mp. */
2442 	udp_ulp_recv(connp, mp1, pkt_len, ira);
2443 	return;
2444 
	/* Reached only from the UDP/IP length mismatch check above. */
2445 tossit:
2446 	freemsg(mp);
2447 	UDPS_BUMP_MIB(us, udpInErrors);
2448 }
2449 
2450 /*
2451  * This routine creates a T_UDERROR_IND message and passes it upstream.
2452  * The address and options are copied from the T_UNITDATA_REQ message
2453  * passed in mp.  This message is freed.
2454  */
2455 static void
2456 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2457 {
2458 	struct T_unitdata_req *tudr;
2459 	mblk_t	*mp1;
2460 	uchar_t *destaddr;
2461 	t_scalar_t destlen;
2462 	uchar_t	*optaddr;
2463 	t_scalar_t optlen;
2464 
2465 	if ((mp->b_wptr < mp->b_rptr) ||
2466 	    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2467 		goto done;
2468 	}
2469 	tudr = (struct T_unitdata_req *)mp->b_rptr;
2470 	destaddr = mp->b_rptr + tudr->DEST_offset;
2471 	if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2472 	    destaddr + tudr->DEST_length < mp->b_rptr ||
2473 	    destaddr + tudr->DEST_length > mp->b_wptr) {
2474 		goto done;
2475 	}
2476 	optaddr = mp->b_rptr + tudr->OPT_offset;
2477 	if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2478 	    optaddr + tudr->OPT_length < mp->b_rptr ||
2479 	    optaddr + tudr->OPT_length > mp->b_wptr) {
2480 		goto done;
2481 	}
2482 	destlen = tudr->DEST_length;
2483 	optlen = tudr->OPT_length;
2484 
2485 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2486 	    (char *)optaddr, optlen, err);
2487 	if (mp1 != NULL)
2488 		qreply(q, mp1);
2489 
2490 done:
2491 	freemsg(mp);
2492 }
2493 
2494 /*
2495  * This routine removes a port number association from a stream.  It
2496  * is called by udp_wput to handle T_UNBIND_REQ messages.
2497  */
2498 static void
2499 udp_tpi_unbind(queue_t *q, mblk_t *mp)
2500 {
2501 	conn_t	*connp = Q_TO_CONN(q);
2502 	int	error;
2503 
2504 	error = udp_do_unbind(connp);
2505 	if (error) {
2506 		if (error < 0)
2507 			udp_err_ack(q, mp, -error, 0);
2508 		else
2509 			udp_err_ack(q, mp, TSYSERR, error);
2510 		return;
2511 	}
2512 
2513 	mp = mi_tpi_ok_ack_alloc(mp);
2514 	ASSERT(mp != NULL);
2515 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2516 	qreply(q, mp);
2517 }
2518 
2519 /*
2520  * Don't let port fall into the privileged range.
2521  * Since the extra privileged ports can be arbitrary we also
2522  * ensure that we exclude those from consideration.
2523  * us->us_epriv_ports is not sorted thus we loop over it until
2524  * there are no changes.
2525  */
2526 static in_port_t
2527 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
2528 {
2529 	int i, bump;
2530 	in_port_t nextport;
2531 	boolean_t restart = B_FALSE;
2532 	udp_stack_t *us = udp->udp_us;
2533 
2534 	if (random && udp_random_anon_port != 0) {
2535 		(void) random_get_pseudo_bytes((uint8_t *)&port,
2536 		    sizeof (in_port_t));
2537 		/*
2538 		 * Unless changed by a sys admin, the smallest anon port
2539 		 * is 32768 and the largest anon port is 65535.  It is
2540 		 * very likely (50%) for the random port to be smaller
2541 		 * than the smallest anon port.  When that happens,
2542 		 * add port % (anon port range) to the smallest anon
2543 		 * port to get the random port.  It should fall into the
2544 		 * valid anon port range.
2545 		 */
2546 		if ((port < us->us_smallest_anon_port) ||
2547 		    (port > us->us_largest_anon_port)) {
2548 			if (us->us_smallest_anon_port ==
2549 			    us->us_largest_anon_port) {
2550 				bump = 0;
2551 			} else {
2552 				bump = port % (us->us_largest_anon_port -
2553 				    us->us_smallest_anon_port);
2554 			}
2555 
2556 			port = us->us_smallest_anon_port + bump;
2557 		}
2558 	}
2559 
2560 retry:
2561 	if (port < us->us_smallest_anon_port)
2562 		port = us->us_smallest_anon_port;
2563 
2564 	if (port > us->us_largest_anon_port) {
2565 		port = us->us_smallest_anon_port;
2566 		if (restart)
2567 			return (0);
2568 		restart = B_TRUE;
2569 	}
2570 
2571 	if (port < us->us_smallest_nonpriv_port)
2572 		port = us->us_smallest_nonpriv_port;
2573 
2574 	for (i = 0; i < us->us_num_epriv_ports; i++) {
2575 		if (port == us->us_epriv_ports[i]) {
2576 			port++;
2577 			/*
2578 			 * Make sure that the port is in the
2579 			 * valid range.
2580 			 */
2581 			goto retry;
2582 		}
2583 	}
2584 
2585 	if (is_system_labeled() &&
2586 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
2587 	    port, IPPROTO_UDP, B_TRUE)) != 0) {
2588 		port = nextport;
2589 		goto retry;
2590 	}
2591 
2592 	return (port);
2593 }
2594 
2595 /*
2596  * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
2597  * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
2598  * the TPI options, otherwise we take them from msg_control.
2599  * If both sin and sin6 are NULL it is a connected socket and we use
 * conn_faddr; otherwise sin is used when it is set, else sin6.
2600  * Always consumes mp; never consumes tudr_mp.
 *
 * cr and pid are placed in the transmit attributes for the duration of the
 * send and the conn's own values are put back before returning.
 *
 * Returns 0 on success, and also when the packet is dropped silently
 * (ENETDOWN from ip_attr_connect()) or conn_ip_output() reports
 * EWOULDBLOCK. Otherwise an errno value is returned; EADDRNOTAVAIL from
 * ip_attr_connect() or conn_ip_output() is reported as ENETUNREACH.
2601  */
2602 static int
2603 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
2604     mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
2605 {
2606 	udp_t		*udp = connp->conn_udp;
2607 	udp_stack_t	*us = udp->udp_us;
2608 	int		error;
2609 	ip_xmit_attr_t	*ixa;
2610 	ip_pkt_t	*ipp;
2611 	in6_addr_t	v6src;
2612 	in6_addr_t	v6dst;
2613 	in6_addr_t	v6nexthop;
2614 	in_port_t	dstport;
2615 	uint32_t	flowinfo;
2616 	uint_t		srcid;
2617 	int		is_absreq_failure = 0;
2618 	conn_opt_arg_t	coas, *coa;
2619 
2620 	ASSERT(tudr_mp != NULL || msg != NULL);
2621 
2622 	/*
2623 	 * Get ixa before checking state to handle a disconnect race.
2624 	 *
2625 	 * We need an exclusive copy of conn_ixa since the ancillary data
2626 	 * options might modify it. That copy has no pointers hence we
2627 	 * need to set them up once we've parsed the ancillary data.
2628 	 */
2629 	ixa = conn_get_ixa_exclusive(connp);
2630 	if (ixa == NULL) {
2631 		UDPS_BUMP_MIB(us, udpOutErrors);
2632 		freemsg(mp);
2633 		return (ENOMEM);
2634 	}
2635 	ASSERT(cr != NULL);
2636 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2637 	ixa->ixa_cred = cr;
2638 	ixa->ixa_cpid = pid;
2639 	if (is_system_labeled()) {
2640 		/* We need to restart with a label based on the cred */
2641 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
2642 	}
2643 
2644 	/* In case previous destination was multicast or multirt */
2645 	ip_attr_newdst(ixa);
2646 
2647 	/* Get a copy of conn_xmit_ipp since the options might change it */
2648 	ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
2649 	if (ipp == NULL) {
		/* No ipp to free yet, so clean up here instead of at done. */
2650 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2651 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
2652 		ixa->ixa_cpid = connp->conn_cpid;
2653 		ixa_refrele(ixa);
2654 		UDPS_BUMP_MIB(us, udpOutErrors);
2655 		freemsg(mp);
2656 		return (ENOMEM);
2657 	}
2658 	mutex_enter(&connp->conn_lock);
2659 	error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
2660 	mutex_exit(&connp->conn_lock);
2661 	if (error != 0) {
2662 		UDPS_BUMP_MIB(us, udpOutErrors);
2663 		freemsg(mp);
2664 		goto done;
2665 	}
2666 
2667 	/*
2668 	 * Parse the options and update ixa and ipp as a result.
2669 	 * Note that ixa_tsl can be updated if SCM_UCRED.
2670 	 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
2671 	 */
2672 
2673 	coa = &coas;
2674 	coa->coa_connp = connp;
2675 	coa->coa_ixa = ixa;
2676 	coa->coa_ipp = ipp;
2677 	coa->coa_ancillary = B_TRUE;
2678 	coa->coa_changed = 0;
2679 
2680 	if (msg != NULL) {
2681 		error = process_auxiliary_options(connp, msg->msg_control,
2682 		    msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
2683 	} else {
2684 		struct T_unitdata_req *tudr;
2685 
2686 		tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
2687 		ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
2688 		error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
2689 		    &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
2690 		    coa, &is_absreq_failure);
2691 	}
2692 	if (error != 0) {
2693 		/*
2694 		 * Note: No special action needed in this
2695 		 * module for "is_absreq_failure"
2696 		 */
2697 		freemsg(mp);
2698 		UDPS_BUMP_MIB(us, udpOutErrors);
2699 		goto done;
2700 	}
2701 	ASSERT(is_absreq_failure == 0);
2702 
2703 	mutex_enter(&connp->conn_lock);
2704 	/*
2705 	 * If laddr is unspecified then we look at sin6_src_id.
2706 	 * We will give precedence to a source address set with IPV6_PKTINFO
2707 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2708 	 * want ip_attr_connect to select a source (since it can fail) when
2709 	 * IPV6_PKTINFO is specified.
2710 	 * If this doesn't result in a source address then we get a source
2711 	 * from ip_attr_connect() below.
2712 	 */
2713 	v6src = connp->conn_saddr_v6;
2714 	if (sin != NULL) {
2715 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
2716 		dstport = sin->sin_port;
2717 		flowinfo = 0;
2718 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2719 		ixa->ixa_flags |= IXAF_IS_IPV4;
2720 	} else if (sin6 != NULL) {
2721 		boolean_t v4mapped;
2722 
2723 		v6dst = sin6->sin6_addr;
2724 		dstport = sin6->sin6_port;
2725 		flowinfo = sin6->sin6_flowinfo;
		/* srcid is only assigned, and only read, in this branch. */
2726 		srcid = sin6->__sin6_src_id;
2727 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
2728 			ixa->ixa_scopeid = sin6->sin6_scope_id;
2729 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
2730 		} else {
2731 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2732 		}
2733 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
2734 		if (v4mapped)
2735 			ixa->ixa_flags |= IXAF_IS_IPV4;
2736 		else
2737 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
2738 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
2739 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
2740 			    v4mapped, connp->conn_netstack)) {
2741 				/* Mismatch - v4mapped/v6 specified by srcid. */
2742 				mutex_exit(&connp->conn_lock);
2743 				error = EADDRNOTAVAIL;
2744 				goto failed;	/* Does freemsg() and mib. */
2745 			}
2746 		}
2747 	} else {
2748 		/* Connected case */
2749 		v6dst = connp->conn_faddr_v6;
2750 		dstport = connp->conn_fport;
2751 		flowinfo = connp->conn_flowinfo;
2752 	}
2753 	mutex_exit(&connp->conn_lock);
2754 
2755 	/* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
2756 	if (ipp->ipp_fields & IPPF_ADDR) {
		/* Only take ipp_addr when its family matches the packet. */
2757 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
2758 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2759 				v6src = ipp->ipp_addr;
2760 		} else {
2761 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2762 				v6src = ipp->ipp_addr;
2763 		}
2764 	}
2765 
2766 	ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
2767 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
2768 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
2769 
2770 	switch (error) {
2771 	case 0:
2772 		break;
2773 	case EADDRNOTAVAIL:
2774 		/*
2775 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2776 		 * Don't have the application see that errno
2777 		 */
2778 		error = ENETUNREACH;
2779 		goto failed;
2780 	case ENETDOWN:
2781 		/*
2782 		 * Have !ipif_addr_ready address; drop packet silently
2783 		 * until we can get applications to not send until we
2784 		 * are ready.
2785 		 */
2786 		error = 0;
2787 		goto failed;
2788 	case EHOSTUNREACH:
2789 	case ENETUNREACH:
2790 		if (ixa->ixa_ire != NULL) {
2791 			/*
2792 			 * Let conn_ip_output/ire_send_noroute return
2793 			 * the error and send any local ICMP error.
2794 			 */
2795 			error = 0;
2796 			break;
2797 		}
2798 		/* FALLTHRU */
2799 	default:
	/* Also the target of the "goto failed" statements above. */
2800 	failed:
2801 		freemsg(mp);
2802 		UDPS_BUMP_MIB(us, udpOutErrors);
2803 		goto done;
2804 	}
2805 
2806 	/*
2807 	 * We might be going to a different destination than last time,
2808 	 * thus check that TX allows the communication and compute any
2809 	 * needed label.
2810 	 *
2811 	 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
2812 	 * don't have to worry about concurrent threads.
2813 	 */
2814 	if (is_system_labeled()) {
2815 		/* Using UDP MLP requires SCM_UCRED from user */
2816 		if (connp->conn_mlp_type != mlptSingle &&
2817 		    !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
2818 			UDPS_BUMP_MIB(us, udpOutErrors);
2819 			error = ECONNREFUSED;
2820 			freemsg(mp);
2821 			goto done;
2822 		}
2823 		/*
2824 		 * Check whether Trusted Solaris policy allows communication
2825 		 * with this host, and pretend that the destination is
2826 		 * unreachable if not.
2827 		 * Compute any needed label and place it in ipp_label_v4/v6.
2828 		 *
2829 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
2830 		 * ipp_label_v4/v6 to form the packet.
2831 		 *
2832 		 * Tsol note: We have ipp structure local to this thread so
2833 		 * no locking is needed.
2834 		 */
2835 		error = conn_update_label(connp, ixa, &v6dst, ipp);
2836 		if (error != 0) {
2837 			freemsg(mp);
2838 			UDPS_BUMP_MIB(us, udpOutErrors);
2839 			goto done;
2840 		}
2841 	}
2842 	mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
2843 	    flowinfo, mp, &error);
2844 	if (mp == NULL) {
		/* udp_prepend_hdr() frees the data on failure. */
2845 		ASSERT(error != 0);
2846 		UDPS_BUMP_MIB(us, udpOutErrors);
2847 		goto done;
2848 	}
2849 	if (ixa->ixa_pktlen > IP_MAXPACKET) {
2850 		error = EMSGSIZE;
2851 		UDPS_BUMP_MIB(us, udpOutErrors);
2852 		freemsg(mp);
2853 		goto done;
2854 	}
2855 	/* We're done.  Pass the packet to ip. */
2856 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
2857 
2858 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
2859 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
2860 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
2861 
2862 	error = conn_ip_output(mp, ixa);
2863 	/* No udpOutErrors if an error since IP increases its error counter */
2864 	switch (error) {
2865 	case 0:
2866 		break;
2867 	case EWOULDBLOCK:
2868 		(void) ixa_check_drain_insert(connp, ixa);
2869 		error = 0;
2870 		break;
2871 	case EADDRNOTAVAIL:
2872 		/*
2873 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2874 		 * Don't have the application see that errno
2875 		 */
2876 		error = ENETUNREACH;
2877 		/* FALLTHRU */
2878 	default:
2879 		mutex_enter(&connp->conn_lock);
2880 		/*
2881 		 * Clear the source and v6lastdst so we call ip_attr_connect
2882 		 * for the next packet and try to pick a better source.
2883 		 */
2884 		if (connp->conn_mcbc_bind)
2885 			connp->conn_saddr_v6 = ipv6_all_zeros;
2886 		else
2887 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
2888 		connp->conn_v6lastdst = ipv6_all_zeros;
2889 		mutex_exit(&connp->conn_lock);
2890 		break;
2891 	}
	/*
	 * Common exit once ipp exists: give the ixa its conn cred/pid back,
	 * drop our reference on it and free the private ip_pkt_t copy.
	 */
2892 done:
2893 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2894 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
2895 	ixa->ixa_cpid = connp->conn_cpid;
2896 	ixa_refrele(ixa);
2897 	ip_pkt_free(ipp);
2898 	kmem_free(ipp, sizeof (*ipp));
2899 	return (error);
2900 }
2901 
2902 /*
2903  * Handle sending an M_DATA for a connected socket.
2904  * Handles both IPv4 and IPv6.
2905  */
2906 static int
2907 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
2908 {
2909 	udp_t		*udp = connp->conn_udp;
2910 	udp_stack_t	*us = udp->udp_us;
2911 	int		error;
2912 	ip_xmit_attr_t	*ixa;
2913 
2914 	/*
2915 	 * If no other thread is using conn_ixa this just gets a reference to
2916 	 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
2917 	 */
2918 	ixa = conn_get_ixa(connp, B_FALSE);
2919 	if (ixa == NULL) {
2920 		UDPS_BUMP_MIB(us, udpOutErrors);
2921 		freemsg(mp);
2922 		return (ENOMEM);
2923 	}
2924 
2925 	ASSERT(cr != NULL);
2926 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2927 	ixa->ixa_cred = cr;
2928 	ixa->ixa_cpid = pid;
2929 
2930 	mutex_enter(&connp->conn_lock);
2931 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
2932 	    connp->conn_fport, connp->conn_flowinfo, &error);
2933 
2934 	if (mp == NULL) {
2935 		ASSERT(error != 0);
2936 		mutex_exit(&connp->conn_lock);
2937 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2938 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
2939 		ixa->ixa_cpid = connp->conn_cpid;
2940 		ixa_refrele(ixa);
2941 		UDPS_BUMP_MIB(us, udpOutErrors);
2942 		freemsg(mp);
2943 		return (error);
2944 	}
2945 
2946 	/*
2947 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
2948 	 * safe copy, then we need to fill in any pointers in it.
2949 	 */
2950 	if (ixa->ixa_ire == NULL) {
2951 		in6_addr_t	faddr, saddr;
2952 		in6_addr_t	nexthop;
2953 		in_port_t	fport;
2954 
2955 		saddr = connp->conn_saddr_v6;
2956 		faddr = connp->conn_faddr_v6;
2957 		fport = connp->conn_fport;
2958 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
2959 		mutex_exit(&connp->conn_lock);
2960 
2961 		error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
2962 		    fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
2963 		    IPDF_IPSEC);
2964 		switch (error) {
2965 		case 0:
2966 			break;
2967 		case EADDRNOTAVAIL:
2968 			/*
2969 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2970 			 * Don't have the application see that errno
2971 			 */
2972 			error = ENETUNREACH;
2973 			goto failed;
2974 		case ENETDOWN:
2975 			/*
2976 			 * Have !ipif_addr_ready address; drop packet silently
2977 			 * until we can get applications to not send until we
2978 			 * are ready.
2979 			 */
2980 			error = 0;
2981 			goto failed;
2982 		case EHOSTUNREACH:
2983 		case ENETUNREACH:
2984 			if (ixa->ixa_ire != NULL) {
2985 				/*
2986 				 * Let conn_ip_output/ire_send_noroute return
2987 				 * the error and send any local ICMP error.
2988 				 */
2989 				error = 0;
2990 				break;
2991 			}
2992 			/* FALLTHRU */
2993 		default:
2994 		failed:
2995 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2996 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
2997 			ixa->ixa_cpid = connp->conn_cpid;
2998 			ixa_refrele(ixa);
2999 			freemsg(mp);
3000 			UDPS_BUMP_MIB(us, udpOutErrors);
3001 			return (error);
3002 		}
3003 	} else {
3004 		/* Done with conn_t */
3005 		mutex_exit(&connp->conn_lock);
3006 	}
3007 	ASSERT(ixa->ixa_ire != NULL);
3008 
3009 	/* We're done.  Pass the packet to ip. */
3010 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3011 
3012 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3013 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3014 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3015 
3016 	error = conn_ip_output(mp, ixa);
3017 	/* No udpOutErrors if an error since IP increases its error counter */
3018 	switch (error) {
3019 	case 0:
3020 		break;
3021 	case EWOULDBLOCK:
3022 		(void) ixa_check_drain_insert(connp, ixa);
3023 		error = 0;
3024 		break;
3025 	case EADDRNOTAVAIL:
3026 		/*
3027 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3028 		 * Don't have the application see that errno
3029 		 */
3030 		error = ENETUNREACH;
3031 		break;
3032 	}
3033 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3034 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3035 	ixa->ixa_cpid = connp->conn_cpid;
3036 	ixa_refrele(ixa);
3037 	return (error);
3038 }
3039 
3040 /*
3041  * Handle sending an M_DATA to the last destination.
3042  * Handles both IPv4 and IPv6.
3043  *
3044  * NOTE: The caller must hold conn_lock and we drop it here.
3045  */
3046 static int
3047 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3048     ip_xmit_attr_t *ixa)
3049 {
3050 	udp_t		*udp = connp->conn_udp;
3051 	udp_stack_t	*us = udp->udp_us;
3052 	int		error;
3053 
3054 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3055 	ASSERT(ixa != NULL);
3056 
3057 	ASSERT(cr != NULL);
3058 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3059 	ixa->ixa_cred = cr;
3060 	ixa->ixa_cpid = pid;
3061 
3062 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
3063 	    connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
3064 
3065 	if (mp == NULL) {
3066 		ASSERT(error != 0);
3067 		mutex_exit(&connp->conn_lock);
3068 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3069 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
3070 		ixa->ixa_cpid = connp->conn_cpid;
3071 		ixa_refrele(ixa);
3072 		UDPS_BUMP_MIB(us, udpOutErrors);
3073 		freemsg(mp);
3074 		return (error);
3075 	}
3076 
3077 	/*
3078 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3079 	 * safe copy, then we need to fill in any pointers in it.
3080 	 */
3081 	if (ixa->ixa_ire == NULL) {
3082 		in6_addr_t	lastdst, lastsrc;
3083 		in6_addr_t	nexthop;
3084 		in_port_t	lastport;
3085 
3086 		lastsrc = connp->conn_v6lastsrc;
3087 		lastdst = connp->conn_v6lastdst;
3088 		lastport = connp->conn_lastdstport;
3089 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3090 		mutex_exit(&connp->conn_lock);
3091 
3092 		error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3093 		    &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3094 		    IPDF_VERIFY_DST | IPDF_IPSEC);
3095 		switch (error) {
3096 		case 0:
3097 			break;
3098 		case EADDRNOTAVAIL:
3099 			/*
3100 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3101 			 * Don't have the application see that errno
3102 			 */
3103 			error = ENETUNREACH;
3104 			goto failed;
3105 		case ENETDOWN:
3106 			/*
3107 			 * Have !ipif_addr_ready address; drop packet silently
3108 			 * until we can get applications to not send until we
3109 			 * are ready.
3110 			 */
3111 			error = 0;
3112 			goto failed;
3113 		case EHOSTUNREACH:
3114 		case ENETUNREACH:
3115 			if (ixa->ixa_ire != NULL) {
3116 				/*
3117 				 * Let conn_ip_output/ire_send_noroute return
3118 				 * the error and send any local ICMP error.
3119 				 */
3120 				error = 0;
3121 				break;
3122 			}
3123 			/* FALLTHRU */
3124 		default:
3125 		failed:
3126 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3127 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
3128 			ixa->ixa_cpid = connp->conn_cpid;
3129 			ixa_refrele(ixa);
3130 			freemsg(mp);
3131 			UDPS_BUMP_MIB(us, udpOutErrors);
3132 			return (error);
3133 		}
3134 	} else {
3135 		/* Done with conn_t */
3136 		mutex_exit(&connp->conn_lock);
3137 	}
3138 
3139 	/* We're done.  Pass the packet to ip. */
3140 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3141 
3142 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3143 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3144 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3145 
3146 	error = conn_ip_output(mp, ixa);
3147 	/* No udpOutErrors if an error since IP increases its error counter */
3148 	switch (error) {
3149 	case 0:
3150 		break;
3151 	case EWOULDBLOCK:
3152 		(void) ixa_check_drain_insert(connp, ixa);
3153 		error = 0;
3154 		break;
3155 	case EADDRNOTAVAIL:
3156 		/*
3157 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3158 		 * Don't have the application see that errno
3159 		 */
3160 		error = ENETUNREACH;
3161 		/* FALLTHRU */
3162 	default:
3163 		mutex_enter(&connp->conn_lock);
3164 		/*
3165 		 * Clear the source and v6lastdst so we call ip_attr_connect
3166 		 * for the next packet and try to pick a better source.
3167 		 */
3168 		if (connp->conn_mcbc_bind)
3169 			connp->conn_saddr_v6 = ipv6_all_zeros;
3170 		else
3171 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3172 		connp->conn_v6lastdst = ipv6_all_zeros;
3173 		mutex_exit(&connp->conn_lock);
3174 		break;
3175 	}
3176 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3177 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3178 	ixa->ixa_cpid = connp->conn_cpid;
3179 	ixa_refrele(ixa);
3180 	return (error);
3181 }
3182 
3183 
3184 /*
3185  * Prepend the header template and then fill in the source and
3186  * flowinfo. The caller needs to handle the destination address since
3187  * it's setting is different if rthdr or source route.
3188  *
3189  * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET.
3190  * When it returns NULL it sets errorp.
3191  */
3192 static mblk_t *
3193 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3194     const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3195 {
3196 	udp_t		*udp = connp->conn_udp;
3197 	udp_stack_t	*us = udp->udp_us;
3198 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
3199 	uint_t		pktlen;
3200 	uint_t		alloclen;
3201 	uint_t		copylen;
3202 	uint8_t		*iph;
3203 	uint_t		ip_hdr_length;
3204 	udpha_t		*udpha;
3205 	uint32_t	cksum;
3206 	ip_pkt_t	*ipp;
3207 
3208 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3209 
3210 	/*
3211 	 * Copy the header template and leave space for an SPI
3212 	 */
3213 	copylen = connp->conn_ht_iphc_len;
3214 	alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3215 	pktlen = alloclen + msgdsize(mp);
3216 	if (pktlen > IP_MAXPACKET) {
3217 		freemsg(mp);
3218 		*errorp = EMSGSIZE;
3219 		return (NULL);
3220 	}
3221 	ixa->ixa_pktlen = pktlen;
3222 
3223 	/* check/fix buffer config, setup pointers into it */
3224 	iph = mp->b_rptr - alloclen;
3225 	if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3226 		mblk_t *mp1;
3227 
3228 		mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
3229 		if (mp1 == NULL) {
3230 			freemsg(mp);
3231 			*errorp = ENOMEM;
3232 			return (NULL);
3233 		}
3234 		mp1->b_wptr = DB_LIM(mp1);
3235 		mp1->b_cont = mp;
3236 		mp = mp1;
3237 		iph = (mp->b_wptr - alloclen);
3238 	}
3239 	mp->b_rptr = iph;
3240 	bcopy(connp->conn_ht_iphc, iph, copylen);
3241 	ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3242 
3243 	ixa->ixa_ip_hdr_length = ip_hdr_length;
3244 	udpha = (udpha_t *)(iph + ip_hdr_length);
3245 
3246 	/*
3247 	 * Setup header length and prepare for ULP checksum done in IP.
3248 	 * udp_build_hdr_template has already massaged any routing header
3249 	 * and placed the result in conn_sum.
3250 	 *
3251 	 * We make it easy for IP to include our pseudo header
3252 	 * by putting our length in uha_checksum.
3253 	 */
3254 	cksum = pktlen - ip_hdr_length;
3255 	udpha->uha_length = htons(cksum);
3256 
3257 	cksum += connp->conn_sum;
3258 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
3259 	ASSERT(cksum < 0x10000);
3260 
3261 	ipp = &connp->conn_xmit_ipp;
3262 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
3263 		ipha_t	*ipha = (ipha_t *)iph;
3264 
3265 		ipha->ipha_length = htons((uint16_t)pktlen);
3266 
3267 		/* IP does the checksum if uha_checksum is non-zero */
3268 		if (us->us_do_checksum)
3269 			udpha->uha_checksum = htons(cksum);
3270 
3271 		/* if IP_PKTINFO specified an addres it wins over bind() */
3272 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3273 		    IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3274 			ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
3275 			ipha->ipha_src = ipp->ipp_addr_v4;
3276 		} else {
3277 			IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
3278 		}
3279 	} else {
3280 		ip6_t *ip6h = (ip6_t *)iph;
3281 
3282 		ip6h->ip6_plen =  htons((uint16_t)(pktlen - IPV6_HDR_LEN));
3283 		udpha->uha_checksum = htons(cksum);
3284 
3285 		/* if IP_PKTINFO specified an addres it wins over bind() */
3286 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3287 		    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3288 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
3289 			ip6h->ip6_src = ipp->ipp_addr;
3290 		} else {
3291 			ip6h->ip6_src = *v6src;
3292 		}
3293 		ip6h->ip6_vcf =
3294 		    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
3295 		    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
3296 		if (ipp->ipp_fields & IPPF_TCLASS) {
3297 			/* Overrides the class part of flowinfo */
3298 			ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3299 			    ipp->ipp_tclass);
3300 		}
3301 	}
3302 
3303 	/* Insert all-0s SPI now. */
3304 	if (insert_spi)
3305 		*((uint32_t *)(udpha + 1)) = 0;
3306 
3307 	udpha->uha_dst_port = dstport;
3308 	return (mp);
3309 }
3310 
3311 /*
3312  * Send a T_UDERR_IND in response to an M_DATA
3313  */
3314 static void
3315 udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3316 {
3317 	struct sockaddr_storage ss;
3318 	sin_t		*sin;
3319 	sin6_t		*sin6;
3320 	struct sockaddr	*addr;
3321 	socklen_t	addrlen;
3322 	mblk_t		*mp1;
3323 
3324 	mutex_enter(&connp->conn_lock);
3325 	/* Initialize addr and addrlen as if they're passed in */
3326 	if (connp->conn_family == AF_INET) {
3327 		sin = (sin_t *)&ss;
3328 		*sin = sin_null;
3329 		sin->sin_family = AF_INET;
3330 		sin->sin_port = connp->conn_fport;
3331 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
3332 		addr = (struct sockaddr *)sin;
3333 		addrlen = sizeof (*sin);
3334 	} else {
3335 		sin6 = (sin6_t *)&ss;
3336 		*sin6 = sin6_null;
3337 		sin6->sin6_family = AF_INET6;
3338 		sin6->sin6_port = connp->conn_fport;
3339 		sin6->sin6_flowinfo = connp->conn_flowinfo;
3340 		sin6->sin6_addr = connp->conn_faddr_v6;
3341 		if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
3342 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
3343 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
3344 		} else {
3345 			sin6->sin6_scope_id = 0;
3346 		}
3347 		sin6->__sin6_src_id = 0;
3348 		addr = (struct sockaddr *)sin6;
3349 		addrlen = sizeof (*sin6);
3350 	}
3351 	mutex_exit(&connp->conn_lock);
3352 
3353 	mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
3354 	if (mp1 != NULL)
3355 		putnext(connp->conn_rq, mp1);
3356 }
3357 
3358 /*
3359  * This routine handles all messages passed downstream.  It either
3360  * consumes the message or passes it downstream; it never queues a
3361  * a message.
3362  *
3363  * Also entry point for sockfs when udp is in "direct sockfs" mode.  This mode
3364  * is valid when we are directly beneath the stream head, and thus sockfs
3365  * is able to bypass STREAMS and directly call us, passing along the sockaddr
3366  * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3367  * connected endpoints.
3368  */
3369 int
3370 udp_wput(queue_t *q, mblk_t *mp)
3371 {
3372 	sin6_t		*sin6;
3373 	sin_t		*sin = NULL;
3374 	uint_t		srcid;
3375 	conn_t		*connp = Q_TO_CONN(q);
3376 	udp_t		*udp = connp->conn_udp;
3377 	int		error = 0;
3378 	struct sockaddr	*addr = NULL;
3379 	socklen_t	addrlen;
3380 	udp_stack_t	*us = udp->udp_us;
3381 	struct T_unitdata_req *tudr;
3382 	mblk_t		*data_mp;
3383 	ushort_t	ipversion;
3384 	cred_t		*cr;
3385 	pid_t		pid;
3386 
3387 	/*
3388 	 * We directly handle several cases here: T_UNITDATA_REQ message
3389 	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3390 	 * socket.
3391 	 */
3392 	switch (DB_TYPE(mp)) {
3393 	case M_DATA:
3394 		if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
3395 			/* Not connected; address is required */
3396 			UDPS_BUMP_MIB(us, udpOutErrors);
3397 			UDP_DBGSTAT(us, udp_data_notconn);
3398 			UDP_STAT(us, udp_out_err_notconn);
3399 			freemsg(mp);
3400 			return (0);
3401 		}
3402 		/*
3403 		 * All Solaris components should pass a db_credp
3404 		 * for this message, hence we ASSERT.
3405 		 * On production kernels we return an error to be robust against
3406 		 * random streams modules sitting on top of us.
3407 		 */
3408 		cr = msg_getcred(mp, &pid);
3409 		ASSERT(cr != NULL);
3410 		if (cr == NULL) {
3411 			UDPS_BUMP_MIB(us, udpOutErrors);
3412 			freemsg(mp);
3413 			return (0);
3414 		}
3415 		ASSERT(udp->udp_issocket);
3416 		UDP_DBGSTAT(us, udp_data_conn);
3417 		error = udp_output_connected(connp, mp, cr, pid);
3418 		if (error != 0) {
3419 			UDP_STAT(us, udp_out_err_output);
3420 			if (connp->conn_rq != NULL)
3421 				udp_ud_err_connected(connp, (t_scalar_t)error);
3422 #ifdef DEBUG
3423 			printf("udp_output_connected returned %d\n", error);
3424 #endif
3425 		}
3426 		return (0);
3427 
3428 	case M_PROTO:
3429 	case M_PCPROTO:
3430 		tudr = (struct T_unitdata_req *)mp->b_rptr;
3431 		if (MBLKL(mp) < sizeof (*tudr) ||
3432 		    ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
3433 			udp_wput_other(q, mp);
3434 			return (0);
3435 		}
3436 		break;
3437 
3438 	default:
3439 		udp_wput_other(q, mp);
3440 		return (0);
3441 	}
3442 
3443 	/* Handle valid T_UNITDATA_REQ here */
3444 	data_mp = mp->b_cont;
3445 	if (data_mp == NULL) {
3446 		error = EPROTO;
3447 		goto ud_error2;
3448 	}
3449 	mp->b_cont = NULL;
3450 
3451 	if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
3452 		error = EADDRNOTAVAIL;
3453 		goto ud_error2;
3454 	}
3455 
3456 	/*
3457 	 * All Solaris components should pass a db_credp
3458 	 * for this TPI message, hence we should ASSERT.
3459 	 * However, RPC (svc_clts_ksend) does this odd thing where it
3460 	 * passes the options from a T_UNITDATA_IND unchanged in a
3461 	 * T_UNITDATA_REQ. While that is the right thing to do for
3462 	 * some options, SCM_UCRED being the key one, this also makes it
3463 	 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3464 	 */
3465 	cr = msg_getcred(mp, &pid);
3466 	if (cr == NULL) {
3467 		cr = connp->conn_cred;
3468 		pid = connp->conn_cpid;
3469 	}
3470 
3471 	/*
3472 	 * If a port has not been bound to the stream, fail.
3473 	 * This is not a problem when sockfs is directly
3474 	 * above us, because it will ensure that the socket
3475 	 * is first bound before allowing data to be sent.
3476 	 */
3477 	if (udp->udp_state == TS_UNBND) {
3478 		error = EPROTO;
3479 		goto ud_error2;
3480 	}
3481 	addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
3482 	addrlen = tudr->DEST_length;
3483 
3484 	switch (connp->conn_family) {
3485 	case AF_INET6:
3486 		sin6 = (sin6_t *)addr;
3487 		if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
3488 		    (sin6->sin6_family != AF_INET6)) {
3489 			error = EADDRNOTAVAIL;
3490 			goto ud_error2;
3491 		}
3492 
3493 		srcid = sin6->__sin6_src_id;
3494 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
3495 			/*
3496 			 * Destination is a non-IPv4-compatible IPv6 address.
3497 			 * Send out an IPv6 format packet.
3498 			 */
3499 
3500 			/*
3501 			 * If the local address is a mapped address return
3502 			 * an error.
3503 			 * It would be possible to send an IPv6 packet but the
3504 			 * response would never make it back to the application
3505 			 * since it is bound to a mapped address.
3506 			 */
3507 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
3508 				error = EADDRNOTAVAIL;
3509 				goto ud_error2;
3510 			}
3511 
3512 			UDP_DBGSTAT(us, udp_out_ipv6);
3513 
3514 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
3515 				sin6->sin6_addr = ipv6_loopback;
3516 			ipversion = IPV6_VERSION;
3517 		} else {
3518 			if (connp->conn_ipv6_v6only) {
3519 				error = EADDRNOTAVAIL;
3520 				goto ud_error2;
3521 			}
3522 
3523 			/*
3524 			 * If the local address is not zero or a mapped address
3525 			 * return an error.  It would be possible to send an
3526 			 * IPv4 packet but the response would never make it
3527 			 * back to the application since it is bound to a
3528 			 * non-mapped address.
3529 			 */
3530 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
3531 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
3532 				error = EADDRNOTAVAIL;
3533 				goto ud_error2;
3534 			}
3535 			UDP_DBGSTAT(us, udp_out_mapped);
3536 
3537 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
3538 				V4_PART_OF_V6(sin6->sin6_addr) =
3539 				    htonl(INADDR_LOOPBACK);
3540 			}
3541 			ipversion = IPV4_VERSION;
3542 		}
3543 
3544 		if (tudr->OPT_length != 0) {
3545 			/*
3546 			 * If we are connected then the destination needs to be
3547 			 * the same as the connected one.
3548 			 */
3549 			if (udp->udp_state == TS_DATA_XFER &&
3550 			    !conn_same_as_last_v6(connp, sin6)) {
3551 				error = EISCONN;
3552 				goto ud_error2;
3553 			}
3554 			UDP_STAT(us, udp_out_opt);
3555 			error = udp_output_ancillary(connp, NULL, sin6,
3556 			    data_mp, mp, NULL, cr, pid);
3557 		} else {
3558 			ip_xmit_attr_t *ixa;
3559 
3560 			/*
3561 			 * We have to allocate an ip_xmit_attr_t before we grab
3562 			 * conn_lock and we need to hold conn_lock once we've
3563 			 * checked conn_same_as_last_v6 to handle concurrent
3564 			 * send* calls on a socket.
3565 			 */
3566 			ixa = conn_get_ixa(connp, B_FALSE);
3567 			if (ixa == NULL) {
3568 				error = ENOMEM;
3569 				goto ud_error2;
3570 			}
3571 			mutex_enter(&connp->conn_lock);
3572 
3573 			if (conn_same_as_last_v6(connp, sin6) &&
3574 			    connp->conn_lastsrcid == srcid &&
3575 			    ipsec_outbound_policy_current(ixa)) {
3576 				UDP_DBGSTAT(us, udp_out_lastdst);
3577 				/* udp_output_lastdst drops conn_lock */
3578 				error = udp_output_lastdst(connp, data_mp, cr,
3579 				    pid, ixa);
3580 			} else {
3581 				UDP_DBGSTAT(us, udp_out_diffdst);
3582 				/* udp_output_newdst drops conn_lock */
3583 				error = udp_output_newdst(connp, data_mp, NULL,
3584 				    sin6, ipversion, cr, pid, ixa);
3585 			}
3586 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3587 		}
3588 		if (error == 0) {
3589 			freeb(mp);
3590 			return (0);
3591 		}
3592 		break;
3593 
3594 	case AF_INET:
3595 		sin = (sin_t *)addr;
3596 		if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
3597 		    (sin->sin_family != AF_INET)) {
3598 			error = EADDRNOTAVAIL;
3599 			goto ud_error2;
3600 		}
3601 		UDP_DBGSTAT(us, udp_out_ipv4);
3602 		if (sin->sin_addr.s_addr == INADDR_ANY)
3603 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
3604 		ipversion = IPV4_VERSION;
3605 
3606 		srcid = 0;
3607 		if (tudr->OPT_length != 0) {
3608 			/*
3609 			 * If we are connected then the destination needs to be
3610 			 * the same as the connected one.
3611 			 */
3612 			if (udp->udp_state == TS_DATA_XFER &&
3613 			    !conn_same_as_last_v4(connp, sin)) {
3614 				error = EISCONN;
3615 				goto ud_error2;
3616 			}
3617 			UDP_STAT(us, udp_out_opt);
3618 			error = udp_output_ancillary(connp, sin, NULL,
3619 			    data_mp, mp, NULL, cr, pid);
3620 		} else {
3621 			ip_xmit_attr_t *ixa;
3622 
3623 			/*
3624 			 * We have to allocate an ip_xmit_attr_t before we grab
3625 			 * conn_lock and we need to hold conn_lock once we've
3626 			 * checked conn_same_as_last_v4 to handle concurrent
3627 			 * send* calls on a socket.
3628 			 */
3629 			ixa = conn_get_ixa(connp, B_FALSE);
3630 			if (ixa == NULL) {
3631 				error = ENOMEM;
3632 				goto ud_error2;
3633 			}
3634 			mutex_enter(&connp->conn_lock);
3635 
3636 			if (conn_same_as_last_v4(connp, sin) &&
3637 			    ipsec_outbound_policy_current(ixa)) {
3638 				UDP_DBGSTAT(us, udp_out_lastdst);
3639 				/* udp_output_lastdst drops conn_lock */
3640 				error = udp_output_lastdst(connp, data_mp, cr,
3641 				    pid, ixa);
3642 			} else {
3643 				UDP_DBGSTAT(us, udp_out_diffdst);
3644 				/* udp_output_newdst drops conn_lock */
3645 				error = udp_output_newdst(connp, data_mp, sin,
3646 				    NULL, ipversion, cr, pid, ixa);
3647 			}
3648 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3649 		}
3650 		if (error == 0) {
3651 			freeb(mp);
3652 			return (0);
3653 		}
3654 		break;
3655 	}
3656 	UDP_STAT(us, udp_out_err_output);
3657 	ASSERT(mp != NULL);
3658 	/* mp is freed by the following routine */
3659 	udp_ud_err(q, mp, (t_scalar_t)error);
3660 	return (0);
3661 
3662 ud_error2:
3663 	UDPS_BUMP_MIB(us, udpOutErrors);
3664 	freemsg(data_mp);
3665 	UDP_STAT(us, udp_out_err_output);
3666 	ASSERT(mp != NULL);
3667 	/* mp is freed by the following routine */
3668 	udp_ud_err(q, mp, (t_scalar_t)error);
3669 	return (0);
3670 }
3671 
3672 /*
3673  * Handle the case of the IP address, port, flow label being different
3674  * for both IPv4 and IPv6.
3675  *
3676  * NOTE: The caller must hold conn_lock and we drop it here.
3677  */
3678 static int
3679 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
3680     ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
3681 {
3682 	uint_t		srcid;
3683 	uint32_t	flowinfo;
3684 	udp_t		*udp = connp->conn_udp;
3685 	int		error = 0;
3686 	ip_xmit_attr_t	*oldixa;
3687 	udp_stack_t	*us = udp->udp_us;
3688 	in6_addr_t	v6src;
3689 	in6_addr_t	v6dst;
3690 	in6_addr_t	v6nexthop;
3691 	in_port_t	dstport;
3692 
3693 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3694 	ASSERT(ixa != NULL);
3695 	/*
3696 	 * We hold conn_lock across all the use and modifications of
3697 	 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3698 	 * stay consistent.
3699 	 */
3700 
3701 	ASSERT(cr != NULL);
3702 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3703 	ixa->ixa_cred = cr;
3704 	ixa->ixa_cpid = pid;
3705 	if (is_system_labeled()) {
3706 		/* We need to restart with a label based on the cred */
3707 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3708 	}
3709 
3710 	/*
3711 	 * If we are connected then the destination needs to be the
3712 	 * same as the connected one, which is not the case here since we
3713 	 * checked for that above.
3714 	 */
3715 	if (udp->udp_state == TS_DATA_XFER) {
3716 		mutex_exit(&connp->conn_lock);
3717 		error = EISCONN;
3718 		goto ud_error;
3719 	}
3720 
3721 	/* In case previous destination was multicast or multirt */
3722 	ip_attr_newdst(ixa);
3723 
3724 	/*
3725 	 * If laddr is unspecified then we look at sin6_src_id.
3726 	 * We will give precedence to a source address set with IPV6_PKTINFO
3727 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3728 	 * want ip_attr_connect to select a source (since it can fail) when
3729 	 * IPV6_PKTINFO is specified.
3730 	 * If this doesn't result in a source address then we get a source
3731 	 * from ip_attr_connect() below.
3732 	 */
3733 	v6src = connp->conn_saddr_v6;
3734 	if (sin != NULL) {
3735 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3736 		dstport = sin->sin_port;
3737 		flowinfo = 0;
3738 		/* Don't bother with ip_srcid_find_id(), but indicate anyway. */
3739 		srcid = 0;
3740 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3741 		ixa->ixa_flags |= IXAF_IS_IPV4;
3742 	} else {
3743 		boolean_t v4mapped;
3744 
3745 		v6dst = sin6->sin6_addr;
3746 		dstport = sin6->sin6_port;
3747 		flowinfo = sin6->sin6_flowinfo;
3748 		srcid = sin6->__sin6_src_id;
3749 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3750 			ixa->ixa_scopeid = sin6->sin6_scope_id;
3751 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
3752 		} else {
3753 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3754 		}
3755 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
3756 		if (v4mapped)
3757 			ixa->ixa_flags |= IXAF_IS_IPV4;
3758 		else
3759 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
3760 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3761 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3762 			    v4mapped, connp->conn_netstack)) {
3763 				/* Mismatched v4mapped/v6 specified by srcid. */
3764 				mutex_exit(&connp->conn_lock);
3765 				error = EADDRNOTAVAIL;
3766 				goto ud_error;
3767 			}
3768 		}
3769 	}
3770 	/* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3771 	if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) {
3772 		ip_pkt_t *ipp = &connp->conn_xmit_ipp;
3773 
3774 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
3775 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3776 				v6src = ipp->ipp_addr;
3777 		} else {
3778 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3779 				v6src = ipp->ipp_addr;
3780 		}
3781 	}
3782 
3783 	ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
3784 	mutex_exit(&connp->conn_lock);
3785 
3786 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3787 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
3788 	switch (error) {
3789 	case 0:
3790 		break;
3791 	case EADDRNOTAVAIL:
3792 		/*
3793 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3794 		 * Don't have the application see that errno
3795 		 */
3796 		error = ENETUNREACH;
3797 		goto failed;
3798 	case ENETDOWN:
3799 		/*
3800 		 * Have !ipif_addr_ready address; drop packet silently
3801 		 * until we can get applications to not send until we
3802 		 * are ready.
3803 		 */
3804 		error = 0;
3805 		goto failed;
3806 	case EHOSTUNREACH:
3807 	case ENETUNREACH:
3808 		if (ixa->ixa_ire != NULL) {
3809 			/*
3810 			 * Let conn_ip_output/ire_send_noroute return
3811 			 * the error and send any local ICMP error.
3812 			 */
3813 			error = 0;
3814 			break;
3815 		}
3816 		/* FALLTHRU */
3817 	failed:
3818 	default:
3819 		goto ud_error;
3820 	}
3821 
3822 
3823 	/*
3824 	 * Cluster note: we let the cluster hook know that we are sending to a
3825 	 * new address and/or port.
3826 	 */
3827 	if (cl_inet_connect2 != NULL) {
3828 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
3829 		if (error != 0) {
3830 			error = EHOSTUNREACH;
3831 			goto ud_error;
3832 		}
3833 	}
3834 
3835 	mutex_enter(&connp->conn_lock);
3836 	/*
3837 	 * While we dropped the lock some other thread might have connected
3838 	 * this socket. If so we bail out with EISCONN to ensure that the
3839 	 * connecting thread is the one that updates conn_ixa, conn_ht_*
3840 	 * and conn_*last*.
3841 	 */
3842 	if (udp->udp_state == TS_DATA_XFER) {
3843 		mutex_exit(&connp->conn_lock);
3844 		error = EISCONN;
3845 		goto ud_error;
3846 	}
3847 
3848 	/*
3849 	 * We need to rebuild the headers if
3850 	 *  - we are labeling packets (could be different for different
3851 	 *    destinations)
3852 	 *  - we have a source route (or routing header) since we need to
3853 	 *    massage that to get the pseudo-header checksum
3854 	 *  - the IP version is different than the last time
3855 	 *  - a socket option with COA_HEADER_CHANGED has been set which
3856 	 *    set conn_v6lastdst to zero.
3857 	 *
3858 	 * Otherwise the prepend function will just update the src, dst,
3859 	 * dstport, and flow label.
3860 	 */
3861 	if (is_system_labeled()) {
3862 		/* TX MLP requires SCM_UCRED and don't have that here */
3863 		if (connp->conn_mlp_type != mlptSingle) {
3864 			mutex_exit(&connp->conn_lock);
3865 			error = ECONNREFUSED;
3866 			goto ud_error;
3867 		}
3868 		/*
3869 		 * Check whether Trusted Solaris policy allows communication
3870 		 * with this host, and pretend that the destination is
3871 		 * unreachable if not.
3872 		 * Compute any needed label and place it in ipp_label_v4/v6.
3873 		 *
3874 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
3875 		 * ipp_label_v4/v6 to form the packet.
3876 		 *
3877 		 * Tsol note: Since we hold conn_lock we know no other
3878 		 * thread manipulates conn_xmit_ipp.
3879 		 */
3880 		error = conn_update_label(connp, ixa, &v6dst,
3881 		    &connp->conn_xmit_ipp);
3882 		if (error != 0) {
3883 			mutex_exit(&connp->conn_lock);
3884 			goto ud_error;
3885 		}
3886 		/* Rebuild the header template */
3887 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
3888 		    flowinfo);
3889 		if (error != 0) {
3890 			mutex_exit(&connp->conn_lock);
3891 			goto ud_error;
3892 		}
3893 	} else if ((connp->conn_xmit_ipp.ipp_fields &
3894 	    (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
3895 	    ipversion != connp->conn_lastipversion ||
3896 	    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
3897 		/* Rebuild the header template */
3898 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
3899 		    flowinfo);
3900 		if (error != 0) {
3901 			mutex_exit(&connp->conn_lock);
3902 			goto ud_error;
3903 		}
3904 	} else {
3905 		/* Simply update the destination address if no source route */
3906 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
3907 			ipha_t	*ipha = (ipha_t *)connp->conn_ht_iphc;
3908 
3909 			IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
3910 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
3911 				ipha->ipha_fragment_offset_and_flags |=
3912 				    IPH_DF_HTONS;
3913 			} else {
3914 				ipha->ipha_fragment_offset_and_flags &=
3915 				    ~IPH_DF_HTONS;
3916 			}
3917 		} else {
3918 			ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
3919 			ip6h->ip6_dst = v6dst;
3920 		}
3921 	}
3922 
3923 	/*
3924 	 * Remember the dst/dstport etc which corresponds to the built header
3925 	 * template and conn_ixa.
3926 	 */
3927 	oldixa = conn_replace_ixa(connp, ixa);
3928 	connp->conn_v6lastdst = v6dst;
3929 	connp->conn_lastipversion = ipversion;
3930 	connp->conn_lastdstport = dstport;
3931 	connp->conn_lastflowinfo = flowinfo;
3932 	connp->conn_lastscopeid = ixa->ixa_scopeid;
3933 	connp->conn_lastsrcid = srcid;
3934 	/* Also remember a source to use together with lastdst */
3935 	connp->conn_v6lastsrc = v6src;
3936 
3937 	data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
3938 	    dstport, flowinfo, &error);
3939 
3940 	/* Done with conn_t */
3941 	mutex_exit(&connp->conn_lock);
3942 	ixa_refrele(oldixa);
3943 
3944 	if (data_mp == NULL) {
3945 		ASSERT(error != 0);
3946 		goto ud_error;
3947 	}
3948 
3949 	/* We're done.  Pass the packet to ip. */
3950 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3951 
3952 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3953 	    void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *,
3954 	    &data_mp->b_rptr[ixa->ixa_ip_hdr_length]);
3955 
3956 	error = conn_ip_output(data_mp, ixa);
3957 	/* No udpOutErrors if an error since IP increases its error counter */
3958 	switch (error) {
3959 	case 0:
3960 		break;
3961 	case EWOULDBLOCK:
3962 		(void) ixa_check_drain_insert(connp, ixa);
3963 		error = 0;
3964 		break;
3965 	case EADDRNOTAVAIL:
3966 		/*
3967 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3968 		 * Don't have the application see that errno
3969 		 */
3970 		error = ENETUNREACH;
3971 		/* FALLTHRU */
3972 	default:
3973 		mutex_enter(&connp->conn_lock);
3974 		/*
3975 		 * Clear the source and v6lastdst so we call ip_attr_connect
3976 		 * for the next packet and try to pick a better source.
3977 		 */
3978 		if (connp->conn_mcbc_bind)
3979 			connp->conn_saddr_v6 = ipv6_all_zeros;
3980 		else
3981 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3982 		connp->conn_v6lastdst = ipv6_all_zeros;
3983 		mutex_exit(&connp->conn_lock);
3984 		break;
3985 	}
3986 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3987 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3988 	ixa->ixa_cpid = connp->conn_cpid;
3989 	ixa_refrele(ixa);
3990 	return (error);
3991 
3992 ud_error:
3993 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3994 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3995 	ixa->ixa_cpid = connp->conn_cpid;
3996 	ixa_refrele(ixa);
3997 
3998 	freemsg(data_mp);
3999 	UDPS_BUMP_MIB(us, udpOutErrors);
4000 	UDP_STAT(us, udp_out_err_output);
4001 	return (error);
4002 }
4003 
4004 /* ARGSUSED */
4005 static int
4006 udp_wput_fallback(queue_t *wq, mblk_t *mp)
4007 {
4008 #ifdef DEBUG
4009 	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
4010 #endif
4011 	freemsg(mp);
4012 	return (0);
4013 }
4014 
4015 
4016 /*
4017  * Handle special out-of-band ioctl requests (see PSARC/2008/265).
4018  */
4019 static void
4020 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
4021 {
4022 	void	*data;
4023 	mblk_t	*datamp = mp->b_cont;
4024 	conn_t	*connp = Q_TO_CONN(q);
4025 	udp_t	*udp = connp->conn_udp;
4026 	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
4027 
4028 	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
4029 		cmdp->cb_error = EPROTO;
4030 		qreply(q, mp);
4031 		return;
4032 	}
4033 	data = datamp->b_rptr;
4034 
4035 	mutex_enter(&connp->conn_lock);
4036 	switch (cmdp->cb_cmd) {
4037 	case TI_GETPEERNAME:
4038 		if (udp->udp_state != TS_DATA_XFER)
4039 			cmdp->cb_error = ENOTCONN;
4040 		else
4041 			cmdp->cb_error = conn_getpeername(connp, data,
4042 			    &cmdp->cb_len);
4043 		break;
4044 	case TI_GETMYNAME:
4045 		cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
4046 		break;
4047 	default:
4048 		cmdp->cb_error = EINVAL;
4049 		break;
4050 	}
4051 	mutex_exit(&connp->conn_lock);
4052 
4053 	qreply(q, mp);
4054 }
4055 
4056 static void
4057 udp_use_pure_tpi(udp_t *udp)
4058 {
4059 	conn_t	*connp = udp->udp_connp;
4060 
4061 	mutex_enter(&connp->conn_lock);
4062 	udp->udp_issocket = B_FALSE;
4063 	mutex_exit(&connp->conn_lock);
4064 	UDP_STAT(udp->udp_us, udp_sock_fallback);
4065 }
4066 
4067 static void
4068 udp_wput_other(queue_t *q, mblk_t *mp)
4069 {
4070 	uchar_t	*rptr = mp->b_rptr;
4071 	struct iocblk *iocp;
4072 	conn_t	*connp = Q_TO_CONN(q);
4073 	udp_t	*udp = connp->conn_udp;
4074 	cred_t	*cr;
4075 
4076 	switch (mp->b_datap->db_type) {
4077 	case M_CMD:
4078 		udp_wput_cmdblk(q, mp);
4079 		return;
4080 
4081 	case M_PROTO:
4082 	case M_PCPROTO:
4083 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4084 			/*
4085 			 * If the message does not contain a PRIM_type,
4086 			 * throw it away.
4087 			 */
4088 			freemsg(mp);
4089 			return;
4090 		}
4091 		switch (((t_primp_t)rptr)->type) {
4092 		case T_ADDR_REQ:
4093 			udp_addr_req(q, mp);
4094 			return;
4095 		case O_T_BIND_REQ:
4096 		case T_BIND_REQ:
4097 			udp_tpi_bind(q, mp);
4098 			return;
4099 		case T_CONN_REQ:
4100 			udp_tpi_connect(q, mp);
4101 			return;
4102 		case T_CAPABILITY_REQ:
4103 			udp_capability_req(q, mp);
4104 			return;
4105 		case T_INFO_REQ:
4106 			udp_info_req(q, mp);
4107 			return;
4108 		case T_UNITDATA_REQ:
4109 			/*
4110 			 * If a T_UNITDATA_REQ gets here, the address must
4111 			 * be bad.  Valid T_UNITDATA_REQs are handled
4112 			 * in udp_wput.
4113 			 */
4114 			udp_ud_err(q, mp, EADDRNOTAVAIL);
4115 			return;
4116 		case T_UNBIND_REQ:
4117 			udp_tpi_unbind(q, mp);
4118 			return;
4119 		case T_SVR4_OPTMGMT_REQ:
4120 			/*
4121 			 * All Solaris components should pass a db_credp
4122 			 * for this TPI message, hence we ASSERT.
4123 			 * But in case there is some other M_PROTO that looks
4124 			 * like a TPI message sent by some other kernel
4125 			 * component, we check and return an error.
4126 			 */
4127 			cr = msg_getcred(mp, NULL);
4128 			ASSERT(cr != NULL);
4129 			if (cr == NULL) {
4130 				udp_err_ack(q, mp, TSYSERR, EINVAL);
4131 				return;
4132 			}
4133 			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
4134 			    cr)) {
4135 				svr4_optcom_req(q, mp, cr, &udp_opt_obj);
4136 			}
4137 			return;
4138 
4139 		case T_OPTMGMT_REQ:
4140 			/*
4141 			 * All Solaris components should pass a db_credp
4142 			 * for this TPI message, hence we ASSERT.
4143 			 * But in case there is some other M_PROTO that looks
4144 			 * like a TPI message sent by some other kernel
4145 			 * component, we check and return an error.
4146 			 */
4147 			cr = msg_getcred(mp, NULL);
4148 			ASSERT(cr != NULL);
4149 			if (cr == NULL) {
4150 				udp_err_ack(q, mp, TSYSERR, EINVAL);
4151 				return;
4152 			}
4153 			tpi_optcom_req(q, mp, cr, &udp_opt_obj);
4154 			return;
4155 
4156 		case T_DISCON_REQ:
4157 			udp_tpi_disconnect(q, mp);
4158 			return;
4159 
4160 		/* The following TPI message is not supported by udp. */
4161 		case O_T_CONN_RES:
4162 		case T_CONN_RES:
4163 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
4164 			return;
4165 
4166 		/* The following 3 TPI requests are illegal for udp. */
4167 		case T_DATA_REQ:
4168 		case T_EXDATA_REQ:
4169 		case T_ORDREL_REQ:
4170 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
4171 			return;
4172 		default:
4173 			break;
4174 		}
4175 		break;
4176 	case M_FLUSH:
4177 		if (*rptr & FLUSHW)
4178 			flushq(q, FLUSHDATA);
4179 		break;
4180 	case M_IOCTL:
4181 		iocp = (struct iocblk *)mp->b_rptr;
4182 		switch (iocp->ioc_cmd) {
4183 		case TI_GETPEERNAME:
4184 			if (udp->udp_state != TS_DATA_XFER) {
4185 				/*
4186 				 * If a default destination address has not
4187 				 * been associated with the stream, then we
4188 				 * don't know the peer's name.
4189 				 */
4190 				iocp->ioc_error = ENOTCONN;
4191 				iocp->ioc_count = 0;
4192 				mp->b_datap->db_type = M_IOCACK;
4193 				qreply(q, mp);
4194 				return;
4195 			}
4196 			/* FALLTHRU */
4197 		case TI_GETMYNAME:
4198 			/*
4199 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4200 			 * need to copyin the user's strbuf structure.
4201 			 * Processing will continue in the M_IOCDATA case
4202 			 * below.
4203 			 */
4204 			mi_copyin(q, mp, NULL,
4205 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
4206 			return;
4207 		case _SIOCSOCKFALLBACK:
4208 			/*
4209 			 * Either sockmod is about to be popped and the
4210 			 * socket would now be treated as a plain stream,
4211 			 * or a module is about to be pushed so we have
4212 			 * to follow pure TPI semantics.
4213 			 */
4214 			if (!udp->udp_issocket) {
4215 				DB_TYPE(mp) = M_IOCNAK;
4216 				iocp->ioc_error = EINVAL;
4217 			} else {
4218 				udp_use_pure_tpi(udp);
4219 
4220 				DB_TYPE(mp) = M_IOCACK;
4221 				iocp->ioc_error = 0;
4222 			}
4223 			iocp->ioc_count = 0;
4224 			iocp->ioc_rval = 0;
4225 			qreply(q, mp);
4226 			return;
4227 		default:
4228 			break;
4229 		}
4230 		break;
4231 	case M_IOCDATA:
4232 		udp_wput_iocdata(q, mp);
4233 		return;
4234 	default:
4235 		/* Unrecognized messages are passed through without change. */
4236 		break;
4237 	}
4238 	ip_wput_nondata(q, mp);
4239 }
4240 
4241 /*
4242  * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
4243  * messages.
4244  */
4245 static void
4246 udp_wput_iocdata(queue_t *q, mblk_t *mp)
4247 {
4248 	mblk_t		*mp1;
4249 	struct	iocblk *iocp = (struct iocblk *)mp->b_rptr;
4250 	STRUCT_HANDLE(strbuf, sb);
4251 	uint_t		addrlen;
4252 	conn_t		*connp = Q_TO_CONN(q);
4253 	udp_t		*udp = connp->conn_udp;
4254 
4255 	/* Make sure it is one of ours. */
4256 	switch (iocp->ioc_cmd) {
4257 	case TI_GETMYNAME:
4258 	case TI_GETPEERNAME:
4259 		break;
4260 	default:
4261 		ip_wput_nondata(q, mp);
4262 		return;
4263 	}
4264 
4265 	switch (mi_copy_state(q, mp, &mp1)) {
4266 	case -1:
4267 		return;
4268 	case MI_COPY_CASE(MI_COPY_IN, 1):
4269 		break;
4270 	case MI_COPY_CASE(MI_COPY_OUT, 1):
4271 		/*
4272 		 * The address has been copied out, so now
4273 		 * copyout the strbuf.
4274 		 */
4275 		mi_copyout(q, mp);
4276 		return;
4277 	case MI_COPY_CASE(MI_COPY_OUT, 2):
4278 		/*
4279 		 * The address and strbuf have been copied out.
4280 		 * We're done, so just acknowledge the original
4281 		 * M_IOCTL.
4282 		 */
4283 		mi_copy_done(q, mp, 0);
4284 		return;
4285 	default:
4286 		/*
4287 		 * Something strange has happened, so acknowledge
4288 		 * the original M_IOCTL with an EPROTO error.
4289 		 */
4290 		mi_copy_done(q, mp, EPROTO);
4291 		return;
4292 	}
4293 
4294 	/*
4295 	 * Now we have the strbuf structure for TI_GETMYNAME
4296 	 * and TI_GETPEERNAME.  Next we copyout the requested
4297 	 * address and then we'll copyout the strbuf.
4298 	 */
4299 	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
4300 
4301 	if (connp->conn_family == AF_INET)
4302 		addrlen = sizeof (sin_t);
4303 	else
4304 		addrlen = sizeof (sin6_t);
4305 
4306 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
4307 		mi_copy_done(q, mp, EINVAL);
4308 		return;
4309 	}
4310 
4311 	switch (iocp->ioc_cmd) {
4312 	case TI_GETMYNAME:
4313 		break;
4314 	case TI_GETPEERNAME:
4315 		if (udp->udp_state != TS_DATA_XFER) {
4316 			mi_copy_done(q, mp, ENOTCONN);
4317 			return;
4318 		}
4319 		break;
4320 	}
4321 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
4322 	if (!mp1)
4323 		return;
4324 
4325 	STRUCT_FSET(sb, len, addrlen);
4326 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4327 	case TI_GETMYNAME:
4328 		(void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
4329 		    &addrlen);
4330 		break;
4331 	case TI_GETPEERNAME:
4332 		(void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
4333 		    &addrlen);
4334 		break;
4335 	}
4336 	mp1->b_wptr += addrlen;
4337 	/* Copy out the address */
4338 	mi_copyout(q, mp);
4339 }
4340 
4341 void
4342 udp_ddi_g_init(void)
4343 {
4344 	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
4345 	    udp_opt_obj.odb_opt_arr_cnt);
4346 
4347 	/*
4348 	 * We want to be informed each time a stack is created or
4349 	 * destroyed in the kernel, so we can maintain the
4350 	 * set of udp_stack_t's.
4351 	 */
4352 	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
4353 }
4354 
4355 void
4356 udp_ddi_g_destroy(void)
4357 {
4358 	netstack_unregister(NS_UDP);
4359 }
4360 
4361 #define	INET_NAME	"ip"
4362 
4363 /*
4364  * Initialize the UDP stack instance.
4365  */
4366 static void *
4367 udp_stack_init(netstackid_t stackid, netstack_t *ns)
4368 {
4369 	udp_stack_t	*us;
4370 	int		i;
4371 	int		error = 0;
4372 	major_t		major;
4373 	size_t		arrsz;
4374 
4375 	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
4376 	us->us_netstack = ns;
4377 
4378 	mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
4379 	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
4380 	us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
4381 	us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
4382 
4383 	/*
4384 	 * The smallest anonymous port in the priviledged port range which UDP
4385 	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
4386 	 */
4387 	us->us_min_anonpriv_port = 512;
4388 
4389 	us->us_bind_fanout_size = udp_bind_fanout_size;
4390 
4391 	/* Roundup variable that might have been modified in /etc/system */
4392 	if (!ISP2(us->us_bind_fanout_size)) {
4393 		/* Not a power of two. Round up to nearest power of two */
4394 		for (i = 0; i < 31; i++) {
4395 			if (us->us_bind_fanout_size < (1 << i))
4396 				break;
4397 		}
4398 		us->us_bind_fanout_size = 1 << i;
4399 	}
4400 	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
4401 	    sizeof (udp_fanout_t), KM_SLEEP);
4402 	for (i = 0; i < us->us_bind_fanout_size; i++) {
4403 		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
4404 		    NULL);
4405 	}
4406 
4407 	arrsz = udp_propinfo_count * sizeof (mod_prop_info_t);
4408 	us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
4409 	    KM_SLEEP);
4410 	bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);
4411 
4412 	/* Allocate the per netstack stats */
4413 	mutex_enter(&cpu_lock);
4414 	us->us_sc_cnt = MAX(ncpus, boot_ncpus);
4415 	mutex_exit(&cpu_lock);
4416 	us->us_sc = kmem_zalloc(max_ncpus  * sizeof (udp_stats_cpu_t *),
4417 	    KM_SLEEP);
4418 	for (i = 0; i < us->us_sc_cnt; i++) {
4419 		us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4420 		    KM_SLEEP);
4421 	}
4422 
4423 	us->us_kstat = udp_kstat2_init(stackid);
4424 	us->us_mibkp = udp_kstat_init(stackid);
4425 
4426 	major = mod_name_to_major(INET_NAME);
4427 	error = ldi_ident_from_major(major, &us->us_ldi_ident);
4428 	ASSERT(error == 0);
4429 	return (us);
4430 }
4431 
4432 /*
4433  * Free the UDP stack instance.
4434  */
4435 static void
4436 udp_stack_fini(netstackid_t stackid, void *arg)
4437 {
4438 	udp_stack_t *us = (udp_stack_t *)arg;
4439 	int i;
4440 
4441 	for (i = 0; i < us->us_bind_fanout_size; i++) {
4442 		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
4443 	}
4444 
4445 	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
4446 	    sizeof (udp_fanout_t));
4447 
4448 	us->us_bind_fanout = NULL;
4449 
4450 	for (i = 0; i < us->us_sc_cnt; i++)
4451 		kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t));
4452 	kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *));
4453 
4454 	kmem_free(us->us_propinfo_tbl,
4455 	    udp_propinfo_count * sizeof (mod_prop_info_t));
4456 	us->us_propinfo_tbl = NULL;
4457 
4458 	udp_kstat_fini(stackid, us->us_mibkp);
4459 	us->us_mibkp = NULL;
4460 
4461 	udp_kstat2_fini(stackid, us->us_kstat);
4462 	us->us_kstat = NULL;
4463 
4464 	mutex_destroy(&us->us_epriv_port_lock);
4465 	ldi_ident_release(us->us_ldi_ident);
4466 	kmem_free(us, sizeof (*us));
4467 }
4468 
4469 static size_t
4470 udp_set_rcv_hiwat(udp_t *udp, size_t size)
4471 {
4472 	udp_stack_t *us = udp->udp_us;
4473 
4474 	/* We add a bit of extra buffering */
4475 	size += size >> 1;
4476 	if (size > us->us_max_buf)
4477 		size = us->us_max_buf;
4478 
4479 	udp->udp_rcv_hiwat = size;
4480 	return (size);
4481 }
4482 
4483 /*
4484  * For the lower queue so that UDP can be a dummy mux.
4485  * Nobody should be sending
4486  * packets up this stream
4487  */
4488 static int
4489 udp_lrput(queue_t *q, mblk_t *mp)
4490 {
4491 	switch (mp->b_datap->db_type) {
4492 	case M_FLUSH:
4493 		/* Turn around */
4494 		if (*mp->b_rptr & FLUSHW) {
4495 			*mp->b_rptr &= ~FLUSHR;
4496 			qreply(q, mp);
4497 			return (0);
4498 		}
4499 		break;
4500 	}
4501 	freemsg(mp);
4502 	return (0);
4503 }
4504 
4505 /*
4506  * For the lower queue so that UDP can be a dummy mux.
4507  * Nobody should be sending packets down this stream.
4508  */
4509 /* ARGSUSED */
4510 int
4511 udp_lwput(queue_t *q, mblk_t *mp)
4512 {
4513 	freemsg(mp);
4514 	return (0);
4515 }
4516 
4517 /*
4518  * When a CPU is added, we need to allocate the per CPU stats struct.
4519  */
4520 void
4521 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid)
4522 {
4523 	int i;
4524 
4525 	if (cpu_seqid < us->us_sc_cnt)
4526 		return;
4527 	for (i = us->us_sc_cnt; i <= cpu_seqid; i++) {
4528 		ASSERT(us->us_sc[i] == NULL);
4529 		us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4530 		    KM_SLEEP);
4531 	}
4532 	membar_producer();
4533 	us->us_sc_cnt = cpu_seqid + 1;
4534 }
4535 
/*
 * The routines below are for the UDP socket module.
 */
4539 
4540 static conn_t *
4541 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
4542 {
4543 	udp_t		*udp;
4544 	conn_t		*connp;
4545 	zoneid_t	zoneid;
4546 	netstack_t	*ns;
4547 	udp_stack_t	*us;
4548 	int		len;
4549 
4550 	ASSERT(errorp != NULL);
4551 
4552 	if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
4553 		return (NULL);
4554 
4555 	ns = netstack_find_by_cred(credp);
4556 	ASSERT(ns != NULL);
4557 	us = ns->netstack_udp;
4558 	ASSERT(us != NULL);
4559 
4560 	/*
4561 	 * For exclusive stacks we set the zoneid to zero
4562 	 * to make UDP operate as if in the global zone.
4563 	 */
4564 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
4565 		zoneid = GLOBAL_ZONEID;
4566 	else
4567 		zoneid = crgetzoneid(credp);
4568 
4569 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
4570 
4571 	connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
4572 	if (connp == NULL) {
4573 		netstack_rele(ns);
4574 		*errorp = ENOMEM;
4575 		return (NULL);
4576 	}
4577 	udp = connp->conn_udp;
4578 
4579 	/*
4580 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
4581 	 * done by netstack_find_by_cred()
4582 	 */
4583 	netstack_rele(ns);
4584 
4585 	/*
4586 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4587 	 * need to lock anything.
4588 	 */
4589 	ASSERT(connp->conn_proto == IPPROTO_UDP);
4590 	ASSERT(connp->conn_udp == udp);
4591 	ASSERT(udp->udp_connp == connp);
4592 
4593 	/* Set the initial state of the stream and the privilege status. */
4594 	udp->udp_state = TS_UNBND;
4595 	connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
4596 	if (isv6) {
4597 		connp->conn_family = AF_INET6;
4598 		connp->conn_ipversion = IPV6_VERSION;
4599 		connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4600 		connp->conn_default_ttl = us->us_ipv6_hoplimit;
4601 		len = sizeof (ip6_t) + UDPH_SIZE;
4602 	} else {
4603 		connp->conn_family = AF_INET;
4604 		connp->conn_ipversion = IPV4_VERSION;
4605 		connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4606 		connp->conn_default_ttl = us->us_ipv4_ttl;
4607 		len = sizeof (ipha_t) + UDPH_SIZE;
4608 	}
4609 
4610 	ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
4611 	connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
4612 
4613 	connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
4614 	connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
4615 	/* conn_allzones can not be set this early, hence no IPCL_ZONEID */
4616 	connp->conn_ixa->ixa_zoneid = zoneid;
4617 
4618 	connp->conn_zoneid = zoneid;
4619 
4620 	/*
4621 	 * If the caller has the process-wide flag set, then default to MAC
4622 	 * exempt mode.  This allows read-down to unlabeled hosts.
4623 	 */
4624 	if (getpflags(NET_MAC_AWARE, credp) != 0)
4625 		connp->conn_mac_mode = CONN_MAC_AWARE;
4626 
4627 	connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
4628 
4629 	udp->udp_us = us;
4630 
4631 	connp->conn_rcvbuf = us->us_recv_hiwat;
4632 	connp->conn_sndbuf = us->us_xmit_hiwat;
4633 	connp->conn_sndlowat = us->us_xmit_lowat;
4634 	connp->conn_rcvlowat = udp_mod_info.mi_lowat;
4635 
4636 	connp->conn_wroff = len + us->us_wroff_extra;
4637 	connp->conn_so_type = SOCK_DGRAM;
4638 
4639 	connp->conn_recv = udp_input;
4640 	connp->conn_recvicmp = udp_icmp_input;
4641 	crhold(credp);
4642 	connp->conn_cred = credp;
4643 	connp->conn_cpid = curproc->p_pid;
4644 	connp->conn_open_time = ddi_get_lbolt64();
4645 	/* Cache things in ixa without an extra refhold */
4646 	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
4647 	connp->conn_ixa->ixa_cred = connp->conn_cred;
4648 	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
4649 	if (is_system_labeled())
4650 		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
4651 
4652 	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
4653 
4654 	if (us->us_pmtu_discovery)
4655 		connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
4656 
4657 	return (connp);
4658 }
4659 
4660 sock_lower_handle_t
4661 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
4662     uint_t *smodep, int *errorp, int flags, cred_t *credp)
4663 {
4664 	udp_t		*udp = NULL;
4665 	udp_stack_t	*us;
4666 	conn_t		*connp;
4667 	boolean_t	isv6;
4668 
4669 	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
4670 	    (proto != 0 && proto != IPPROTO_UDP)) {
4671 		*errorp = EPROTONOSUPPORT;
4672 		return (NULL);
4673 	}
4674 
4675 	if (family == AF_INET6)
4676 		isv6 = B_TRUE;
4677 	else
4678 		isv6 = B_FALSE;
4679 
4680 	connp = udp_do_open(credp, isv6, flags, errorp);
4681 	if (connp == NULL)
4682 		return (NULL);
4683 
4684 	udp = connp->conn_udp;
4685 	ASSERT(udp != NULL);
4686 	us = udp->udp_us;
4687 	ASSERT(us != NULL);
4688 
4689 	udp->udp_issocket = B_TRUE;
4690 	connp->conn_flags |= IPCL_NONSTR;
4691 
4692 	/*
4693 	 * Set flow control
4694 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4695 	 * need to lock anything.
4696 	 */
4697 	(void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
4698 	udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
4699 
4700 	connp->conn_flow_cntrld = B_FALSE;
4701 
4702 	mutex_enter(&connp->conn_lock);
4703 	connp->conn_state_flags &= ~CONN_INCIPIENT;
4704 	mutex_exit(&connp->conn_lock);
4705 
4706 	*errorp = 0;
4707 	*smodep = SM_ATOMIC;
4708 	*sock_downcalls = &sock_udp_downcalls;
4709 	return ((sock_lower_handle_t)connp);
4710 }
4711 
4712 /* ARGSUSED3 */
4713 void
4714 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
4715     sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
4716 {
4717 	conn_t		*connp = (conn_t *)proto_handle;
4718 	struct sock_proto_props sopp;
4719 
4720 	/* All Solaris components should pass a cred for this operation. */
4721 	ASSERT(cr != NULL);
4722 
4723 	connp->conn_upcalls = sock_upcalls;
4724 	connp->conn_upper_handle = sock_handle;
4725 
4726 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
4727 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
4728 	sopp.sopp_wroff = connp->conn_wroff;
4729 	sopp.sopp_maxblk = INFPSZ;
4730 	sopp.sopp_rxhiwat = connp->conn_rcvbuf;
4731 	sopp.sopp_rxlowat = connp->conn_rcvlowat;
4732 	sopp.sopp_maxaddrlen = sizeof (sin6_t);
4733 	sopp.sopp_maxpsz =
4734 	    (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
4735 	    UDP_MAXPACKET_IPV6;
4736 	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
4737 	    udp_mod_info.mi_minpsz;
4738 
4739 	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
4740 	    &sopp);
4741 }
4742 
4743 static void
4744 udp_do_close(conn_t *connp)
4745 {
4746 	udp_t	*udp;
4747 
4748 	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
4749 	udp = connp->conn_udp;
4750 
4751 	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
4752 		/*
4753 		 * Running in cluster mode - register unbind information
4754 		 */
4755 		if (connp->conn_ipversion == IPV4_VERSION) {
4756 			(*cl_inet_unbind)(
4757 			    connp->conn_netstack->netstack_stackid,
4758 			    IPPROTO_UDP, AF_INET,
4759 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
4760 			    (in_port_t)connp->conn_lport, NULL);
4761 		} else {
4762 			(*cl_inet_unbind)(
4763 			    connp->conn_netstack->netstack_stackid,
4764 			    IPPROTO_UDP, AF_INET6,
4765 			    (uint8_t *)&(connp->conn_laddr_v6),
4766 			    (in_port_t)connp->conn_lport, NULL);
4767 		}
4768 	}
4769 
4770 	udp_bind_hash_remove(udp, B_FALSE);
4771 
4772 	ip_quiesce_conn(connp);
4773 
4774 	if (!IPCL_IS_NONSTR(connp)) {
4775 		ASSERT(connp->conn_wq != NULL);
4776 		ASSERT(connp->conn_rq != NULL);
4777 		qprocsoff(connp->conn_rq);
4778 	}
4779 
4780 	udp_close_free(connp);
4781 
4782 	/*
4783 	 * Now we are truly single threaded on this stream, and can
4784 	 * delete the things hanging off the connp, and finally the connp.
4785 	 * We removed this connp from the fanout list, it cannot be
4786 	 * accessed thru the fanouts, and we already waited for the
4787 	 * conn_ref to drop to 0. We are already in close, so
4788 	 * there cannot be any other thread from the top. qprocsoff
4789 	 * has completed, and service has completed or won't run in
4790 	 * future.
4791 	 */
4792 	ASSERT(connp->conn_ref == 1);
4793 
4794 	if (!IPCL_IS_NONSTR(connp)) {
4795 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
4796 	} else {
4797 		ip_free_helper_stream(connp);
4798 	}
4799 
4800 	connp->conn_ref--;
4801 	ipcl_conn_destroy(connp);
4802 }
4803 
4804 /* ARGSUSED1 */
4805 int
4806 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
4807 {
4808 	conn_t	*connp = (conn_t *)proto_handle;
4809 
4810 	/* All Solaris components should pass a cred for this operation. */
4811 	ASSERT(cr != NULL);
4812 
4813 	udp_do_close(connp);
4814 	return (0);
4815 }
4816 
4817 static int
4818 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
4819     boolean_t bind_to_req_port_only)
4820 {
4821 	sin_t		*sin;
4822 	sin6_t		*sin6;
4823 	udp_t		*udp = connp->conn_udp;
4824 	int		error = 0;
4825 	ip_laddr_t	laddr_type = IPVL_UNICAST_UP;	/* INADDR_ANY */
4826 	in_port_t	port;		/* Host byte order */
4827 	in_port_t	requested_port;	/* Host byte order */
4828 	int		count;
4829 	ipaddr_t	v4src;		/* Set if AF_INET */
4830 	in6_addr_t	v6src;
4831 	int		loopmax;
4832 	udp_fanout_t	*udpf;
4833 	in_port_t	lport;		/* Network byte order */
4834 	uint_t		scopeid = 0;
4835 	zoneid_t	zoneid = IPCL_ZONEID(connp);
4836 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
4837 	boolean_t	is_inaddr_any;
4838 	mlp_type_t	addrtype, mlptype;
4839 	udp_stack_t	*us = udp->udp_us;
4840 
4841 	sin = NULL;
4842 	sin6 = NULL;
4843 	switch (len) {
4844 	case sizeof (sin_t):	/* Complete IPv4 address */
4845 		sin = (sin_t *)sa;
4846 
4847 		if (sin == NULL || !OK_32PTR((char *)sin))
4848 			return (EINVAL);
4849 
4850 		if (connp->conn_family != AF_INET ||
4851 		    sin->sin_family != AF_INET) {
4852 			return (EAFNOSUPPORT);
4853 		}
4854 		v4src = sin->sin_addr.s_addr;
4855 		IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
4856 		if (v4src != INADDR_ANY) {
4857 			laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
4858 			    B_TRUE);
4859 		}
4860 		port = ntohs(sin->sin_port);
4861 		break;
4862 
4863 	case sizeof (sin6_t):	/* complete IPv6 address */
4864 		sin6 = (sin6_t *)sa;
4865 
4866 		if (sin6 == NULL || !OK_32PTR((char *)sin6))
4867 			return (EINVAL);
4868 
4869 		if (connp->conn_family != AF_INET6 ||
4870 		    sin6->sin6_family != AF_INET6) {
4871 			return (EAFNOSUPPORT);
4872 		}
4873 		v6src = sin6->sin6_addr;
4874 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
4875 			if (connp->conn_ipv6_v6only)
4876 				return (EADDRNOTAVAIL);
4877 
4878 			IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
4879 			if (v4src != INADDR_ANY) {
4880 				laddr_type = ip_laddr_verify_v4(v4src,
4881 				    zoneid, ipst, B_FALSE);
4882 			}
4883 		} else {
4884 			if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
4885 				if (IN6_IS_ADDR_LINKSCOPE(&v6src))
4886 					scopeid = sin6->sin6_scope_id;
4887 				laddr_type = ip_laddr_verify_v6(&v6src,
4888 				    zoneid, ipst, B_TRUE, scopeid);
4889 			}
4890 		}
4891 		port = ntohs(sin6->sin6_port);
4892 		break;
4893 
4894 	default:		/* Invalid request */
4895 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
4896 		    "udp_bind: bad ADDR_length length %u", len);
4897 		return (-TBADADDR);
4898 	}
4899 
4900 	/* Is the local address a valid unicast, multicast, or broadcast? */
4901 	if (laddr_type == IPVL_BAD)
4902 		return (EADDRNOTAVAIL);
4903 
4904 	requested_port = port;
4905 
4906 	if (requested_port == 0 || !bind_to_req_port_only)
4907 		bind_to_req_port_only = B_FALSE;
4908 	else		/* T_BIND_REQ and requested_port != 0 */
4909 		bind_to_req_port_only = B_TRUE;
4910 
4911 	if (requested_port == 0) {
4912 		/*
4913 		 * If the application passed in zero for the port number, it
4914 		 * doesn't care which port number we bind to. Get one in the
4915 		 * valid range.
4916 		 */
4917 		if (connp->conn_anon_priv_bind) {
4918 			port = udp_get_next_priv_port(udp);
4919 		} else {
4920 			port = udp_update_next_port(udp,
4921 			    us->us_next_port_to_try, B_TRUE);
4922 		}
4923 	} else {
4924 		/*
4925 		 * If the port is in the well-known privileged range,
4926 		 * make sure the caller was privileged.
4927 		 */
4928 		int i;
4929 		boolean_t priv = B_FALSE;
4930 
4931 		if (port < us->us_smallest_nonpriv_port) {
4932 			priv = B_TRUE;
4933 		} else {
4934 			for (i = 0; i < us->us_num_epriv_ports; i++) {
4935 				if (port == us->us_epriv_ports[i]) {
4936 					priv = B_TRUE;
4937 					break;
4938 				}
4939 			}
4940 		}
4941 
4942 		if (priv) {
4943 			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
4944 				return (-TACCES);
4945 		}
4946 	}
4947 
4948 	if (port == 0)
4949 		return (-TNOADDR);
4950 
4951 	/*
4952 	 * The state must be TS_UNBND. TPI mandates that users must send
4953 	 * TPI primitives only 1 at a time and wait for the response before
4954 	 * sending the next primitive.
4955 	 */
4956 	mutex_enter(&connp->conn_lock);
4957 	if (udp->udp_state != TS_UNBND) {
4958 		mutex_exit(&connp->conn_lock);
4959 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
4960 		    "udp_bind: bad state, %u", udp->udp_state);
4961 		return (-TOUTSTATE);
4962 	}
4963 	/*
4964 	 * Copy the source address into our udp structure. This address
4965 	 * may still be zero; if so, IP will fill in the correct address
4966 	 * each time an outbound packet is passed to it. Since the udp is
4967 	 * not yet in the bind hash list, we don't grab the uf_lock to
4968 	 * change conn_ipversion
4969 	 */
4970 	if (connp->conn_family == AF_INET) {
4971 		ASSERT(sin != NULL);
4972 		ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
4973 	} else {
4974 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
4975 			/*
4976 			 * no need to hold the uf_lock to set the conn_ipversion
4977 			 * since we are not yet in the fanout list
4978 			 */
4979 			connp->conn_ipversion = IPV4_VERSION;
4980 			connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4981 		} else {
4982 			connp->conn_ipversion = IPV6_VERSION;
4983 			connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4984 		}
4985 	}
4986 
4987 	/*
4988 	 * If conn_reuseaddr is not set, then we have to make sure that
4989 	 * the IP address and port number the application requested
4990 	 * (or we selected for the application) is not being used by
4991 	 * another stream.  If another stream is already using the
4992 	 * requested IP address and port, the behavior depends on
4993 	 * "bind_to_req_port_only". If set the bind fails; otherwise we
4994 	 * search for any unused port to bind to the stream.
4995 	 *
4996 	 * As per the BSD semantics, as modified by the Deering multicast
4997 	 * changes, if conn_reuseaddr is set, then we allow multiple binds
4998 	 * to the same port independent of the local IP address.
4999 	 *
5000 	 * This is slightly different than in SunOS 4.X which did not
5001 	 * support IP multicast. Note that the change implemented by the
5002 	 * Deering multicast code effects all binds - not only binding
5003 	 * to IP multicast addresses.
5004 	 *
5005 	 * Note that when binding to port zero we ignore SO_REUSEADDR in
5006 	 * order to guarantee a unique port.
5007 	 */
5008 
5009 	count = 0;
5010 	if (connp->conn_anon_priv_bind) {
5011 		/*
5012 		 * loopmax = (IPPORT_RESERVED-1) -
5013 		 *    us->us_min_anonpriv_port + 1
5014 		 */
5015 		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
5016 	} else {
5017 		loopmax = us->us_largest_anon_port -
5018 		    us->us_smallest_anon_port + 1;
5019 	}
5020 
5021 	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
5022 
5023 	for (;;) {
5024 		udp_t		*udp1;
5025 		boolean_t	found_exclbind = B_FALSE;
5026 		conn_t		*connp1;
5027 
5028 		/*
5029 		 * Walk through the list of udp streams bound to
5030 		 * requested port with the same IP address.
5031 		 */
5032 		lport = htons(port);
5033 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
5034 		    us->us_bind_fanout_size)];
5035 		mutex_enter(&udpf->uf_lock);
5036 		for (udp1 = udpf->uf_udp; udp1 != NULL;
5037 		    udp1 = udp1->udp_bind_hash) {
5038 			connp1 = udp1->udp_connp;
5039 
5040 			if (lport != connp1->conn_lport)
5041 				continue;
5042 
5043 			/*
5044 			 * On a labeled system, we must treat bindings to ports
5045 			 * on shared IP addresses by sockets with MAC exemption
5046 			 * privilege as being in all zones, as there's
5047 			 * otherwise no way to identify the right receiver.
5048 			 */
5049 			if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
5050 				continue;
5051 
5052 			/*
5053 			 * If UDP_EXCLBIND is set for either the bound or
5054 			 * binding endpoint, the semantics of bind
5055 			 * is changed according to the following chart.
5056 			 *
5057 			 * spec = specified address (v4 or v6)
5058 			 * unspec = unspecified address (v4 or v6)
5059 			 * A = specified addresses are different for endpoints
5060 			 *
5061 			 * bound	bind to		allowed?
5062 			 * -------------------------------------
5063 			 * unspec	unspec		no
5064 			 * unspec	spec		no
5065 			 * spec		unspec		no
5066 			 * spec		spec		yes if A
5067 			 *
5068 			 * For labeled systems, SO_MAC_EXEMPT behaves the same
5069 			 * as UDP_EXCLBIND, except that zoneid is ignored.
5070 			 */
5071 			if (connp1->conn_exclbind || connp->conn_exclbind ||
5072 			    IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
5073 				if (V6_OR_V4_INADDR_ANY(
5074 				    connp1->conn_bound_addr_v6) ||
5075 				    is_inaddr_any ||
5076 				    IN6_ARE_ADDR_EQUAL(
5077 				    &connp1->conn_bound_addr_v6,
5078 				    &v6src)) {
5079 					found_exclbind = B_TRUE;
5080 					break;
5081 				}
5082 				continue;
5083 			}
5084 
5085 			/*
5086 			 * Check ipversion to allow IPv4 and IPv6 sockets to
5087 			 * have disjoint port number spaces.
5088 			 */
5089 			if (connp->conn_ipversion != connp1->conn_ipversion) {
5090 
5091 				/*
5092 				 * On the first time through the loop, if the
5093 				 * user intentionally specified a
5094 				 * particular port number, then ignore any
5095 				 * bindings of the other protocol that may
5096 				 * conflict. This allows the user to bind IPv6
5097 				 * alone and get both v4 and v6, or bind both
5098 				 * and get each separately. On subsequent
5099 				 * times through the loop, we're checking a
5100 				 * port that we chose (not the user) and thus
5101 				 * we do not allow casual duplicate bindings.
5102 				 */
5103 				if (count == 0 && requested_port != 0)
5104 					continue;
5105 			}
5106 
5107 			/*
5108 			 * No difference depending on SO_REUSEADDR.
5109 			 *
5110 			 * If existing port is bound to a
5111 			 * non-wildcard IP address and
5112 			 * the requesting stream is bound to
5113 			 * a distinct different IP addresses
5114 			 * (non-wildcard, also), keep going.
5115 			 */
5116 			if (!is_inaddr_any &&
5117 			    !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
5118 			    !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
5119 			    &v6src)) {
5120 				continue;
5121 			}
5122 			break;
5123 		}
5124 
5125 		if (!found_exclbind &&
5126 		    (connp->conn_reuseaddr && requested_port != 0)) {
5127 			break;
5128 		}
5129 
5130 		if (udp1 == NULL) {
5131 			/*
5132 			 * No other stream has this IP address
5133 			 * and port number. We can use it.
5134 			 */
5135 			break;
5136 		}
5137 		mutex_exit(&udpf->uf_lock);
5138 		if (bind_to_req_port_only) {
5139 			/*
5140 			 * We get here only when requested port
5141 			 * is bound (and only on the first
5142 			 * iteration of the for() loop).
5143 			 *
5144 			 * The semantics of this bind request
5145 			 * require it to fail so we return from
5146 			 * the routine (and exit the loop).
5147 			 *
5148 			 */
5149 			mutex_exit(&connp->conn_lock);
5150 			return (-TADDRBUSY);
5151 		}
5152 
5153 		if (connp->conn_anon_priv_bind) {
5154 			port = udp_get_next_priv_port(udp);
5155 		} else {
5156 			if ((count == 0) && (requested_port != 0)) {
5157 				/*
5158 				 * If the application wants us to find
5159 				 * a port, get one to start with. Set
5160 				 * requested_port to 0, so that we will
5161 				 * update us->us_next_port_to_try below.
5162 				 */
5163 				port = udp_update_next_port(udp,
5164 				    us->us_next_port_to_try, B_TRUE);
5165 				requested_port = 0;
5166 			} else {
5167 				port = udp_update_next_port(udp, port + 1,
5168 				    B_FALSE);
5169 			}
5170 		}
5171 
5172 		if (port == 0 || ++count >= loopmax) {
5173 			/*
5174 			 * We've tried every possible port number and
5175 			 * there are none available, so send an error
5176 			 * to the user.
5177 			 */
5178 			mutex_exit(&connp->conn_lock);
5179 			return (-TNOADDR);
5180 		}
5181 	}
5182 
5183 	/*
5184 	 * Copy the source address into our udp structure.  This address
5185 	 * may still be zero; if so, ip_attr_connect will fill in the correct
5186 	 * address when a packet is about to be sent.
5187 	 * If we are binding to a broadcast or multicast address then
5188 	 * we just set the conn_bound_addr since we don't want to use
5189 	 * that as the source address when sending.
5190 	 */
5191 	connp->conn_bound_addr_v6 = v6src;
5192 	connp->conn_laddr_v6 = v6src;
5193 	if (scopeid != 0) {
5194 		connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
5195 		connp->conn_ixa->ixa_scopeid = scopeid;
5196 		connp->conn_incoming_ifindex = scopeid;
5197 	} else {
5198 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5199 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5200 	}
5201 
5202 	switch (laddr_type) {
5203 	case IPVL_UNICAST_UP:
5204 	case IPVL_UNICAST_DOWN:
5205 		connp->conn_saddr_v6 = v6src;
5206 		connp->conn_mcbc_bind = B_FALSE;
5207 		break;
5208 	case IPVL_MCAST:
5209 	case IPVL_BCAST:
5210 		/* ip_set_destination will pick a source address later */
5211 		connp->conn_saddr_v6 = ipv6_all_zeros;
5212 		connp->conn_mcbc_bind = B_TRUE;
5213 		break;
5214 	}
5215 
5216 	/* Any errors after this point should use late_error */
5217 	connp->conn_lport = lport;
5218 
5219 	/*
5220 	 * Now reset the next anonymous port if the application requested
5221 	 * an anonymous port, or we handed out the next anonymous port.
5222 	 */
5223 	if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
5224 		us->us_next_port_to_try = port + 1;
5225 	}
5226 
5227 	/* Initialize the T_BIND_ACK. */
5228 	if (connp->conn_family == AF_INET) {
5229 		sin->sin_port = connp->conn_lport;
5230 	} else {
5231 		sin6->sin6_port = connp->conn_lport;
5232 	}
5233 	udp->udp_state = TS_IDLE;
5234 	udp_bind_hash_insert(udpf, udp);
5235 	mutex_exit(&udpf->uf_lock);
5236 	mutex_exit(&connp->conn_lock);
5237 
5238 	if (cl_inet_bind) {
5239 		/*
5240 		 * Running in cluster mode - register bind information
5241 		 */
5242 		if (connp->conn_ipversion == IPV4_VERSION) {
5243 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5244 			    IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
5245 			    (in_port_t)connp->conn_lport, NULL);
5246 		} else {
5247 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5248 			    IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
5249 			    (in_port_t)connp->conn_lport, NULL);
5250 		}
5251 	}
5252 
5253 	mutex_enter(&connp->conn_lock);
5254 	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
5255 	if (is_system_labeled() && (!connp->conn_anon_port ||
5256 	    connp->conn_anon_mlp)) {
5257 		uint16_t mlpport;
5258 		zone_t *zone;
5259 
5260 		zone = crgetzone(cr);
5261 		connp->conn_mlp_type =
5262 		    connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
5263 		    mlptSingle;
5264 		addrtype = tsol_mlp_addr_type(
5265 		    connp->conn_allzones ? ALL_ZONES : zone->zone_id,
5266 		    IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
5267 		if (addrtype == mlptSingle) {
5268 			error = -TNOADDR;
5269 			mutex_exit(&connp->conn_lock);
5270 			goto late_error;
5271 		}
5272 		mlpport = connp->conn_anon_port ? PMAPPORT : port;
5273 		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
5274 		    addrtype);
5275 
5276 		/*
5277 		 * It is a coding error to attempt to bind an MLP port
5278 		 * without first setting SOL_SOCKET/SCM_UCRED.
5279 		 */
5280 		if (mlptype != mlptSingle &&
5281 		    connp->conn_mlp_type == mlptSingle) {
5282 			error = EINVAL;
5283 			mutex_exit(&connp->conn_lock);
5284 			goto late_error;
5285 		}
5286 
5287 		/*
5288 		 * It is an access violation to attempt to bind an MLP port
5289 		 * without NET_BINDMLP privilege.
5290 		 */
5291 		if (mlptype != mlptSingle &&
5292 		    secpolicy_net_bindmlp(cr) != 0) {
5293 			if (connp->conn_debug) {
5294 				(void) strlog(UDP_MOD_ID, 0, 1,
5295 				    SL_ERROR|SL_TRACE,
5296 				    "udp_bind: no priv for multilevel port %d",
5297 				    mlpport);
5298 			}
5299 			error = -TACCES;
5300 			mutex_exit(&connp->conn_lock);
5301 			goto late_error;
5302 		}
5303 
5304 		/*
5305 		 * If we're specifically binding a shared IP address and the
5306 		 * port is MLP on shared addresses, then check to see if this
5307 		 * zone actually owns the MLP.  Reject if not.
5308 		 */
5309 		if (mlptype == mlptShared && addrtype == mlptShared) {
5310 			/*
5311 			 * No need to handle exclusive-stack zones since
5312 			 * ALL_ZONES only applies to the shared stack.
5313 			 */
5314 			zoneid_t mlpzone;
5315 
5316 			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
5317 			    htons(mlpport));
5318 			if (connp->conn_zoneid != mlpzone) {
5319 				if (connp->conn_debug) {
5320 					(void) strlog(UDP_MOD_ID, 0, 1,
5321 					    SL_ERROR|SL_TRACE,
5322 					    "udp_bind: attempt to bind port "
5323 					    "%d on shared addr in zone %d "
5324 					    "(should be %d)",
5325 					    mlpport, connp->conn_zoneid,
5326 					    mlpzone);
5327 				}
5328 				error = -TACCES;
5329 				mutex_exit(&connp->conn_lock);
5330 				goto late_error;
5331 			}
5332 		}
5333 		if (connp->conn_anon_port) {
5334 			error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
5335 			    port, B_TRUE);
5336 			if (error != 0) {
5337 				if (connp->conn_debug) {
5338 					(void) strlog(UDP_MOD_ID, 0, 1,
5339 					    SL_ERROR|SL_TRACE,
5340 					    "udp_bind: cannot establish anon "
5341 					    "MLP for port %d", port);
5342 				}
5343 				error = -TACCES;
5344 				mutex_exit(&connp->conn_lock);
5345 				goto late_error;
5346 			}
5347 		}
5348 		connp->conn_mlp_type = mlptype;
5349 	}
5350 
5351 	/*
5352 	 * We create an initial header template here to make a subsequent
5353 	 * sendto have a starting point. Since conn_last_dst is zero the
5354 	 * first sendto will always follow the 'dst changed' code path.
5355 	 * Note that we defer massaging options and the related checksum
5356 	 * adjustment until we have a destination address.
5357 	 */
5358 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5359 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5360 	if (error != 0) {
5361 		mutex_exit(&connp->conn_lock);
5362 		goto late_error;
5363 	}
5364 	/* Just in case */
5365 	connp->conn_faddr_v6 = ipv6_all_zeros;
5366 	connp->conn_fport = 0;
5367 	connp->conn_v6lastdst = ipv6_all_zeros;
5368 	mutex_exit(&connp->conn_lock);
5369 
5370 	error = ip_laddr_fanout_insert(connp);
5371 	if (error != 0)
5372 		goto late_error;
5373 
5374 	/* Bind succeeded */
5375 	return (0);
5376 
5377 late_error:
5378 	/* We had already picked the port number, and then the bind failed */
5379 	mutex_enter(&connp->conn_lock);
5380 	udpf = &us->us_bind_fanout[
5381 	    UDP_BIND_HASH(connp->conn_lport,
5382 	    us->us_bind_fanout_size)];
5383 	mutex_enter(&udpf->uf_lock);
5384 	connp->conn_saddr_v6 = ipv6_all_zeros;
5385 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
5386 	connp->conn_laddr_v6 = ipv6_all_zeros;
5387 	if (scopeid != 0) {
5388 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5389 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5390 	}
5391 	udp->udp_state = TS_UNBND;
5392 	udp_bind_hash_remove(udp, B_TRUE);
5393 	connp->conn_lport = 0;
5394 	mutex_exit(&udpf->uf_lock);
5395 	connp->conn_anon_port = B_FALSE;
5396 	connp->conn_mlp_type = mlptSingle;
5397 
5398 	connp->conn_v6lastdst = ipv6_all_zeros;
5399 
5400 	/* Restore the header that was built above - different source address */
5401 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5402 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5403 	mutex_exit(&connp->conn_lock);
5404 	return (error);
5405 }
5406 
5407 int
5408 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5409     socklen_t len, cred_t *cr)
5410 {
5411 	int		error;
5412 	conn_t		*connp;
5413 
5414 	/* All Solaris components should pass a cred for this operation. */
5415 	ASSERT(cr != NULL);
5416 
5417 	connp = (conn_t *)proto_handle;
5418 
5419 	if (sa == NULL)
5420 		error = udp_do_unbind(connp);
5421 	else
5422 		error = udp_do_bind(connp, sa, len, cr, B_TRUE);
5423 
5424 	if (error < 0) {
5425 		if (error == -TOUTSTATE)
5426 			error = EINVAL;
5427 		else
5428 			error = proto_tlitosyserr(-error);
5429 	}
5430 
5431 	return (error);
5432 }
5433 
5434 static int
5435 udp_implicit_bind(conn_t *connp, cred_t *cr)
5436 {
5437 	sin6_t sin6addr;
5438 	sin_t *sin;
5439 	sin6_t *sin6;
5440 	socklen_t len;
5441 	int error;
5442 
5443 	/* All Solaris components should pass a cred for this operation. */
5444 	ASSERT(cr != NULL);
5445 
5446 	if (connp->conn_family == AF_INET) {
5447 		len = sizeof (struct sockaddr_in);
5448 		sin = (sin_t *)&sin6addr;
5449 		*sin = sin_null;
5450 		sin->sin_family = AF_INET;
5451 		sin->sin_addr.s_addr = INADDR_ANY;
5452 	} else {
5453 		ASSERT(connp->conn_family == AF_INET6);
5454 		len = sizeof (sin6_t);
5455 		sin6 = (sin6_t *)&sin6addr;
5456 		*sin6 = sin6_null;
5457 		sin6->sin6_family = AF_INET6;
5458 		V6_SET_ZERO(sin6->sin6_addr);
5459 	}
5460 
5461 	error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
5462 	    cr, B_FALSE);
5463 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
5464 }
5465 
5466 /*
5467  * This routine removes a port number association from a stream. It
5468  * is called by udp_unbind and udp_tpi_unbind.
5469  */
5470 static int
5471 udp_do_unbind(conn_t *connp)
5472 {
5473 	udp_t		*udp = connp->conn_udp;
5474 	udp_fanout_t	*udpf;
5475 	udp_stack_t	*us = udp->udp_us;
5476 
5477 	if (cl_inet_unbind != NULL) {
5478 		/*
5479 		 * Running in cluster mode - register unbind information
5480 		 */
5481 		if (connp->conn_ipversion == IPV4_VERSION) {
5482 			(*cl_inet_unbind)(
5483 			    connp->conn_netstack->netstack_stackid,
5484 			    IPPROTO_UDP, AF_INET,
5485 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5486 			    (in_port_t)connp->conn_lport, NULL);
5487 		} else {
5488 			(*cl_inet_unbind)(
5489 			    connp->conn_netstack->netstack_stackid,
5490 			    IPPROTO_UDP, AF_INET6,
5491 			    (uint8_t *)&(connp->conn_laddr_v6),
5492 			    (in_port_t)connp->conn_lport, NULL);
5493 		}
5494 	}
5495 
5496 	mutex_enter(&connp->conn_lock);
5497 	/* If a bind has not been done, we can't unbind. */
5498 	if (udp->udp_state == TS_UNBND) {
5499 		mutex_exit(&connp->conn_lock);
5500 		return (-TOUTSTATE);
5501 	}
5502 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5503 	    us->us_bind_fanout_size)];
5504 	mutex_enter(&udpf->uf_lock);
5505 	udp_bind_hash_remove(udp, B_TRUE);
5506 	connp->conn_saddr_v6 = ipv6_all_zeros;
5507 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
5508 	connp->conn_laddr_v6 = ipv6_all_zeros;
5509 	connp->conn_mcbc_bind = B_FALSE;
5510 	connp->conn_lport = 0;
5511 	/* In case we were also connected */
5512 	connp->conn_faddr_v6 = ipv6_all_zeros;
5513 	connp->conn_fport = 0;
5514 	mutex_exit(&udpf->uf_lock);
5515 
5516 	connp->conn_v6lastdst = ipv6_all_zeros;
5517 	udp->udp_state = TS_UNBND;
5518 
5519 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5520 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5521 	mutex_exit(&connp->conn_lock);
5522 
5523 	ip_unbind(connp);
5524 
5525 	return (0);
5526 }
5527 
5528 /*
5529  * It associates a default destination address with the stream.
 *
 * sa/len carry the destination: len selects between a sin_t and a sin6_t,
 * any other length is rejected with EINVAL.  cr and pid are recorded on
 * the conn and on the transmit attributes as the identity of the caller.
 *
 * Returns 0 on success.  Failures come back either as a positive errno
 * (EINVAL, EADDRNOTAVAIL, ENOMEM, or whatever conn_connect(),
 * ipcl_conn_insert() or udp_build_hdr_template() returned) or as a
 * negated TLI error (-TBADADDR, -TOUTSTATE).  When the failure happens
 * after the connect has been started (the connect_failed path), the
 * endpoint is put back into TS_IDLE, the foreign address/port and flow
 * info are cleared, the local/source addresses are reset from the bound
 * address, and the header template is rebuilt.
5530  */
5531 static int
5532 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
5533     cred_t *cr, pid_t pid)
5534 {
5535 	sin6_t		*sin6;
5536 	sin_t		*sin;
5537 	in6_addr_t	v6dst;
5538 	ipaddr_t	v4dst;
5539 	uint16_t	dstport;
5540 	uint32_t	flowinfo;
5541 	udp_fanout_t	*udpf;
5542 	udp_t		*udp, *udp1;
5543 	ushort_t	ipversion;
5544 	udp_stack_t	*us;
5545 	int		error;
5546 	conn_t		*connp1;
5547 	ip_xmit_attr_t	*ixa;
5548 	ip_xmit_attr_t	*oldixa;
5549 	uint_t		scopeid = 0;
5550 	uint_t		srcid = 0;
5551 	in6_addr_t	v6src = connp->conn_saddr_v6;
5552 	boolean_t	v4mapped;
5553 
5554 	udp = connp->conn_udp;
5555 	us = udp->udp_us;
5556 	sin = NULL;
5557 	sin6 = NULL;
5558 	v4dst = INADDR_ANY;
5559 	flowinfo = 0;
5560 
5561 	/*
5562 	 * Address has been verified by the caller
5563 	 */
5564 	switch (len) {
5565 	default:
5566 		/*
5567 		 * Should never happen
5568 		 */
5569 		return (EINVAL);
5570 
5571 	case sizeof (sin_t):
5572 		sin = (sin_t *)sa;
5573 		v4dst = sin->sin_addr.s_addr;
5574 		dstport = sin->sin_port;
5575 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5576 		ASSERT(connp->conn_ipversion == IPV4_VERSION);
5577 		ipversion = IPV4_VERSION;
5578 		break;
5579 
5580 	case sizeof (sin6_t):
5581 		sin6 = (sin6_t *)sa;
5582 		v6dst = sin6->sin6_addr;
5583 		dstport = sin6->sin6_port;
5584 		srcid = sin6->__sin6_src_id;
5585 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
		/*
		 * A source id is only resolved into v6src when no source
		 * address has been set on the conn yet.
		 */
5586 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5587 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
5588 			    v4mapped, connp->conn_netstack)) {
5589 				/* Mismatch v4mapped/v6 specified by srcid. */
5590 				return (EADDRNOTAVAIL);
5591 			}
5592 		}
5593 		if (v4mapped) {
5594 			if (connp->conn_ipv6_v6only)
5595 				return (EADDRNOTAVAIL);
5596 
5597 			/*
5598 			 * Destination address is a v4-mapped IPv6 address.
5599 			 * Source bound address should be unspecified or
5600 			 * IPv6 mapped address as well.
5601 			 */
5602 			if (!IN6_IS_ADDR_UNSPECIFIED(
5603 			    &connp->conn_bound_addr_v6) &&
5604 			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
5605 				return (EADDRNOTAVAIL);
5606 			}
5607 			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
5608 			ipversion = IPV4_VERSION;
5609 			flowinfo = 0;
5610 		} else {
5611 			ipversion = IPV6_VERSION;
5612 			flowinfo = sin6->sin6_flowinfo;
			/* The scope id is honoured for link-local peers only. */
5613 			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
5614 				scopeid = sin6->sin6_scope_id;
5615 		}
5616 		break;
5617 	}
5618 
	/* A destination port of zero is rejected as a bad address. */
5619 	if (dstport == 0)
5620 		return (-TBADADDR);
5621 
5622 	/*
5623 	 * If there is a different thread using conn_ixa then we get a new
5624 	 * copy and cut the old one loose from conn_ixa. Otherwise we use
5625 	 * conn_ixa and prevent any other thread from using/changing it.
5626 	 * Once connect() is done other threads can use conn_ixa since the
5627 	 * refcnt will be back at one.
5628 	 * We defer updating conn_ixa until later to handle any concurrent
5629 	 * conn_ixa_cleanup thread.
5630 	 */
5631 	ixa = conn_get_ixa(connp, B_FALSE);
5632 	if (ixa == NULL)
5633 		return (ENOMEM);
5634 
5635 	mutex_enter(&connp->conn_lock);
5636 	/*
5637 	 * This udp_t must have bound to a port already before doing a connect.
5638 	 * Reject if a connect is in progress (we drop conn_lock during
5639 	 * udp_do_connect).
5640 	 */
5641 	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
5642 		mutex_exit(&connp->conn_lock);
		/*
		 * NOTE(review): udp_state is re-read here for the log
		 * message after conn_lock has been dropped.
		 */
5643 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5644 		    "udp_connect: bad state, %u", udp->udp_state);
5645 		ixa_refrele(ixa);
5646 		return (-TOUTSTATE);
5647 	}
5648 	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
5649 
5650 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5651 	    us->us_bind_fanout_size)];
5652 
5653 	mutex_enter(&udpf->uf_lock);
5654 	if (udp->udp_state == TS_DATA_XFER) {
5655 		/* Already connected - clear out state */
5656 		if (connp->conn_mcbc_bind)
5657 			connp->conn_saddr_v6 = ipv6_all_zeros;
5658 		else
5659 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5660 		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5661 		connp->conn_faddr_v6 = ipv6_all_zeros;
5662 		connp->conn_fport = 0;
5663 		udp->udp_state = TS_IDLE;
5664 	}
5665 
5666 	connp->conn_fport = dstport;
5667 	connp->conn_ipversion = ipversion;
5668 	if (ipversion == IPV4_VERSION) {
5669 		/*
5670 		 * Interpret a zero destination to mean loopback.
5671 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
5672 		 * generate the T_CONN_CON.
5673 		 */
5674 		if (v4dst == INADDR_ANY) {
5675 			v4dst = htonl(INADDR_LOOPBACK);
5676 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5677 			if (connp->conn_family == AF_INET) {
5678 				sin->sin_addr.s_addr = v4dst;
5679 			} else {
5680 				sin6->sin6_addr = v6dst;
5681 			}
5682 		}
5683 		connp->conn_faddr_v6 = v6dst;
5684 		connp->conn_flowinfo = 0;
5685 	} else {
5686 		ASSERT(connp->conn_ipversion == IPV6_VERSION);
5687 		/*
5688 		 * Interpret a zero destination to mean loopback.
5689 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
5690 		 * generate the T_CONN_CON.
5691 		 */
5692 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
5693 			v6dst = ipv6_loopback;
5694 			sin6->sin6_addr = v6dst;
5695 		}
5696 		connp->conn_faddr_v6 = v6dst;
5697 		connp->conn_flowinfo = flowinfo;
5698 	}
5699 	mutex_exit(&udpf->uf_lock);
5700 
5701 	/*
5702 	 * We update our cred/cpid based on the caller of connect
5703 	 */
5704 	if (connp->conn_cred != cr) {
5705 		crhold(cr);
5706 		crfree(connp->conn_cred);
5707 		connp->conn_cred = cr;
5708 	}
5709 	connp->conn_cpid = pid;
5710 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
5711 	ixa->ixa_cred = cr;
5712 	ixa->ixa_cpid = pid;
5713 	if (is_system_labeled()) {
5714 		/* We need to restart with a label based on the cred */
5715 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
5716 	}
5717 
5718 	if (scopeid != 0) {
5719 		ixa->ixa_flags |= IXAF_SCOPEID_SET;
5720 		ixa->ixa_scopeid = scopeid;
5721 		connp->conn_incoming_ifindex = scopeid;
5722 	} else {
5723 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5724 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5725 	}
5726 	/*
5727 	 * conn_connect will drop conn_lock and reacquire it.
5728 	 * To prevent a send* from messing with this udp_t while the lock
5729 	 * is dropped we set udp_state and clear conn_v6lastdst.
5730 	 * That will make all send* fail with EISCONN.
5731 	 */
5732 	connp->conn_v6lastdst = ipv6_all_zeros;
5733 	udp->udp_state = TS_WCON_CREQ;
5734 
5735 	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
5736 	mutex_exit(&connp->conn_lock);
5737 	if (error != 0)
5738 		goto connect_failed;
5739 
5740 	/*
5741 	 * The addresses have been verified. Time to insert in
5742 	 * the correct fanout list.
5743 	 */
5744 	error = ipcl_conn_insert(connp);
5745 	if (error != 0)
5746 		goto connect_failed;
5747 
5748 	mutex_enter(&connp->conn_lock);
5749 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5750 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5751 	if (error != 0) {
5752 		mutex_exit(&connp->conn_lock);
5753 		goto connect_failed;
5754 	}
5755 
5756 	udp->udp_state = TS_DATA_XFER;
5757 	/* Record this as the "last" send even though we haven't sent any */
5758 	connp->conn_v6lastdst = connp->conn_faddr_v6;
5759 	connp->conn_lastipversion = connp->conn_ipversion;
5760 	connp->conn_lastdstport = connp->conn_fport;
5761 	connp->conn_lastflowinfo = connp->conn_flowinfo;
5762 	connp->conn_lastscopeid = scopeid;
5763 	connp->conn_lastsrcid = srcid;
5764 	/* Also remember a source to use together with lastdst */
5765 	connp->conn_v6lastsrc = v6src;
5766 
	/* Publish the prepared ixa as conn_ixa and release the old one. */
5767 	oldixa = conn_replace_ixa(connp, ixa);
5768 	mutex_exit(&connp->conn_lock);
5769 	ixa_refrele(oldixa);
5770 
5771 	/*
5772 	 * We've picked a source address above. Now we can
5773 	 * verify that the src/port/dst/port is unique for all
5774 	 * connections in TS_DATA_XFER, skipping ourselves.
5775 	 */
5776 	mutex_enter(&udpf->uf_lock);
5777 	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
5778 		if (udp1->udp_state != TS_DATA_XFER)
5779 			continue;
5780 
5781 		if (udp1 == udp)
5782 			continue;
5783 
5784 		connp1 = udp1->udp_connp;
		/*
		 * Skip entries that differ in local port, IP version,
		 * foreign port, local address or foreign address, or
		 * whose zone does not match ours in either direction.
		 */
5785 		if (connp->conn_lport != connp1->conn_lport ||
5786 		    connp->conn_ipversion != connp1->conn_ipversion ||
5787 		    dstport != connp1->conn_fport ||
5788 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
5789 		    &connp1->conn_laddr_v6) ||
5790 		    !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
5791 		    !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
5792 		    IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
5793 			continue;
		/* An identical connected endpoint already exists. */
5794 		mutex_exit(&udpf->uf_lock);
5795 		error = -TBADADDR;
5796 		goto connect_failed;
5797 	}
5798 	if (cl_inet_connect2 != NULL) {
5799 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
5800 		if (error != 0) {
5801 			mutex_exit(&udpf->uf_lock);
5802 			error = -TBADADDR;
5803 			goto connect_failed;
5804 		}
5805 	}
5806 	mutex_exit(&udpf->uf_lock);
5807 
	/* Drop the reference obtained from conn_get_ixa() above. */
5808 	ixa_refrele(ixa);
5809 	return (0);
5810 
5811 connect_failed:
	/*
	 * Every jump to this label happens after ixa and udpf have been
	 * set and with neither conn_lock nor uf_lock held.
	 */
5812 	if (ixa != NULL)
5813 		ixa_refrele(ixa);
5814 	mutex_enter(&connp->conn_lock);
5815 	mutex_enter(&udpf->uf_lock);
5816 	udp->udp_state = TS_IDLE;
5817 	connp->conn_faddr_v6 = ipv6_all_zeros;
5818 	connp->conn_fport = 0;
5819 	/* In case the source address was set above */
5820 	if (connp->conn_mcbc_bind)
5821 		connp->conn_saddr_v6 = ipv6_all_zeros;
5822 	else
5823 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5824 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5825 	mutex_exit(&udpf->uf_lock);
5826 
5827 	connp->conn_v6lastdst = ipv6_all_zeros;
5828 	connp->conn_flowinfo = 0;
5829 
	/* Rebuild the header template for the now-unconnected endpoint. */
5830 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5831 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5832 	mutex_exit(&connp->conn_lock);
5833 	return (error);
5834 }
5835 
5836 static int
5837 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
5838     socklen_t len, sock_connid_t *id, cred_t *cr)
5839 {
5840 	conn_t	*connp = (conn_t *)proto_handle;
5841 	udp_t	*udp = connp->conn_udp;
5842 	int	error;
5843 	boolean_t did_bind = B_FALSE;
5844 	pid_t	pid = curproc->p_pid;
5845 
5846 	/* All Solaris components should pass a cred for this operation. */
5847 	ASSERT(cr != NULL);
5848 
5849 	if (sa == NULL) {
5850 		/*
5851 		 * Disconnect
5852 		 * Make sure we are connected
5853 		 */
5854 		if (udp->udp_state != TS_DATA_XFER)
5855 			return (EINVAL);
5856 
5857 		error = udp_disconnect(connp);
5858 		return (error);
5859 	}
5860 
5861 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
5862 	if (error != 0)
5863 		goto done;
5864 
5865 	/* do an implicit bind if necessary */
5866 	if (udp->udp_state == TS_UNBND) {
5867 		error = udp_implicit_bind(connp, cr);
5868 		/*
5869 		 * We could be racing with an actual bind, in which case
5870 		 * we would see EPROTO. We cross our fingers and try
5871 		 * to connect.
5872 		 */
5873 		if (!(error == 0 || error == EPROTO))
5874 			goto done;
5875 		did_bind = B_TRUE;
5876 	}
5877 	/*
5878 	 * set SO_DGRAM_ERRIND
5879 	 */
5880 	connp->conn_dgram_errind = B_TRUE;
5881 
5882 	error = udp_do_connect(connp, sa, len, cr, pid);
5883 
5884 	if (error != 0 && did_bind) {
5885 		int unbind_err;
5886 
5887 		unbind_err = udp_do_unbind(connp);
5888 		ASSERT(unbind_err == 0);
5889 	}
5890 
5891 	if (error == 0) {
5892 		*id = 0;
5893 		(*connp->conn_upcalls->su_connected)
5894 		    (connp->conn_upper_handle, 0, NULL, -1);
5895 	} else if (error < 0) {
5896 		error = proto_tlitosyserr(-error);
5897 	}
5898 
5899 done:
5900 	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
5901 		/*
5902 		 * No need to hold locks to set state
5903 		 * after connect failure socket state is undefined
5904 		 * We set the state only to imitate old sockfs behavior
5905 		 */
5906 		udp->udp_state = TS_IDLE;
5907 	}
5908 	return (error);
5909 }
5910 
5911 int
5912 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
5913     cred_t *cr)
5914 {
5915 	sin6_t		*sin6;
5916 	sin_t		*sin = NULL;
5917 	uint_t		srcid;
5918 	conn_t		*connp = (conn_t *)proto_handle;
5919 	udp_t		*udp = connp->conn_udp;
5920 	int		error = 0;
5921 	udp_stack_t	*us = udp->udp_us;
5922 	ushort_t	ipversion;
5923 	pid_t		pid = curproc->p_pid;
5924 	ip_xmit_attr_t	*ixa;
5925 
5926 	ASSERT(DB_TYPE(mp) == M_DATA);
5927 
5928 	/* All Solaris components should pass a cred for this operation. */
5929 	ASSERT(cr != NULL);
5930 
5931 	/* do an implicit bind if necessary */
5932 	if (udp->udp_state == TS_UNBND) {
5933 		error = udp_implicit_bind(connp, cr);
5934 		/*
5935 		 * We could be racing with an actual bind, in which case
5936 		 * we would see EPROTO. We cross our fingers and try
5937 		 * to connect.
5938 		 */
5939 		if (!(error == 0 || error == EPROTO)) {
5940 			freemsg(mp);
5941 			return (error);
5942 		}
5943 	}
5944 
5945 	/* Connected? */
5946 	if (msg->msg_name == NULL) {
5947 		if (udp->udp_state != TS_DATA_XFER) {
5948 			UDPS_BUMP_MIB(us, udpOutErrors);
5949 			return (EDESTADDRREQ);
5950 		}
5951 		if (msg->msg_controllen != 0) {
5952 			error = udp_output_ancillary(connp, NULL, NULL, mp,
5953 			    NULL, msg, cr, pid);
5954 		} else {
5955 			error = udp_output_connected(connp, mp, cr, pid);
5956 		}
5957 		if (us->us_sendto_ignerr)
5958 			return (0);
5959 		else
5960 			return (error);
5961 	}
5962 	if (udp->udp_state == TS_DATA_XFER) {
5963 		UDPS_BUMP_MIB(us, udpOutErrors);
5964 		return (EISCONN);
5965 	}
5966 	error = proto_verify_ip_addr(connp->conn_family,
5967 	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
5968 	if (error != 0) {
5969 		UDPS_BUMP_MIB(us, udpOutErrors);
5970 		return (error);
5971 	}
5972 	switch (connp->conn_family) {
5973 	case AF_INET6:
5974 		sin6 = (sin6_t *)msg->msg_name;
5975 
5976 		srcid = sin6->__sin6_src_id;
5977 
5978 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
5979 			/*
5980 			 * Destination is a non-IPv4-compatible IPv6 address.
5981 			 * Send out an IPv6 format packet.
5982 			 */
5983 
5984 			/*
5985 			 * If the local address is a mapped address return
5986 			 * an error.
5987 			 * It would be possible to send an IPv6 packet but the
5988 			 * response would never make it back to the application
5989 			 * since it is bound to a mapped address.
5990 			 */
5991 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
5992 				UDPS_BUMP_MIB(us, udpOutErrors);
5993 				return (EADDRNOTAVAIL);
5994 			}
5995 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
5996 				sin6->sin6_addr = ipv6_loopback;
5997 			ipversion = IPV6_VERSION;
5998 		} else {
5999 			if (connp->conn_ipv6_v6only) {
6000 				UDPS_BUMP_MIB(us, udpOutErrors);
6001 				return (EADDRNOTAVAIL);
6002 			}
6003 
6004 			/*
6005 			 * If the local address is not zero or a mapped address
6006 			 * return an error.  It would be possible to send an
6007 			 * IPv4 packet but the response would never make it
6008 			 * back to the application since it is bound to a
6009 			 * non-mapped address.
6010 			 */
6011 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
6012 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
6013 				UDPS_BUMP_MIB(us, udpOutErrors);
6014 				return (EADDRNOTAVAIL);
6015 			}
6016 
6017 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
6018 				V4_PART_OF_V6(sin6->sin6_addr) =
6019 				    htonl(INADDR_LOOPBACK);
6020 			}
6021 			ipversion = IPV4_VERSION;
6022 		}
6023 
6024 		/*
6025 		 * We have to allocate an ip_xmit_attr_t before we grab
6026 		 * conn_lock and we need to hold conn_lock once we've check
6027 		 * conn_same_as_last_v6 to handle concurrent send* calls on a
6028 		 * socket.
6029 		 */
6030 		if (msg->msg_controllen == 0) {
6031 			ixa = conn_get_ixa(connp, B_FALSE);
6032 			if (ixa == NULL) {
6033 				UDPS_BUMP_MIB(us, udpOutErrors);
6034 				return (ENOMEM);
6035 			}
6036 		} else {
6037 			ixa = NULL;
6038 		}
6039 		mutex_enter(&connp->conn_lock);
6040 		if (udp->udp_delayed_error != 0) {
6041 			sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;
6042 
6043 			error = udp->udp_delayed_error;
6044 			udp->udp_delayed_error = 0;
6045 
6046 			/* Compare IP address, port, and family */
6047 
6048 			if (sin6->sin6_port == sin2->sin6_port &&
6049 			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6050 			    &sin2->sin6_addr) &&
6051 			    sin6->sin6_family == sin2->sin6_family) {
6052 				mutex_exit(&connp->conn_lock);
6053 				UDPS_BUMP_MIB(us, udpOutErrors);
6054 				if (ixa != NULL)
6055 					ixa_refrele(ixa);
6056 				return (error);
6057 			}
6058 		}
6059 
6060 		if (msg->msg_controllen != 0) {
6061 			mutex_exit(&connp->conn_lock);
6062 			ASSERT(ixa == NULL);
6063 			error = udp_output_ancillary(connp, NULL, sin6, mp,
6064 			    NULL, msg, cr, pid);
6065 		} else if (conn_same_as_last_v6(connp, sin6) &&
6066 		    connp->conn_lastsrcid == srcid &&
6067 		    ipsec_outbound_policy_current(ixa)) {
6068 			/* udp_output_lastdst drops conn_lock */
6069 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6070 		} else {
6071 			/* udp_output_newdst drops conn_lock */
6072 			error = udp_output_newdst(connp, mp, NULL, sin6,
6073 			    ipversion, cr, pid, ixa);
6074 		}
6075 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6076 		if (us->us_sendto_ignerr)
6077 			return (0);
6078 		else
6079 			return (error);
6080 	case AF_INET:
6081 		sin = (sin_t *)msg->msg_name;
6082 
6083 		ipversion = IPV4_VERSION;
6084 
6085 		if (sin->sin_addr.s_addr == INADDR_ANY)
6086 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
6087 
6088 		/*
6089 		 * We have to allocate an ip_xmit_attr_t before we grab
6090 		 * conn_lock and we need to hold conn_lock once we've check
6091 		 * conn_same_as_last_v6 to handle concurrent send* on a socket.
6092 		 */
6093 		if (msg->msg_controllen == 0) {
6094 			ixa = conn_get_ixa(connp, B_FALSE);
6095 			if (ixa == NULL) {
6096 				UDPS_BUMP_MIB(us, udpOutErrors);
6097 				return (ENOMEM);
6098 			}
6099 		} else {
6100 			ixa = NULL;
6101 		}
6102 		mutex_enter(&connp->conn_lock);
6103 		if (udp->udp_delayed_error != 0) {
6104 			sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;
6105 
6106 			error = udp->udp_delayed_error;
6107 			udp->udp_delayed_error = 0;
6108 
6109 			/* Compare IP address and port */
6110 
6111 			if (sin->sin_port == sin2->sin_port &&
6112 			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
6113 				mutex_exit(&connp->conn_lock);
6114 				UDPS_BUMP_MIB(us, udpOutErrors);
6115 				if (ixa != NULL)
6116 					ixa_refrele(ixa);
6117 				return (error);
6118 			}
6119 		}
6120 		if (msg->msg_controllen != 0) {
6121 			mutex_exit(&connp->conn_lock);
6122 			ASSERT(ixa == NULL);
6123 			error = udp_output_ancillary(connp, sin, NULL, mp,
6124 			    NULL, msg, cr, pid);
6125 		} else if (conn_same_as_last_v4(connp, sin) &&
6126 		    ipsec_outbound_policy_current(ixa)) {
6127 			/* udp_output_lastdst drops conn_lock */
6128 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6129 		} else {
6130 			/* udp_output_newdst drops conn_lock */
6131 			error = udp_output_newdst(connp, mp, sin, NULL,
6132 			    ipversion, cr, pid, ixa);
6133 		}
6134 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6135 		if (us->us_sendto_ignerr)
6136 			return (0);
6137 		else
6138 			return (error);
6139 	default:
6140 		return (EINVAL);
6141 	}
6142 }
6143 
6144 int
6145 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
6146     boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
6147     sock_quiesce_arg_t *arg)
6148 {
6149 	conn_t	*connp = (conn_t *)proto_handle;
6150 	udp_t	*udp;
6151 	struct T_capability_ack tca;
6152 	struct sockaddr_in6 laddr, faddr;
6153 	socklen_t laddrlen, faddrlen;
6154 	short opts;
6155 	struct stroptions *stropt;
6156 	mblk_t *mp, *stropt_mp;
6157 	int error;
6158 
6159 	udp = connp->conn_udp;
6160 
6161 	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
6162 
6163 	/*
6164 	 * setup the fallback stream that was allocated
6165 	 */
6166 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
6167 	connp->conn_minor_arena = WR(q)->q_ptr;
6168 
6169 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
6170 
6171 	WR(q)->q_qinfo = &udp_winit;
6172 
6173 	connp->conn_rq = RD(q);
6174 	connp->conn_wq = WR(q);
6175 
6176 	/* Notify stream head about options before sending up data */
6177 	stropt_mp->b_datap->db_type = M_SETOPTS;
6178 	stropt_mp->b_wptr += sizeof (*stropt);
6179 	stropt = (struct stroptions *)stropt_mp->b_rptr;
6180 	stropt->so_flags = SO_WROFF | SO_HIWAT;
6181 	stropt->so_wroff = connp->conn_wroff;
6182 	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
6183 	putnext(RD(q), stropt_mp);
6184 
6185 	/*
6186 	 * Free the helper stream
6187 	 */
6188 	ip_free_helper_stream(connp);
6189 
6190 	if (!issocket)
6191 		udp_use_pure_tpi(udp);
6192 
6193 	/*
6194 	 * Collect the information needed to sync with the sonode
6195 	 */
6196 	udp_do_capability_ack(udp, &tca, TC1_INFO);
6197 
6198 	laddrlen = faddrlen = sizeof (sin6_t);
6199 	(void) udp_getsockname((sock_lower_handle_t)connp,
6200 	    (struct sockaddr *)&laddr, &laddrlen, CRED());
6201 	error = udp_getpeername((sock_lower_handle_t)connp,
6202 	    (struct sockaddr *)&faddr, &faddrlen, CRED());
6203 	if (error != 0)
6204 		faddrlen = 0;
6205 
6206 	opts = 0;
6207 	if (connp->conn_dgram_errind)
6208 		opts |= SO_DGRAM_ERRIND;
6209 	if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
6210 		opts |= SO_DONTROUTE;
6211 
6212 	mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
6213 	    (struct sockaddr *)&laddr, laddrlen,
6214 	    (struct sockaddr *)&faddr, faddrlen, opts);
6215 
6216 	mutex_enter(&udp->udp_recv_lock);
6217 	/*
6218 	 * Attempts to send data up during fallback will result in it being
6219 	 * queued in udp_t. First push up the datagrams obtained from the
6220 	 * socket, then any packets queued in udp_t.
6221 	 */
6222 	if (mp != NULL) {
6223 		mp->b_next = udp->udp_fallback_queue_head;
6224 		udp->udp_fallback_queue_head = mp;
6225 	}
6226 	while (udp->udp_fallback_queue_head != NULL) {
6227 		mp = udp->udp_fallback_queue_head;
6228 		udp->udp_fallback_queue_head = mp->b_next;
6229 		mutex_exit(&udp->udp_recv_lock);
6230 		mp->b_next = NULL;
6231 		putnext(RD(q), mp);
6232 		mutex_enter(&udp->udp_recv_lock);
6233 	}
6234 	udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
6235 	/*
6236 	 * No longer a streams less socket
6237 	 */
6238 	mutex_enter(&connp->conn_lock);
6239 	connp->conn_flags &= ~IPCL_NONSTR;
6240 	mutex_exit(&connp->conn_lock);
6241 
6242 	mutex_exit(&udp->udp_recv_lock);
6243 
6244 	ASSERT(connp->conn_ref >= 1);
6245 
6246 	return (0);
6247 }
6248 
6249 /* ARGSUSED3 */
6250 int
6251 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6252     socklen_t *salenp, cred_t *cr)
6253 {
6254 	conn_t	*connp = (conn_t *)proto_handle;
6255 	udp_t	*udp = connp->conn_udp;
6256 	int error;
6257 
6258 	/* All Solaris components should pass a cred for this operation. */
6259 	ASSERT(cr != NULL);
6260 
6261 	mutex_enter(&connp->conn_lock);
6262 	if (udp->udp_state != TS_DATA_XFER)
6263 		error = ENOTCONN;
6264 	else
6265 		error = conn_getpeername(connp, sa, salenp);
6266 	mutex_exit(&connp->conn_lock);
6267 	return (error);
6268 }
6269 
6270 /* ARGSUSED3 */
6271 int
6272 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6273     socklen_t *salenp, cred_t *cr)
6274 {
6275 	conn_t	*connp = (conn_t *)proto_handle;
6276 	int error;
6277 
6278 	/* All Solaris components should pass a cred for this operation. */
6279 	ASSERT(cr != NULL);
6280 
6281 	mutex_enter(&connp->conn_lock);
6282 	error = conn_getsockname(connp, sa, salenp);
6283 	mutex_exit(&connp->conn_lock);
6284 	return (error);
6285 }
6286 
6287 int
6288 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6289     void *optvalp, socklen_t *optlen, cred_t *cr)
6290 {
6291 	conn_t		*connp = (conn_t *)proto_handle;
6292 	int		error;
6293 	t_uscalar_t	max_optbuf_len;
6294 	void		*optvalp_buf;
6295 	int		len;
6296 
6297 	/* All Solaris components should pass a cred for this operation. */
6298 	ASSERT(cr != NULL);
6299 
6300 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6301 	    udp_opt_obj.odb_opt_des_arr,
6302 	    udp_opt_obj.odb_opt_arr_cnt,
6303 	    B_FALSE, B_TRUE, cr);
6304 	if (error != 0) {
6305 		if (error < 0)
6306 			error = proto_tlitosyserr(-error);
6307 		return (error);
6308 	}
6309 
6310 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6311 	len = udp_opt_get(connp, level, option_name, optvalp_buf);
6312 	if (len == -1) {
6313 		kmem_free(optvalp_buf, max_optbuf_len);
6314 		return (EINVAL);
6315 	}
6316 
6317 	/*
6318 	 * update optlen and copy option value
6319 	 */
6320 	t_uscalar_t size = MIN(len, *optlen);
6321 
6322 	bcopy(optvalp_buf, optvalp, size);
6323 	bcopy(&size, optlen, sizeof (size));
6324 
6325 	kmem_free(optvalp_buf, max_optbuf_len);
6326 	return (0);
6327 }
6328 
6329 int
6330 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6331     const void *optvalp, socklen_t optlen, cred_t *cr)
6332 {
6333 	conn_t		*connp = (conn_t *)proto_handle;
6334 	int		error;
6335 
6336 	/* All Solaris components should pass a cred for this operation. */
6337 	ASSERT(cr != NULL);
6338 
6339 	error = proto_opt_check(level, option_name, optlen, NULL,
6340 	    udp_opt_obj.odb_opt_des_arr,
6341 	    udp_opt_obj.odb_opt_arr_cnt,
6342 	    B_TRUE, B_FALSE, cr);
6343 
6344 	if (error != 0) {
6345 		if (error < 0)
6346 			error = proto_tlitosyserr(-error);
6347 		return (error);
6348 	}
6349 
6350 	error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
6351 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
6352 	    NULL, cr);
6353 
6354 	ASSERT(error >= 0);
6355 
6356 	return (error);
6357 }
6358 
6359 void
6360 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
6361 {
6362 	conn_t	*connp = (conn_t *)proto_handle;
6363 	udp_t	*udp = connp->conn_udp;
6364 
6365 	mutex_enter(&udp->udp_recv_lock);
6366 	connp->conn_flow_cntrld = B_FALSE;
6367 	mutex_exit(&udp->udp_recv_lock);
6368 }
6369 
6370 /* ARGSUSED2 */
6371 int
6372 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6373 {
6374 	conn_t	*connp = (conn_t *)proto_handle;
6375 
6376 	/* All Solaris components should pass a cred for this operation. */
6377 	ASSERT(cr != NULL);
6378 
6379 	/* shut down the send side */
6380 	if (how != SHUT_RD)
6381 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6382 		    SOCK_OPCTL_SHUT_SEND, 0);
6383 	/* shut down the recv side */
6384 	if (how != SHUT_WR)
6385 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6386 		    SOCK_OPCTL_SHUT_RECV, 0);
6387 	return (0);
6388 }
6389 
6390 int
6391 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6392     int mode, int32_t *rvalp, cred_t *cr)
6393 {
6394 	conn_t		*connp = (conn_t *)proto_handle;
6395 	int		error;
6396 
6397 	/* All Solaris components should pass a cred for this operation. */
6398 	ASSERT(cr != NULL);
6399 
6400 	/*
6401 	 * If we don't have a helper stream then create one.
6402 	 * ip_create_helper_stream takes care of locking the conn_t,
6403 	 * so this check for NULL is just a performance optimization.
6404 	 */
6405 	if (connp->conn_helper_info == NULL) {
6406 		udp_stack_t *us = connp->conn_udp->udp_us;
6407 
6408 		ASSERT(us->us_ldi_ident != NULL);
6409 
6410 		/*
6411 		 * Create a helper stream for non-STREAMS socket.
6412 		 */
6413 		error = ip_create_helper_stream(connp, us->us_ldi_ident);
6414 		if (error != 0) {
6415 			ip0dbg(("udp_ioctl: create of IP helper stream "
6416 			    "failed %d\n", error));
6417 			return (error);
6418 		}
6419 	}
6420 
6421 	switch (cmd) {
6422 		case _SIOCSOCKFALLBACK:
6423 		case TI_GETPEERNAME:
6424 		case TI_GETMYNAME:
6425 			ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6426 			    cmd));
6427 			error = EINVAL;
6428 			break;
6429 		default:
6430 			/*
6431 			 * Pass on to IP using helper stream
6432 			 */
6433 			error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6434 			    cmd, arg, mode, cr, rvalp);
6435 			break;
6436 	}
6437 	return (error);
6438 }
6439 
6440 /* ARGSUSED */
6441 int
6442 udp_accept(sock_lower_handle_t lproto_handle,
6443     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
6444     cred_t *cr)
6445 {
6446 	return (EOPNOTSUPP);
6447 }
6448 
6449 /* ARGSUSED */
6450 int
6451 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
6452 {
6453 	return (EOPNOTSUPP);
6454 }
6455 
6456 sock_downcalls_t sock_udp_downcalls = {
6457 	udp_activate,		/* sd_activate */
6458 	udp_accept,		/* sd_accept */
6459 	udp_bind,		/* sd_bind */
6460 	udp_listen,		/* sd_listen */
6461 	udp_connect,		/* sd_connect */
6462 	udp_getpeername,	/* sd_getpeername */
6463 	udp_getsockname,	/* sd_getsockname */
6464 	udp_getsockopt,		/* sd_getsockopt */
6465 	udp_setsockopt,		/* sd_setsockopt */
6466 	udp_send,		/* sd_send */
6467 	NULL,			/* sd_send_uio */
6468 	NULL,			/* sd_recv_uio */
6469 	NULL,			/* sd_poll */
6470 	udp_shutdown,		/* sd_shutdown */
6471 	udp_clr_flowctrl,	/* sd_setflowctrl */
6472 	udp_ioctl,		/* sd_ioctl */
6473 	udp_close		/* sd_close */
6474 };
6475