xref: /illumos-gate/usr/src/uts/common/inet/udp/udp.c (revision 3e8b6b84)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
25  * Copyright 2015, Joyent, Inc.
26  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
27  */
28 /* Copyright (c) 1990 Mentat Inc. */
29 
30 #include <sys/sysmacros.h>
31 #include <sys/types.h>
32 #include <sys/stream.h>
33 #include <sys/stropts.h>
34 #include <sys/strlog.h>
35 #include <sys/strsun.h>
36 #define	_SUN_TPI_VERSION 2
37 #include <sys/tihdr.h>
38 #include <sys/timod.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/strsubr.h>
42 #include <sys/suntpi.h>
43 #include <sys/xti_inet.h>
44 #include <sys/kmem.h>
45 #include <sys/cred_impl.h>
46 #include <sys/policy.h>
47 #include <sys/priv.h>
48 #include <sys/ucred.h>
49 #include <sys/zone.h>
50 
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/sockio.h>
54 #include <sys/vtrace.h>
55 #include <sys/sdt.h>
56 #include <sys/debug.h>
57 #include <sys/isa_defs.h>
58 #include <sys/random.h>
59 #include <netinet/in.h>
60 #include <netinet/ip6.h>
61 #include <netinet/icmp6.h>
62 #include <netinet/udp.h>
63 
64 #include <inet/common.h>
65 #include <inet/ip.h>
66 #include <inet/ip_impl.h>
67 #include <inet/ipsec_impl.h>
68 #include <inet/ip6.h>
69 #include <inet/ip_ire.h>
70 #include <inet/ip_if.h>
71 #include <inet/ip_multi.h>
72 #include <inet/ip_ndp.h>
73 #include <inet/proto_set.h>
74 #include <inet/mib2.h>
75 #include <inet/optcom.h>
76 #include <inet/snmpcom.h>
77 #include <inet/kstatcom.h>
78 #include <inet/ipclassifier.h>
79 #include <sys/squeue_impl.h>
80 #include <inet/ipnet.h>
81 #include <sys/vxlan.h>
82 #include <inet/inet_hash.h>
83 
84 #include <sys/tsol/label.h>
85 #include <sys/tsol/tnet.h>
86 #include <rpc/pmap_prot.h>
87 
88 #include <inet/udp_impl.h>
89 
90 /*
91  * Synchronization notes:
92  *
93  * UDP is MT and uses the usual kernel synchronization primitives. There are 2
94  * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
95  * protects the contents of the udp_t. uf_lock protects the address and the
96  * fanout information.
97  * The lock order is conn_lock -> uf_lock.
98  *
99  * The fanout lock uf_lock:
100  * When a UDP endpoint is bound to a local port, it is inserted into
101  * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
102  * The size of the array is controlled by the udp_bind_fanout_size variable.
103  * This variable can be changed in /etc/system if the default value is
104  * not large enough.  Each bind hash bucket is protected by a per bucket
105  * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
106  * structure and a few other fields in the udp_t. A UDP endpoint is removed
107  * from the bind hash list only when it is being unbound or being closed.
108  * The per bucket lock also protects a UDP endpoint's state changes.
109  *
110  * Plumbing notes:
111  * UDP is always a device driver. For compatibility with mibopen() code
112  * it is possible to I_PUSH "udp", but that results in pushing a passthrough
113  * dummy module.
114  *
115  * The above implies that we don't support any intermediate module to
116  * reside in between /dev/ip and udp -- in fact, we never supported such
117  * scenario in the past as the inter-layer communication semantics have
118  * always been private.
119  */
120 
121 /* For /etc/system control */
122 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
123 
124 static void	udp_addr_req(queue_t *q, mblk_t *mp);
125 static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
126 static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
127 static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
128 static int	udp_build_hdr_template(conn_t *, const in6_addr_t *,
129     const in6_addr_t *, in_port_t, uint32_t);
130 static void	udp_capability_req(queue_t *q, mblk_t *mp);
131 static int	udp_tpi_close(queue_t *q, int flags, cred_t *);
132 static void	udp_close_free(conn_t *);
133 static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
134 static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
135 static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
136     int sys_error);
137 static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
138     t_scalar_t tlierr, int sys_error);
139 static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
140 		    cred_t *cr);
141 static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
142 		    char *value, caddr_t cp, cred_t *cr);
143 static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
144 		    char *value, caddr_t cp, cred_t *cr);
145 static void	udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
146 static void	udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
147     ip_recv_attr_t *ira);
148 static void	udp_info_req(queue_t *q, mblk_t *mp);
149 static void	udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
150 static int	udp_lrput(queue_t *, mblk_t *);
151 static int	udp_lwput(queue_t *, mblk_t *);
152 static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
153 		    cred_t *credp, boolean_t isv6);
154 static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
155 		    cred_t *credp);
156 static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
157 		    cred_t *credp);
158 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
159 int		udp_opt_set(conn_t *connp, uint_t optset_context,
160 		    int level, int name, uint_t inlen,
161 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
162 		    void *thisdg_attrs, cred_t *cr);
163 int		udp_opt_get(conn_t *connp, int level, int name,
164 		    uchar_t *ptr);
165 static int	udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
166 		    pid_t pid);
167 static int	udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
168     pid_t pid, ip_xmit_attr_t *ixa);
169 static int	udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
170 		    sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
171 		    ip_xmit_attr_t *ixa);
172 static mblk_t	*udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
173     const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
174     int *);
175 static mblk_t	*udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
176     mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
177 static void	udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
178 static void	udp_ud_err_connected(conn_t *, t_scalar_t);
179 static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
180 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
181     boolean_t random);
182 static void	udp_wput_other(queue_t *q, mblk_t *mp);
183 static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
184 static int	udp_wput_fallback(queue_t *q, mblk_t *mp);
185 static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
186 
187 static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
188 static void	udp_stack_fini(netstackid_t stackid, void *arg);
189 
190 /* Common routines for TPI and socket module */
191 static void	udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
192 
193 /* Common routine for TPI and socket module */
194 static conn_t	*udp_do_open(cred_t *, boolean_t, int, int *);
195 static void	udp_do_close(conn_t *);
196 static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
197     boolean_t);
198 static int	udp_do_unbind(conn_t *);
199 
200 int		udp_getsockname(sock_lower_handle_t,
201     struct sockaddr *, socklen_t *, cred_t *);
202 int		udp_getpeername(sock_lower_handle_t,
203     struct sockaddr *, socklen_t *, cred_t *);
204 static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
205     cred_t *, pid_t);
206 
207 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
208 
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_newdst().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 *
 * cp		- connection whose local address/port and netstack id are
 *		  passed to the hook; conn_ipversion selects AF_INET vs.
 *		  AF_INET6.
 * is_outgoing	- passed through to the hook unchanged.
 * faddrp	- pointer to the foreign address, always in in6_addr_t form;
 *		  for IPv4 only the V4 part is handed to the hook.
 * fport	- foreign port.
 * err		- lvalue that receives the hook's return value.
 *
 * Every argument is parenthesized in the expansion so that expressions
 * (e.g. "p + 1" for faddrp) expand correctly.  Note that cp and faddrp are
 * evaluated more than once; avoid arguments with side effects.
 */
#define	CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((cp)->conn_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET,		\
		    (uint8_t *)&((cp)->conn_laddr_v4),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)&(V4_PART_OF_V6(*(faddrp))),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET6,		\
		    (uint8_t *)&((cp)->conn_laddr_v6),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}
246 
247 static struct module_info udp_mod_info =  {
248 	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
249 };
250 
251 /*
252  * Entry points for UDP as a device.
253  * We have separate open functions for the /dev/udp and /dev/udp6 devices.
254  */
255 static struct qinit udp_rinitv4 = {
256 	NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
257 };
258 
259 static struct qinit udp_rinitv6 = {
260 	NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
261 };
262 
263 static struct qinit udp_winit = {
264 	udp_wput, ip_wsrv, NULL, NULL, NULL, &udp_mod_info
265 };
266 
267 /* UDP entry point during fallback */
268 struct qinit udp_fallback_sock_winit = {
269 	udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
270 };
271 
272 /*
273  * UDP needs to handle I_LINK and I_PLINK since ifconfig
274  * likes to use it as a place to hang the various streams.
275  */
276 static struct qinit udp_lrinit = {
277 	udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
278 };
279 
280 static struct qinit udp_lwinit = {
281 	udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
282 };
283 
284 /* For AF_INET aka /dev/udp */
285 struct streamtab udpinfov4 = {
286 	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
287 };
288 
289 /* For AF_INET6 aka /dev/udp6 */
290 struct streamtab udpinfov6 = {
291 	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
292 };
293 
294 #define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
295 
296 /* Default structure copied into T_INFO_ACK messages */
297 static struct T_info_ack udp_g_t_info_ack_ipv4 = {
298 	T_INFO_ACK,
299 	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
300 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
301 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
302 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
303 	sizeof (sin_t),	/* ADDR_size. */
304 	0,		/* OPT_size - not initialized here */
305 	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
306 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
307 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
308 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
309 };
310 
311 #define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
312 
313 static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
314 	T_INFO_ACK,
315 	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
316 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
317 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
318 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
319 	sizeof (sin6_t), /* ADDR_size. */
320 	0,		/* OPT_size - not initialized here */
321 	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
322 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
323 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
324 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
325 };
326 
327 /*
328  * UDP tunables related declarations. Definitions are in udp_tunables.c
329  */
330 extern mod_prop_info_t udp_propinfo_tbl[];
331 extern int udp_propinfo_count;
332 
333 /* Setable in /etc/system */
334 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
335 uint32_t udp_random_anon_port = 1;
336 
337 /*
338  * Hook functions to enable cluster networking.
339  * On non-clustered systems these vectors must always be NULL
340  */
341 
342 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
343     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
344     void *args) = NULL;
345 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
346     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
347     void *args) = NULL;
348 
349 typedef union T_primitives *t_primp_t;
350 
351 /*
352  * Various protocols that encapsulate UDP have no real use for the source port.
353  * Instead, they want to vary the source port to provide better equal-cost
354  * multipathing and other systems that use fanout. Consider something like
355  * VXLAN. If you're actually sending multiple different streams to a single
356  * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP,
357  * SRC Port, DST Port) will always be the same.
358  *
359  * Here, we return a port to hash this to, if we know how to hash it. If for
360  * some reason we can't perform an L4 hash, then we just return the default
361  * value, usually the default port. After we determine the hash we transform it
362  * so that it's in the range of [ min, max ].
363  *
364  * We'd like to avoid a pull up for the sake of performing the hash. If the
365  * first mblk_t doesn't have the full protocol header, then we just send it to
366  * the default. If for some reason we have an encapsulated packet that has its
367  * protocol header in different parts of an mblk_t, then we'll go with the
368  * default port. This means that that if a driver isn't consistent about how it
369  * generates the frames for a given flow, it will not always be consistently
370  * hashed. That should be an uncommon event.
371  */
372 uint16_t
373 udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max,
374     uint16_t def)
375 {
376 	size_t szused = 0;
377 	ip6_t *ip6h;
378 	ipha_t *ipha;
379 	uint16_t sap;
380 	uint64_t hash;
381 	uint32_t mod;
382 
383 	ASSERT(min <= max);
384 
385 	if (type != UDP_HASH_VXLAN)
386 		return (def);
387 
388 	if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)))
389 		return (def);
390 
391 	if (MBLKL(mp) < VXLAN_HDR_LEN) {
392 		return (def);
393 	} else {
394 		szused = VXLAN_HDR_LEN;
395 	}
396 
397 	/* Can we hold a MAC header? */
398 	if (MBLKL(mp) + szused < sizeof (struct ether_header))
399 		return (def);
400 
401 	/*
402 	 * We need to lie about the starting offset into the message block for
403 	 * convenience. Undo it at the end. We know that inet_pkt_hash() won't
404 	 * modify the mblk_t.
405 	 */
406 	mp->b_rptr += szused;
407 	hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 |
408 	    INET_PKT_HASH_L3 | INET_PKT_HASH_L4);
409 	mp->b_rptr -= szused;
410 
411 	if (hash == 0)
412 		return (def);
413 
414 	mod = max - min + 1;
415 	return ((hash % mod) + min);
416 }
417 
418 /*
419  * Return the next anonymous port in the privileged port range for
420  * bind checking.
421  *
422  * Trusted Extension (TX) notes: TX allows administrator to mark or
423  * reserve ports as Multilevel ports (MLP). MLP has special function
424  * on TX systems. Once a port is made MLP, it's not available as
425  * ordinary port. This creates "holes" in the port name space. It
426  * may be necessary to skip the "holes" find a suitable anon port.
427  */
428 static in_port_t
429 udp_get_next_priv_port(udp_t *udp)
430 {
431 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
432 	in_port_t nextport;
433 	boolean_t restart = B_FALSE;
434 	udp_stack_t *us = udp->udp_us;
435 
436 retry:
437 	if (next_priv_port < us->us_min_anonpriv_port ||
438 	    next_priv_port >= IPPORT_RESERVED) {
439 		next_priv_port = IPPORT_RESERVED - 1;
440 		if (restart)
441 			return (0);
442 		restart = B_TRUE;
443 	}
444 
445 	if (is_system_labeled() &&
446 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
447 	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
448 		next_priv_port = nextport;
449 		goto retry;
450 	}
451 
452 	return (next_priv_port--);
453 }
454 
455 /*
456  * Hash list removal routine for udp_t structures.
457  */
458 static void
459 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
460 {
461 	udp_t		*udpnext;
462 	kmutex_t	*lockp;
463 	udp_stack_t	*us = udp->udp_us;
464 	conn_t		*connp = udp->udp_connp;
465 
466 	if (udp->udp_ptpbhn == NULL)
467 		return;
468 
469 	/*
470 	 * Extract the lock pointer in case there are concurrent
471 	 * hash_remove's for this instance.
472 	 */
473 	ASSERT(connp->conn_lport != 0);
474 	if (!caller_holds_lock) {
475 		lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
476 		    us->us_bind_fanout_size)].uf_lock;
477 		ASSERT(lockp != NULL);
478 		mutex_enter(lockp);
479 	}
480 	if (udp->udp_ptpbhn != NULL) {
481 		udpnext = udp->udp_bind_hash;
482 		if (udpnext != NULL) {
483 			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
484 			udp->udp_bind_hash = NULL;
485 		}
486 		*udp->udp_ptpbhn = udpnext;
487 		udp->udp_ptpbhn = NULL;
488 	}
489 	if (!caller_holds_lock) {
490 		mutex_exit(lockp);
491 	}
492 }
493 
494 static void
495 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
496 {
497 	conn_t	*connp = udp->udp_connp;
498 	udp_t	**udpp;
499 	udp_t	*udpnext;
500 	conn_t	*connext;
501 
502 	ASSERT(MUTEX_HELD(&uf->uf_lock));
503 	ASSERT(udp->udp_ptpbhn == NULL);
504 	udpp = &uf->uf_udp;
505 	udpnext = udpp[0];
506 	if (udpnext != NULL) {
507 		/*
508 		 * If the new udp bound to the INADDR_ANY address
509 		 * and the first one in the list is not bound to
510 		 * INADDR_ANY we skip all entries until we find the
511 		 * first one bound to INADDR_ANY.
512 		 * This makes sure that applications binding to a
513 		 * specific address get preference over those binding to
514 		 * INADDR_ANY.
515 		 */
516 		connext = udpnext->udp_connp;
517 		if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
518 		    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
519 			while ((udpnext = udpp[0]) != NULL &&
520 			    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
521 				udpp = &(udpnext->udp_bind_hash);
522 			}
523 			if (udpnext != NULL)
524 				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
525 		} else {
526 			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
527 		}
528 	}
529 	udp->udp_bind_hash = udpnext;
530 	udp->udp_ptpbhn = udpp;
531 	udpp[0] = udp;
532 }
533 
534 /*
535  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
536  * passed to udp_wput.
537  * It associates a port number and local address with the stream.
538  * It calls IP to verify the local IP address, and calls IP to insert
539  * the conn_t in the fanout table.
540  * If everything is ok it then sends the T_BIND_ACK back up.
541  *
542  * Note that UDP over IPv4 and IPv6 sockets can use the same port number
543  * without setting SO_REUSEADDR. This is needed so that they
544  * can be viewed as two independent transport protocols.
545  * However, anonymouns ports are allocated from the same range to avoid
546  * duplicating the us->us_next_port_to_try.
547  */
548 static void
549 udp_tpi_bind(queue_t *q, mblk_t *mp)
550 {
551 	sin_t		*sin;
552 	sin6_t		*sin6;
553 	mblk_t		*mp1;
554 	struct T_bind_req *tbr;
555 	conn_t		*connp;
556 	udp_t		*udp;
557 	int		error;
558 	struct sockaddr	*sa;
559 	cred_t		*cr;
560 
561 	/*
562 	 * All Solaris components should pass a db_credp
563 	 * for this TPI message, hence we ASSERT.
564 	 * But in case there is some other M_PROTO that looks
565 	 * like a TPI message sent by some other kernel
566 	 * component, we check and return an error.
567 	 */
568 	cr = msg_getcred(mp, NULL);
569 	ASSERT(cr != NULL);
570 	if (cr == NULL) {
571 		udp_err_ack(q, mp, TSYSERR, EINVAL);
572 		return;
573 	}
574 
575 	connp = Q_TO_CONN(q);
576 	udp = connp->conn_udp;
577 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
578 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
579 		    "udp_bind: bad req, len %u",
580 		    (uint_t)(mp->b_wptr - mp->b_rptr));
581 		udp_err_ack(q, mp, TPROTO, 0);
582 		return;
583 	}
584 	if (udp->udp_state != TS_UNBND) {
585 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
586 		    "udp_bind: bad state, %u", udp->udp_state);
587 		udp_err_ack(q, mp, TOUTSTATE, 0);
588 		return;
589 	}
590 	/*
591 	 * Reallocate the message to make sure we have enough room for an
592 	 * address.
593 	 */
594 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
595 	if (mp1 == NULL) {
596 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
597 		return;
598 	}
599 
600 	mp = mp1;
601 
602 	/* Reset the message type in preparation for shipping it back. */
603 	DB_TYPE(mp) = M_PCPROTO;
604 
605 	tbr = (struct T_bind_req *)mp->b_rptr;
606 	switch (tbr->ADDR_length) {
607 	case 0:			/* Request for a generic port */
608 		tbr->ADDR_offset = sizeof (struct T_bind_req);
609 		if (connp->conn_family == AF_INET) {
610 			tbr->ADDR_length = sizeof (sin_t);
611 			sin = (sin_t *)&tbr[1];
612 			*sin = sin_null;
613 			sin->sin_family = AF_INET;
614 			mp->b_wptr = (uchar_t *)&sin[1];
615 			sa = (struct sockaddr *)sin;
616 		} else {
617 			ASSERT(connp->conn_family == AF_INET6);
618 			tbr->ADDR_length = sizeof (sin6_t);
619 			sin6 = (sin6_t *)&tbr[1];
620 			*sin6 = sin6_null;
621 			sin6->sin6_family = AF_INET6;
622 			mp->b_wptr = (uchar_t *)&sin6[1];
623 			sa = (struct sockaddr *)sin6;
624 		}
625 		break;
626 
627 	case sizeof (sin_t):	/* Complete IPv4 address */
628 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
629 		    sizeof (sin_t));
630 		if (sa == NULL || !OK_32PTR((char *)sa)) {
631 			udp_err_ack(q, mp, TSYSERR, EINVAL);
632 			return;
633 		}
634 		if (connp->conn_family != AF_INET ||
635 		    sa->sa_family != AF_INET) {
636 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
637 			return;
638 		}
639 		break;
640 
641 	case sizeof (sin6_t):	/* complete IPv6 address */
642 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
643 		    sizeof (sin6_t));
644 		if (sa == NULL || !OK_32PTR((char *)sa)) {
645 			udp_err_ack(q, mp, TSYSERR, EINVAL);
646 			return;
647 		}
648 		if (connp->conn_family != AF_INET6 ||
649 		    sa->sa_family != AF_INET6) {
650 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
651 			return;
652 		}
653 		break;
654 
655 	default:		/* Invalid request */
656 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
657 		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
658 		udp_err_ack(q, mp, TBADADDR, 0);
659 		return;
660 	}
661 
662 	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
663 	    tbr->PRIM_type != O_T_BIND_REQ);
664 
665 	if (error != 0) {
666 		if (error > 0) {
667 			udp_err_ack(q, mp, TSYSERR, error);
668 		} else {
669 			udp_err_ack(q, mp, -error, 0);
670 		}
671 	} else {
672 		tbr->PRIM_type = T_BIND_ACK;
673 		qreply(q, mp);
674 	}
675 }
676 
677 /*
678  * This routine handles each T_CONN_REQ message passed to udp.  It
679  * associates a default destination address with the stream.
680  *
681  * After various error checks are completed, udp_connect() lays
682  * the target address and port into the composite header template.
683  * Then we ask IP for information, including a source address if we didn't
684  * already have one. Finally we send up the T_OK_ACK reply message.
685  */
686 static void
687 udp_tpi_connect(queue_t *q, mblk_t *mp)
688 {
689 	conn_t	*connp = Q_TO_CONN(q);
690 	int	error;
691 	socklen_t	len;
692 	struct sockaddr		*sa;
693 	struct T_conn_req	*tcr;
694 	cred_t		*cr;
695 	pid_t		pid;
696 	/*
697 	 * All Solaris components should pass a db_credp
698 	 * for this TPI message, hence we ASSERT.
699 	 * But in case there is some other M_PROTO that looks
700 	 * like a TPI message sent by some other kernel
701 	 * component, we check and return an error.
702 	 */
703 	cr = msg_getcred(mp, &pid);
704 	ASSERT(cr != NULL);
705 	if (cr == NULL) {
706 		udp_err_ack(q, mp, TSYSERR, EINVAL);
707 		return;
708 	}
709 
710 	tcr = (struct T_conn_req *)mp->b_rptr;
711 
712 	/* A bit of sanity checking */
713 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
714 		udp_err_ack(q, mp, TPROTO, 0);
715 		return;
716 	}
717 
718 	if (tcr->OPT_length != 0) {
719 		udp_err_ack(q, mp, TBADOPT, 0);
720 		return;
721 	}
722 
723 	/*
724 	 * Determine packet type based on type of address passed in
725 	 * the request should contain an IPv4 or IPv6 address.
726 	 * Make sure that address family matches the type of
727 	 * family of the address passed down.
728 	 */
729 	len = tcr->DEST_length;
730 	switch (tcr->DEST_length) {
731 	default:
732 		udp_err_ack(q, mp, TBADADDR, 0);
733 		return;
734 
735 	case sizeof (sin_t):
736 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
737 		    sizeof (sin_t));
738 		break;
739 
740 	case sizeof (sin6_t):
741 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
742 		    sizeof (sin6_t));
743 		break;
744 	}
745 
746 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
747 	if (error != 0) {
748 		udp_err_ack(q, mp, TSYSERR, error);
749 		return;
750 	}
751 
752 	error = udp_do_connect(connp, sa, len, cr, pid);
753 	if (error != 0) {
754 		if (error < 0)
755 			udp_err_ack(q, mp, -error, 0);
756 		else
757 			udp_err_ack(q, mp, TSYSERR, error);
758 	} else {
759 		mblk_t	*mp1;
760 		/*
761 		 * We have to send a connection confirmation to
762 		 * keep TLI happy.
763 		 */
764 		if (connp->conn_family == AF_INET) {
765 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
766 			    sizeof (sin_t), NULL, 0);
767 		} else {
768 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
769 			    sizeof (sin6_t), NULL, 0);
770 		}
771 		if (mp1 == NULL) {
772 			udp_err_ack(q, mp, TSYSERR, ENOMEM);
773 			return;
774 		}
775 
776 		/*
777 		 * Send ok_ack for T_CONN_REQ
778 		 */
779 		mp = mi_tpi_ok_ack_alloc(mp);
780 		if (mp == NULL) {
781 			/* Unable to reuse the T_CONN_REQ for the ack. */
782 			udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
783 			return;
784 		}
785 
786 		putnext(connp->conn_rq, mp);
787 		putnext(connp->conn_rq, mp1);
788 	}
789 }
790 
791 /* ARGSUSED */
792 static int
793 udp_tpi_close(queue_t *q, int flags, cred_t *credp __unused)
794 {
795 	conn_t	*connp;
796 
797 	if (flags & SO_FALLBACK) {
798 		/*
799 		 * stream is being closed while in fallback
800 		 * simply free the resources that were allocated
801 		 */
802 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
803 		qprocsoff(q);
804 		goto done;
805 	}
806 
807 	connp = Q_TO_CONN(q);
808 	udp_do_close(connp);
809 done:
810 	q->q_ptr = WR(q)->q_ptr = NULL;
811 	return (0);
812 }
813 
814 static void
815 udp_close_free(conn_t *connp)
816 {
817 	udp_t *udp = connp->conn_udp;
818 
819 	/* If there are any options associated with the stream, free them. */
820 	if (udp->udp_recv_ipp.ipp_fields != 0)
821 		ip_pkt_free(&udp->udp_recv_ipp);
822 
823 	/*
824 	 * Clear any fields which the kmem_cache constructor clears.
825 	 * Only udp_connp needs to be preserved.
826 	 * TBD: We should make this more efficient to avoid clearing
827 	 * everything.
828 	 */
829 	ASSERT(udp->udp_connp == connp);
830 	bzero(udp, sizeof (udp_t));
831 	udp->udp_connp = connp;
832 }
833 
834 static int
835 udp_do_disconnect(conn_t *connp)
836 {
837 	udp_t	*udp;
838 	udp_fanout_t *udpf;
839 	udp_stack_t *us;
840 	int	error;
841 
842 	udp = connp->conn_udp;
843 	us = udp->udp_us;
844 	mutex_enter(&connp->conn_lock);
845 	if (udp->udp_state != TS_DATA_XFER) {
846 		mutex_exit(&connp->conn_lock);
847 		return (-TOUTSTATE);
848 	}
849 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
850 	    us->us_bind_fanout_size)];
851 	mutex_enter(&udpf->uf_lock);
852 	if (connp->conn_mcbc_bind)
853 		connp->conn_saddr_v6 = ipv6_all_zeros;
854 	else
855 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
856 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
857 	connp->conn_faddr_v6 = ipv6_all_zeros;
858 	connp->conn_fport = 0;
859 	udp->udp_state = TS_IDLE;
860 	mutex_exit(&udpf->uf_lock);
861 
862 	/* Remove any remnants of mapped address binding */
863 	if (connp->conn_family == AF_INET6)
864 		connp->conn_ipversion = IPV6_VERSION;
865 
866 	connp->conn_v6lastdst = ipv6_all_zeros;
867 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
868 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
869 	mutex_exit(&connp->conn_lock);
870 	if (error != 0)
871 		return (error);
872 
873 	/*
874 	 * Tell IP to remove the full binding and revert
875 	 * to the local address binding.
876 	 */
877 	return (ip_laddr_fanout_insert(connp));
878 }
879 
880 static void
881 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
882 {
883 	conn_t	*connp = Q_TO_CONN(q);
884 	int	error;
885 
886 	/*
887 	 * Allocate the largest primitive we need to send back
888 	 * T_error_ack is > than T_ok_ack
889 	 */
890 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
891 	if (mp == NULL) {
892 		/* Unable to reuse the T_DISCON_REQ for the ack. */
893 		udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
894 		return;
895 	}
896 
897 	error = udp_do_disconnect(connp);
898 
899 	if (error != 0) {
900 		if (error < 0) {
901 			udp_err_ack(q, mp, -error, 0);
902 		} else {
903 			udp_err_ack(q, mp, TSYSERR, error);
904 		}
905 	} else {
906 		mp = mi_tpi_ok_ack_alloc(mp);
907 		ASSERT(mp != NULL);
908 		qreply(q, mp);
909 	}
910 }
911 
912 int
913 udp_disconnect(conn_t *connp)
914 {
915 	int error;
916 
917 	connp->conn_dgram_errind = B_FALSE;
918 	error = udp_do_disconnect(connp);
919 	if (error < 0)
920 		error = proto_tlitosyserr(-error);
921 
922 	return (error);
923 }
924 
925 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
926 static void
927 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
928 {
929 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
930 		qreply(q, mp);
931 }
932 
933 /* Shorthand to generate and send TPI error acks to our client */
934 static void
935 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
936     t_scalar_t t_error, int sys_error)
937 {
938 	struct T_error_ack	*teackp;
939 
940 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
941 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
942 		teackp = (struct T_error_ack *)mp->b_rptr;
943 		teackp->ERROR_prim = primitive;
944 		teackp->TLI_error = t_error;
945 		teackp->UNIX_error = sys_error;
946 		qreply(q, mp);
947 	}
948 }
949 
950 /* At minimum we need 4 bytes of UDP header */
951 #define	ICMP_MIN_UDP_HDR	4
952 
953 /*
954  * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
955  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
956  * Assumes that IP has pulled up everything up to and including the ICMP header.
957  */
958 /* ARGSUSED2 */
959 static void
960 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
961 {
962 	conn_t		*connp = (conn_t *)arg1;
963 	icmph_t		*icmph;
964 	ipha_t		*ipha;
965 	int		iph_hdr_length;
966 	udpha_t		*udpha;
967 	sin_t		sin;
968 	sin6_t		sin6;
969 	mblk_t		*mp1;
970 	int		error = 0;
971 	udp_t		*udp = connp->conn_udp;
972 
973 	ipha = (ipha_t *)mp->b_rptr;
974 
975 	ASSERT(OK_32PTR(mp->b_rptr));
976 
977 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
978 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
979 		udp_icmp_error_ipv6(connp, mp, ira);
980 		return;
981 	}
982 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
983 
984 	/* Skip past the outer IP and ICMP headers */
985 	ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
986 	iph_hdr_length = ira->ira_ip_hdr_length;
987 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
988 	ipha = (ipha_t *)&icmph[1];	/* Inner IP header */
989 
990 	/* Skip past the inner IP and find the ULP header */
991 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
992 	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
993 
994 	switch (icmph->icmph_type) {
995 	case ICMP_DEST_UNREACHABLE:
996 		switch (icmph->icmph_code) {
997 		case ICMP_FRAGMENTATION_NEEDED: {
998 			ipha_t		*ipha;
999 			ip_xmit_attr_t	*ixa;
1000 			/*
1001 			 * IP has already adjusted the path MTU.
1002 			 * But we need to adjust DF for IPv4.
1003 			 */
1004 			if (connp->conn_ipversion != IPV4_VERSION)
1005 				break;
1006 
1007 			ixa = conn_get_ixa(connp, B_FALSE);
1008 			if (ixa == NULL || ixa->ixa_ire == NULL) {
1009 				/*
1010 				 * Some other thread holds conn_ixa. We will
1011 				 * redo this on the next ICMP too big.
1012 				 */
1013 				if (ixa != NULL)
1014 					ixa_refrele(ixa);
1015 				break;
1016 			}
1017 			(void) ip_get_pmtu(ixa);
1018 
1019 			mutex_enter(&connp->conn_lock);
1020 			ipha = (ipha_t *)connp->conn_ht_iphc;
1021 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
1022 				ipha->ipha_fragment_offset_and_flags |=
1023 				    IPH_DF_HTONS;
1024 			} else {
1025 				ipha->ipha_fragment_offset_and_flags &=
1026 				    ~IPH_DF_HTONS;
1027 			}
1028 			mutex_exit(&connp->conn_lock);
1029 			ixa_refrele(ixa);
1030 			break;
1031 		}
1032 		case ICMP_PORT_UNREACHABLE:
1033 		case ICMP_PROTOCOL_UNREACHABLE:
1034 			error = ECONNREFUSED;
1035 			break;
1036 		default:
1037 			/* Transient errors */
1038 			break;
1039 		}
1040 		break;
1041 	default:
1042 		/* Transient errors */
1043 		break;
1044 	}
1045 	if (error == 0) {
1046 		freemsg(mp);
1047 		return;
1048 	}
1049 
1050 	/*
1051 	 * Deliver T_UDERROR_IND when the application has asked for it.
1052 	 * The socket layer enables this automatically when connected.
1053 	 */
1054 	if (!connp->conn_dgram_errind) {
1055 		freemsg(mp);
1056 		return;
1057 	}
1058 
1059 	switch (connp->conn_family) {
1060 	case AF_INET:
1061 		sin = sin_null;
1062 		sin.sin_family = AF_INET;
1063 		sin.sin_addr.s_addr = ipha->ipha_dst;
1064 		sin.sin_port = udpha->uha_dst_port;
1065 		if (IPCL_IS_NONSTR(connp)) {
1066 			mutex_enter(&connp->conn_lock);
1067 			if (udp->udp_state == TS_DATA_XFER) {
1068 				if (sin.sin_port == connp->conn_fport &&
1069 				    sin.sin_addr.s_addr ==
1070 				    connp->conn_faddr_v4) {
1071 					mutex_exit(&connp->conn_lock);
1072 					(*connp->conn_upcalls->su_set_error)
1073 					    (connp->conn_upper_handle, error);
1074 					goto done;
1075 				}
1076 			} else {
1077 				udp->udp_delayed_error = error;
1078 				*((sin_t *)&udp->udp_delayed_addr) = sin;
1079 			}
1080 			mutex_exit(&connp->conn_lock);
1081 		} else {
1082 			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1083 			    NULL, 0, error);
1084 			if (mp1 != NULL)
1085 				putnext(connp->conn_rq, mp1);
1086 		}
1087 		break;
1088 	case AF_INET6:
1089 		sin6 = sin6_null;
1090 		sin6.sin6_family = AF_INET6;
1091 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1092 		sin6.sin6_port = udpha->uha_dst_port;
1093 		if (IPCL_IS_NONSTR(connp)) {
1094 			mutex_enter(&connp->conn_lock);
1095 			if (udp->udp_state == TS_DATA_XFER) {
1096 				if (sin6.sin6_port == connp->conn_fport &&
1097 				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1098 				    &connp->conn_faddr_v6)) {
1099 					mutex_exit(&connp->conn_lock);
1100 					(*connp->conn_upcalls->su_set_error)
1101 					    (connp->conn_upper_handle, error);
1102 					goto done;
1103 				}
1104 			} else {
1105 				udp->udp_delayed_error = error;
1106 				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1107 			}
1108 			mutex_exit(&connp->conn_lock);
1109 		} else {
1110 			mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1111 			    NULL, 0, error);
1112 			if (mp1 != NULL)
1113 				putnext(connp->conn_rq, mp1);
1114 		}
1115 		break;
1116 	}
1117 done:
1118 	freemsg(mp);
1119 }
1120 
1121 /*
1122  * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1123  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1124  * Assumes that IP has pulled up all the extension headers as well as the
1125  * ICMPv6 header.
1126  */
1127 static void
1128 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
1129 {
1130 	icmp6_t		*icmp6;
1131 	ip6_t		*ip6h, *outer_ip6h;
1132 	uint16_t	iph_hdr_length;
1133 	uint8_t		*nexthdrp;
1134 	udpha_t		*udpha;
1135 	sin6_t		sin6;
1136 	mblk_t		*mp1;
1137 	int		error = 0;
1138 	udp_t		*udp = connp->conn_udp;
1139 	udp_stack_t	*us = udp->udp_us;
1140 
1141 	outer_ip6h = (ip6_t *)mp->b_rptr;
1142 #ifdef DEBUG
1143 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1144 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1145 	else
1146 		iph_hdr_length = IPV6_HDR_LEN;
1147 	ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1148 #endif
1149 	/* Skip past the outer IP and ICMP headers */
1150 	iph_hdr_length = ira->ira_ip_hdr_length;
1151 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1152 
1153 	/* Skip past the inner IP and find the ULP header */
1154 	ip6h = (ip6_t *)&icmp6[1];	/* Inner IP header */
1155 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1156 		freemsg(mp);
1157 		return;
1158 	}
1159 	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1160 
1161 	switch (icmp6->icmp6_type) {
1162 	case ICMP6_DST_UNREACH:
1163 		switch (icmp6->icmp6_code) {
1164 		case ICMP6_DST_UNREACH_NOPORT:
1165 			error = ECONNREFUSED;
1166 			break;
1167 		case ICMP6_DST_UNREACH_ADMIN:
1168 		case ICMP6_DST_UNREACH_NOROUTE:
1169 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1170 		case ICMP6_DST_UNREACH_ADDR:
1171 			/* Transient errors */
1172 			break;
1173 		default:
1174 			break;
1175 		}
1176 		break;
1177 	case ICMP6_PACKET_TOO_BIG: {
1178 		struct T_unitdata_ind	*tudi;
1179 		struct T_opthdr		*toh;
1180 		size_t			udi_size;
1181 		mblk_t			*newmp;
1182 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
1183 		    sizeof (struct ip6_mtuinfo);
1184 		sin6_t			*sin6;
1185 		struct ip6_mtuinfo	*mtuinfo;
1186 
1187 		/*
1188 		 * If the application has requested to receive path mtu
1189 		 * information, send up an empty message containing an
1190 		 * IPV6_PATHMTU ancillary data item.
1191 		 */
1192 		if (!connp->conn_ipv6_recvpathmtu)
1193 			break;
1194 
1195 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1196 		    opt_length;
1197 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1198 			UDPS_BUMP_MIB(us, udpInErrors);
1199 			break;
1200 		}
1201 
1202 		/*
1203 		 * newmp->b_cont is left to NULL on purpose.  This is an
1204 		 * empty message containing only ancillary data.
1205 		 */
1206 		newmp->b_datap->db_type = M_PROTO;
1207 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1208 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
1209 		tudi->PRIM_type = T_UNITDATA_IND;
1210 		tudi->SRC_length = sizeof (sin6_t);
1211 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1212 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1213 		tudi->OPT_length = opt_length;
1214 
1215 		sin6 = (sin6_t *)&tudi[1];
1216 		bzero(sin6, sizeof (sin6_t));
1217 		sin6->sin6_family = AF_INET6;
1218 		sin6->sin6_addr = connp->conn_faddr_v6;
1219 
1220 		toh = (struct T_opthdr *)&sin6[1];
1221 		toh->level = IPPROTO_IPV6;
1222 		toh->name = IPV6_PATHMTU;
1223 		toh->len = opt_length;
1224 		toh->status = 0;
1225 
1226 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1227 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1228 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1229 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1230 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1231 		/*
1232 		 * We've consumed everything we need from the original
1233 		 * message.  Free it, then send our empty message.
1234 		 */
1235 		freemsg(mp);
1236 		udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
1237 		return;
1238 	}
1239 	case ICMP6_TIME_EXCEEDED:
1240 		/* Transient errors */
1241 		break;
1242 	case ICMP6_PARAM_PROB:
1243 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1244 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1245 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1246 		    (uchar_t *)nexthdrp) {
1247 			error = ECONNREFUSED;
1248 			break;
1249 		}
1250 		break;
1251 	}
1252 	if (error == 0) {
1253 		freemsg(mp);
1254 		return;
1255 	}
1256 
1257 	/*
1258 	 * Deliver T_UDERROR_IND when the application has asked for it.
1259 	 * The socket layer enables this automatically when connected.
1260 	 */
1261 	if (!connp->conn_dgram_errind) {
1262 		freemsg(mp);
1263 		return;
1264 	}
1265 
1266 	sin6 = sin6_null;
1267 	sin6.sin6_family = AF_INET6;
1268 	sin6.sin6_addr = ip6h->ip6_dst;
1269 	sin6.sin6_port = udpha->uha_dst_port;
1270 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1271 
1272 	if (IPCL_IS_NONSTR(connp)) {
1273 		mutex_enter(&connp->conn_lock);
1274 		if (udp->udp_state == TS_DATA_XFER) {
1275 			if (sin6.sin6_port == connp->conn_fport &&
1276 			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1277 			    &connp->conn_faddr_v6)) {
1278 				mutex_exit(&connp->conn_lock);
1279 				(*connp->conn_upcalls->su_set_error)
1280 				    (connp->conn_upper_handle, error);
1281 				goto done;
1282 			}
1283 		} else {
1284 			udp->udp_delayed_error = error;
1285 			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1286 		}
1287 		mutex_exit(&connp->conn_lock);
1288 	} else {
1289 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1290 		    NULL, 0, error);
1291 		if (mp1 != NULL)
1292 			putnext(connp->conn_rq, mp1);
1293 	}
1294 done:
1295 	freemsg(mp);
1296 }
1297 
1298 /*
1299  * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
1300  * The local address is filled in if endpoint is bound. The remote address
1301  * is filled in if remote address has been precified ("connected endpoint")
1302  * (The concept of connected CLTS sockets is alien to published TPI
1303  *  but we support it anyway).
1304  */
1305 static void
1306 udp_addr_req(queue_t *q, mblk_t *mp)
1307 {
1308 	struct sockaddr *sa;
1309 	mblk_t	*ackmp;
1310 	struct T_addr_ack *taa;
1311 	udp_t	*udp = Q_TO_UDP(q);
1312 	conn_t	*connp = udp->udp_connp;
1313 	uint_t	addrlen;
1314 
1315 	/* Make it large enough for worst case */
1316 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1317 	    2 * sizeof (sin6_t), 1);
1318 	if (ackmp == NULL) {
1319 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1320 		return;
1321 	}
1322 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1323 
1324 	bzero(taa, sizeof (struct T_addr_ack));
1325 	ackmp->b_wptr = (uchar_t *)&taa[1];
1326 
1327 	taa->PRIM_type = T_ADDR_ACK;
1328 	ackmp->b_datap->db_type = M_PCPROTO;
1329 
1330 	if (connp->conn_family == AF_INET)
1331 		addrlen = sizeof (sin_t);
1332 	else
1333 		addrlen = sizeof (sin6_t);
1334 
1335 	mutex_enter(&connp->conn_lock);
1336 	/*
1337 	 * Note: Following code assumes 32 bit alignment of basic
1338 	 * data structures like sin_t and struct T_addr_ack.
1339 	 */
1340 	if (udp->udp_state != TS_UNBND) {
1341 		/*
1342 		 * Fill in local address first
1343 		 */
1344 		taa->LOCADDR_offset = sizeof (*taa);
1345 		taa->LOCADDR_length = addrlen;
1346 		sa = (struct sockaddr *)&taa[1];
1347 		(void) conn_getsockname(connp, sa, &addrlen);
1348 		ackmp->b_wptr += addrlen;
1349 	}
1350 	if (udp->udp_state == TS_DATA_XFER) {
1351 		/*
1352 		 * connected, fill remote address too
1353 		 */
1354 		taa->REMADDR_length = addrlen;
1355 		/* assumed 32-bit alignment */
1356 		taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1357 		sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1358 		(void) conn_getpeername(connp, sa, &addrlen);
1359 		ackmp->b_wptr += addrlen;
1360 	}
1361 	mutex_exit(&connp->conn_lock);
1362 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1363 	qreply(q, ackmp);
1364 }
1365 
1366 static void
1367 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1368 {
1369 	conn_t		*connp = udp->udp_connp;
1370 
1371 	if (connp->conn_family == AF_INET) {
1372 		*tap = udp_g_t_info_ack_ipv4;
1373 	} else {
1374 		*tap = udp_g_t_info_ack_ipv6;
1375 	}
1376 	tap->CURRENT_state = udp->udp_state;
1377 	tap->OPT_size = udp_max_optsize;
1378 }
1379 
1380 static void
1381 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1382     t_uscalar_t cap_bits1)
1383 {
1384 	tcap->CAP_bits1 = 0;
1385 
1386 	if (cap_bits1 & TC1_INFO) {
1387 		udp_copy_info(&tcap->INFO_ack, udp);
1388 		tcap->CAP_bits1 |= TC1_INFO;
1389 	}
1390 }
1391 
1392 /*
1393  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1394  * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1395  * udp_g_t_info_ack.  The current state of the stream is copied from
1396  * udp_state.
1397  */
1398 static void
1399 udp_capability_req(queue_t *q, mblk_t *mp)
1400 {
1401 	t_uscalar_t		cap_bits1;
1402 	struct T_capability_ack	*tcap;
1403 	udp_t	*udp = Q_TO_UDP(q);
1404 
1405 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1406 
1407 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1408 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1409 	if (!mp)
1410 		return;
1411 
1412 	tcap = (struct T_capability_ack *)mp->b_rptr;
1413 	udp_do_capability_ack(udp, tcap, cap_bits1);
1414 
1415 	qreply(q, mp);
1416 }
1417 
1418 /*
1419  * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
1420  * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1421  * The current state of the stream is copied from udp_state.
1422  */
1423 static void
1424 udp_info_req(queue_t *q, mblk_t *mp)
1425 {
1426 	udp_t *udp = Q_TO_UDP(q);
1427 
1428 	/* Create a T_INFO_ACK message. */
1429 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1430 	    T_INFO_ACK);
1431 	if (!mp)
1432 		return;
1433 	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1434 	qreply(q, mp);
1435 }
1436 
1437 /* For /dev/udp aka AF_INET open */
1438 static int
1439 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1440 {
1441 	return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1442 }
1443 
1444 /* For /dev/udp6 aka AF_INET6 open */
1445 static int
1446 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1447 {
1448 	return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1449 }
1450 
1451 /*
1452  * This is the open routine for udp.  It allocates a udp_t structure for
1453  * the stream and, on the first open of the module, creates an ND table.
1454  */
1455 static int
1456 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1457     boolean_t isv6)
1458 {
1459 	udp_t		*udp;
1460 	conn_t		*connp;
1461 	dev_t		conn_dev;
1462 	vmem_t		*minor_arena;
1463 	int		err;
1464 
1465 	/* If the stream is already open, return immediately. */
1466 	if (q->q_ptr != NULL)
1467 		return (0);
1468 
1469 	if (sflag == MODOPEN)
1470 		return (EINVAL);
1471 
1472 	if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1473 	    ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1474 		minor_arena = ip_minor_arena_la;
1475 	} else {
1476 		/*
1477 		 * Either minor numbers in the large arena were exhausted
1478 		 * or a non socket application is doing the open.
1479 		 * Try to allocate from the small arena.
1480 		 */
1481 		if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1482 			return (EBUSY);
1483 
1484 		minor_arena = ip_minor_arena_sa;
1485 	}
1486 
1487 	if (flag & SO_FALLBACK) {
1488 		/*
1489 		 * Non streams socket needs a stream to fallback to
1490 		 */
1491 		RD(q)->q_ptr = (void *)conn_dev;
1492 		WR(q)->q_qinfo = &udp_fallback_sock_winit;
1493 		WR(q)->q_ptr = (void *)minor_arena;
1494 		qprocson(q);
1495 		return (0);
1496 	}
1497 
1498 	connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
1499 	if (connp == NULL) {
1500 		inet_minor_free(minor_arena, conn_dev);
1501 		return (err);
1502 	}
1503 	udp = connp->conn_udp;
1504 
1505 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1506 	connp->conn_dev = conn_dev;
1507 	connp->conn_minor_arena = minor_arena;
1508 
1509 	/*
1510 	 * Initialize the udp_t structure for this stream.
1511 	 */
1512 	q->q_ptr = connp;
1513 	WR(q)->q_ptr = connp;
1514 	connp->conn_rq = q;
1515 	connp->conn_wq = WR(q);
1516 
1517 	/*
1518 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
1519 	 * need to lock anything.
1520 	 */
1521 	ASSERT(connp->conn_proto == IPPROTO_UDP);
1522 	ASSERT(connp->conn_udp == udp);
1523 	ASSERT(udp->udp_connp == connp);
1524 
1525 	if (flag & SO_SOCKSTR) {
1526 		udp->udp_issocket = B_TRUE;
1527 	}
1528 
1529 	WR(q)->q_hiwat = connp->conn_sndbuf;
1530 	WR(q)->q_lowat = connp->conn_sndlowat;
1531 
1532 	qprocson(q);
1533 
1534 	/* Set the Stream head write offset and high watermark. */
1535 	(void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1536 	(void) proto_set_rx_hiwat(q, connp,
1537 	    udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
1538 
1539 	mutex_enter(&connp->conn_lock);
1540 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1541 	mutex_exit(&connp->conn_lock);
1542 	return (0);
1543 }
1544 
1545 /*
1546  * Which UDP options OK to set through T_UNITDATA_REQ...
1547  */
1548 /* ARGSUSED */
1549 static boolean_t
1550 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1551 {
1552 	return (B_TRUE);
1553 }
1554 
1555 /*
1556  * This routine gets default values of certain options whose default
1557  * values are maintained by protcol specific code
1558  */
1559 int
1560 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1561 {
1562 	udp_t		*udp = Q_TO_UDP(q);
1563 	udp_stack_t *us = udp->udp_us;
1564 	int *i1 = (int *)ptr;
1565 
1566 	switch (level) {
1567 	case IPPROTO_IP:
1568 		switch (name) {
1569 		case IP_MULTICAST_TTL:
1570 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1571 			return (sizeof (uchar_t));
1572 		case IP_MULTICAST_LOOP:
1573 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1574 			return (sizeof (uchar_t));
1575 		}
1576 		break;
1577 	case IPPROTO_IPV6:
1578 		switch (name) {
1579 		case IPV6_MULTICAST_HOPS:
1580 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1581 			return (sizeof (int));
1582 		case IPV6_MULTICAST_LOOP:
1583 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1584 			return (sizeof (int));
1585 		case IPV6_UNICAST_HOPS:
1586 			*i1 = us->us_ipv6_hoplimit;
1587 			return (sizeof (int));
1588 		}
1589 		break;
1590 	}
1591 	return (-1);
1592 }
1593 
1594 /*
1595  * This routine retrieves the current status of socket options.
1596  * It returns the size of the option retrieved, or -1.
1597  */
1598 int
1599 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
1600     uchar_t *ptr)
1601 {
1602 	int		*i1 = (int *)ptr;
1603 	udp_t		*udp = connp->conn_udp;
1604 	int		len;
1605 	conn_opt_arg_t	coas;
1606 	int		retval;
1607 
1608 	coas.coa_connp = connp;
1609 	coas.coa_ixa = connp->conn_ixa;
1610 	coas.coa_ipp = &connp->conn_xmit_ipp;
1611 	coas.coa_ancillary = B_FALSE;
1612 	coas.coa_changed = 0;
1613 
1614 	/*
1615 	 * We assume that the optcom framework has checked for the set
1616 	 * of levels and names that are supported, hence we don't worry
1617 	 * about rejecting based on that.
1618 	 * First check for UDP specific handling, then pass to common routine.
1619 	 */
1620 	switch (level) {
1621 	case IPPROTO_IP:
1622 		/*
1623 		 * Only allow IPv4 option processing on IPv4 sockets.
1624 		 */
1625 		if (connp->conn_family != AF_INET)
1626 			return (-1);
1627 
1628 		switch (name) {
1629 		case IP_OPTIONS:
1630 		case T_IP_OPTIONS:
1631 			mutex_enter(&connp->conn_lock);
1632 			if (!(udp->udp_recv_ipp.ipp_fields &
1633 			    IPPF_IPV4_OPTIONS)) {
1634 				mutex_exit(&connp->conn_lock);
1635 				return (0);
1636 			}
1637 
1638 			len = udp->udp_recv_ipp.ipp_ipv4_options_len;
1639 			ASSERT(len != 0);
1640 			bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
1641 			mutex_exit(&connp->conn_lock);
1642 			return (len);
1643 		}
1644 		break;
1645 	case IPPROTO_UDP:
1646 		switch (name) {
1647 		case UDP_NAT_T_ENDPOINT:
1648 			mutex_enter(&connp->conn_lock);
1649 			*i1 = udp->udp_nat_t_endpoint;
1650 			mutex_exit(&connp->conn_lock);
1651 			return (sizeof (int));
1652 		case UDP_RCVHDR:
1653 			mutex_enter(&connp->conn_lock);
1654 			*i1 = udp->udp_rcvhdr ? 1 : 0;
1655 			mutex_exit(&connp->conn_lock);
1656 			return (sizeof (int));
1657 		case UDP_SRCPORT_HASH:
1658 			mutex_enter(&connp->conn_lock);
1659 			*i1 = udp->udp_vxlanhash;
1660 			mutex_exit(&connp->conn_lock);
1661 			return (sizeof (int));
1662 		}
1663 	}
1664 	mutex_enter(&connp->conn_lock);
1665 	retval = conn_opt_get(&coas, level, name, ptr);
1666 	mutex_exit(&connp->conn_lock);
1667 	return (retval);
1668 }
1669 
1670 /*
1671  * This routine retrieves the current status of socket options.
1672  * It returns the size of the option retrieved, or -1.
1673  */
1674 int
1675 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1676 {
1677 	conn_t		*connp = Q_TO_CONN(q);
1678 	int		err;
1679 
1680 	err = udp_opt_get(connp, level, name, ptr);
1681 	return (err);
1682 }
1683 
1684 /*
1685  * This routine sets socket options.
1686  */
1687 int
1688 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1689     uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1690 {
1691 	conn_t		*connp = coa->coa_connp;
1692 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1693 	udp_t		*udp = connp->conn_udp;
1694 	udp_stack_t	*us = udp->udp_us;
1695 	int		*i1 = (int *)invalp;
1696 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1697 	int		error;
1698 
1699 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1700 	/*
1701 	 * First do UDP specific sanity checks and handle UDP specific
1702 	 * options. Note that some IPPROTO_UDP options are handled
1703 	 * by conn_opt_set.
1704 	 */
1705 	switch (level) {
1706 	case SOL_SOCKET:
1707 		switch (name) {
1708 		case SO_SNDBUF:
1709 			if (*i1 > us->us_max_buf) {
1710 				return (ENOBUFS);
1711 			}
1712 			break;
1713 		case SO_RCVBUF:
1714 			if (*i1 > us->us_max_buf) {
1715 				return (ENOBUFS);
1716 			}
1717 			break;
1718 
1719 		case SCM_UCRED: {
1720 			struct ucred_s *ucr;
1721 			cred_t *newcr;
1722 			ts_label_t *tsl;
1723 
1724 			/*
1725 			 * Only sockets that have proper privileges and are
1726 			 * bound to MLPs will have any other value here, so
1727 			 * this implicitly tests for privilege to set label.
1728 			 */
1729 			if (connp->conn_mlp_type == mlptSingle)
1730 				break;
1731 
1732 			ucr = (struct ucred_s *)invalp;
1733 			if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
1734 			    ucr->uc_labeloff < sizeof (*ucr) ||
1735 			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
1736 				return (EINVAL);
1737 			if (!checkonly) {
1738 				/*
1739 				 * Set ixa_tsl to the new label.
1740 				 * We assume that crgetzoneid doesn't change
1741 				 * as part of the SCM_UCRED.
1742 				 */
1743 				ASSERT(cr != NULL);
1744 				if ((tsl = crgetlabel(cr)) == NULL)
1745 					return (EINVAL);
1746 				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
1747 				    tsl->tsl_doi, KM_NOSLEEP);
1748 				if (newcr == NULL)
1749 					return (ENOSR);
1750 				ASSERT(newcr->cr_label != NULL);
1751 				/*
1752 				 * Move the hold on the cr_label to ixa_tsl by
1753 				 * setting cr_label to NULL. Then release newcr.
1754 				 */
1755 				ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
1756 				ixa->ixa_flags |= IXAF_UCRED_TSL;
1757 				newcr->cr_label = NULL;
1758 				crfree(newcr);
1759 				coa->coa_changed |= COA_HEADER_CHANGED;
1760 				coa->coa_changed |= COA_WROFF_CHANGED;
1761 			}
1762 			/* Fully handled this option. */
1763 			return (0);
1764 		}
1765 		}
1766 		break;
1767 	case IPPROTO_UDP:
1768 		switch (name) {
1769 		case UDP_NAT_T_ENDPOINT:
1770 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1771 				return (error);
1772 			}
1773 
1774 			/*
1775 			 * Use conn_family instead so we can avoid ambiguitites
1776 			 * with AF_INET6 sockets that may switch from IPv4
1777 			 * to IPv6.
1778 			 */
1779 			if (connp->conn_family != AF_INET) {
1780 				return (EAFNOSUPPORT);
1781 			}
1782 
1783 			if (!checkonly) {
1784 				mutex_enter(&connp->conn_lock);
1785 				udp->udp_nat_t_endpoint = onoff;
1786 				mutex_exit(&connp->conn_lock);
1787 				coa->coa_changed |= COA_HEADER_CHANGED;
1788 				coa->coa_changed |= COA_WROFF_CHANGED;
1789 			}
1790 			/* Fully handled this option. */
1791 			return (0);
1792 		case UDP_RCVHDR:
1793 			mutex_enter(&connp->conn_lock);
1794 			udp->udp_rcvhdr = onoff;
1795 			mutex_exit(&connp->conn_lock);
1796 			return (0);
1797 		case UDP_SRCPORT_HASH:
1798 			/*
1799 			 * This should have already been verified, but double
1800 			 * check.
1801 			 */
1802 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1803 				return (error);
1804 			}
1805 
1806 			/* First see if the val is something we understand */
1807 			if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN)
1808 				return (EINVAL);
1809 
1810 			if (!checkonly) {
1811 				mutex_enter(&connp->conn_lock);
1812 				udp->udp_vxlanhash = *i1;
1813 				mutex_exit(&connp->conn_lock);
1814 			}
1815 			/* Fully handled this option. */
1816 			return (0);
1817 		}
1818 		break;
1819 	}
1820 	error = conn_opt_set(coa, level, name, inlen, invalp,
1821 	    checkonly, cr);
1822 	return (error);
1823 }
1824 
1825 /*
1826  * This routine sets socket options.
1827  */
1828 int
1829 udp_opt_set(conn_t *connp, uint_t optset_context, int level,
1830     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
1831     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
1832 {
1833 	udp_t		*udp = connp->conn_udp;
1834 	int		err;
1835 	conn_opt_arg_t	coas, *coa;
1836 	boolean_t	checkonly;
1837 	udp_stack_t	*us = udp->udp_us;
1838 
1839 	switch (optset_context) {
1840 	case SETFN_OPTCOM_CHECKONLY:
1841 		checkonly = B_TRUE;
1842 		/*
1843 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
1844 		 * inlen != 0 implies value supplied and
1845 		 *	we have to "pretend" to set it.
1846 		 * inlen == 0 implies that there is no
1847 		 *	value part in T_CHECK request and just validation
1848 		 * done elsewhere should be enough, we just return here.
1849 		 */
1850 		if (inlen == 0) {
1851 			*outlenp = 0;
1852 			return (0);
1853 		}
1854 		break;
1855 	case SETFN_OPTCOM_NEGOTIATE:
1856 		checkonly = B_FALSE;
1857 		break;
1858 	case SETFN_UD_NEGOTIATE:
1859 	case SETFN_CONN_NEGOTIATE:
1860 		checkonly = B_FALSE;
1861 		/*
1862 		 * Negotiating local and "association-related" options
1863 		 * through T_UNITDATA_REQ.
1864 		 *
1865 		 * Following routine can filter out ones we do not
1866 		 * want to be "set" this way.
1867 		 */
1868 		if (!udp_opt_allow_udr_set(level, name)) {
1869 			*outlenp = 0;
1870 			return (EINVAL);
1871 		}
1872 		break;
1873 	default:
1874 		/*
1875 		 * We should never get here
1876 		 */
1877 		*outlenp = 0;
1878 		return (EINVAL);
1879 	}
1880 
1881 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
1882 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
1883 
1884 	if (thisdg_attrs != NULL) {
1885 		/* Options from T_UNITDATA_REQ */
1886 		coa = (conn_opt_arg_t *)thisdg_attrs;
1887 		ASSERT(coa->coa_connp == connp);
1888 		ASSERT(coa->coa_ixa != NULL);
1889 		ASSERT(coa->coa_ipp != NULL);
1890 		ASSERT(coa->coa_ancillary);
1891 	} else {
1892 		coa = &coas;
1893 		coas.coa_connp = connp;
1894 		/* Get a reference on conn_ixa to prevent concurrent mods */
1895 		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
1896 		if (coas.coa_ixa == NULL) {
1897 			*outlenp = 0;
1898 			return (ENOMEM);
1899 		}
1900 		coas.coa_ipp = &connp->conn_xmit_ipp;
1901 		coas.coa_ancillary = B_FALSE;
1902 		coas.coa_changed = 0;
1903 	}
1904 
1905 	err = udp_do_opt_set(coa, level, name, inlen, invalp,
1906 	    cr, checkonly);
1907 	if (err != 0) {
1908 errout:
1909 		if (!coa->coa_ancillary)
1910 			ixa_refrele(coa->coa_ixa);
1911 		*outlenp = 0;
1912 		return (err);
1913 	}
1914 	/* Handle DHCPINIT here outside of lock */
1915 	if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
1916 		uint_t	ifindex;
1917 		ill_t	*ill;
1918 
1919 		ifindex = *(uint_t *)invalp;
1920 		if (ifindex == 0) {
1921 			ill = NULL;
1922 		} else {
1923 			ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
1924 			    coa->coa_ixa->ixa_ipst);
1925 			if (ill == NULL) {
1926 				err = ENXIO;
1927 				goto errout;
1928 			}
1929 
1930 			mutex_enter(&ill->ill_lock);
1931 			if (ill->ill_state_flags & ILL_CONDEMNED) {
1932 				mutex_exit(&ill->ill_lock);
1933 				ill_refrele(ill);
1934 				err = ENXIO;
1935 				goto errout;
1936 			}
1937 			if (IS_VNI(ill)) {
1938 				mutex_exit(&ill->ill_lock);
1939 				ill_refrele(ill);
1940 				err = EINVAL;
1941 				goto errout;
1942 			}
1943 		}
1944 		mutex_enter(&connp->conn_lock);
1945 
1946 		if (connp->conn_dhcpinit_ill != NULL) {
1947 			/*
1948 			 * We've locked the conn so conn_cleanup_ill()
1949 			 * cannot clear conn_dhcpinit_ill -- so it's
1950 			 * safe to access the ill.
1951 			 */
1952 			ill_t *oill = connp->conn_dhcpinit_ill;
1953 
1954 			ASSERT(oill->ill_dhcpinit != 0);
1955 			atomic_dec_32(&oill->ill_dhcpinit);
1956 			ill_set_inputfn(connp->conn_dhcpinit_ill);
1957 			connp->conn_dhcpinit_ill = NULL;
1958 		}
1959 
1960 		if (ill != NULL) {
1961 			connp->conn_dhcpinit_ill = ill;
1962 			atomic_inc_32(&ill->ill_dhcpinit);
1963 			ill_set_inputfn(ill);
1964 			mutex_exit(&connp->conn_lock);
1965 			mutex_exit(&ill->ill_lock);
1966 			ill_refrele(ill);
1967 		} else {
1968 			mutex_exit(&connp->conn_lock);
1969 		}
1970 	}
1971 
1972 	/*
1973 	 * Common case of OK return with outval same as inval.
1974 	 */
1975 	if (invalp != outvalp) {
1976 		/* don't trust bcopy for identical src/dst */
1977 		(void) bcopy(invalp, outvalp, inlen);
1978 	}
1979 	*outlenp = inlen;
1980 
1981 	/*
1982 	 * If this was not ancillary data, then we rebuild the headers,
1983 	 * update the IRE/NCE, and IPsec as needed.
1984 	 * Since the label depends on the destination we go through
1985 	 * ip_set_destination first.
1986 	 */
1987 	if (coa->coa_ancillary) {
1988 		return (0);
1989 	}
1990 
1991 	if (coa->coa_changed & COA_ROUTE_CHANGED) {
1992 		in6_addr_t saddr, faddr, nexthop;
1993 		in_port_t fport;
1994 
1995 		/*
1996 		 * We clear lastdst to make sure we pick up the change
1997 		 * next time sending.
1998 		 * If we are connected we re-cache the information.
1999 		 * We ignore errors to preserve BSD behavior.
2000 		 * Note that we don't redo IPsec policy lookup here
2001 		 * since the final destination (or source) didn't change.
2002 		 */
2003 		mutex_enter(&connp->conn_lock);
2004 		connp->conn_v6lastdst = ipv6_all_zeros;
2005 
2006 		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
2007 		    &connp->conn_faddr_v6, &nexthop);
2008 		saddr = connp->conn_saddr_v6;
2009 		faddr = connp->conn_faddr_v6;
2010 		fport = connp->conn_fport;
2011 		mutex_exit(&connp->conn_lock);
2012 
2013 		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
2014 		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
2015 			(void) ip_attr_connect(connp, coa->coa_ixa,
2016 			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
2017 			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
2018 		}
2019 	}
2020 
2021 	ixa_refrele(coa->coa_ixa);
2022 
2023 	if (coa->coa_changed & COA_HEADER_CHANGED) {
2024 		/*
2025 		 * Rebuild the header template if we are connected.
2026 		 * Otherwise clear conn_v6lastdst so we rebuild the header
2027 		 * in the data path.
2028 		 */
2029 		mutex_enter(&connp->conn_lock);
2030 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
2031 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
2032 			err = udp_build_hdr_template(connp,
2033 			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
2034 			    connp->conn_fport, connp->conn_flowinfo);
2035 			if (err != 0) {
2036 				mutex_exit(&connp->conn_lock);
2037 				return (err);
2038 			}
2039 		} else {
2040 			connp->conn_v6lastdst = ipv6_all_zeros;
2041 		}
2042 		mutex_exit(&connp->conn_lock);
2043 	}
2044 	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
2045 		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
2046 		    connp->conn_rcvbuf);
2047 	}
2048 	if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
2049 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
2050 	}
2051 	if (coa->coa_changed & COA_WROFF_CHANGED) {
2052 		/* Increase wroff if needed */
2053 		uint_t wroff;
2054 
2055 		mutex_enter(&connp->conn_lock);
2056 		wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
2057 		if (udp->udp_nat_t_endpoint)
2058 			wroff += sizeof (uint32_t);
2059 		if (wroff > connp->conn_wroff) {
2060 			connp->conn_wroff = wroff;
2061 			mutex_exit(&connp->conn_lock);
2062 			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
2063 		} else {
2064 			mutex_exit(&connp->conn_lock);
2065 		}
2066 	}
2067 	return (err);
2068 }
2069 
2070 /* This routine sets socket options. */
2071 int
2072 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
2073     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2074     void *thisdg_attrs, cred_t *cr)
2075 {
2076 	conn_t	*connp = Q_TO_CONN(q);
2077 	int error;
2078 
2079 	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
2080 	    outlenp, outvalp, thisdg_attrs, cr);
2081 	return (error);
2082 }
2083 
2084 /*
2085  * Setup IP and UDP headers.
2086  * Returns NULL on allocation failure, in which case data_mp is freed.
2087  */
2088 mblk_t *
2089 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2090     const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
2091     uint32_t flowinfo, mblk_t *data_mp, int *errorp)
2092 {
2093 	mblk_t		*mp;
2094 	udpha_t		*udpha;
2095 	udp_stack_t	*us = connp->conn_netstack->netstack_udp;
2096 	uint_t		data_len;
2097 	uint32_t	cksum;
2098 	udp_t		*udp = connp->conn_udp;
2099 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
2100 	boolean_t	hash_srcport = udp->udp_vxlanhash;
2101 	uint_t		ulp_hdr_len;
2102 	uint16_t	srcport;
2103 
2104 	data_len = msgdsize(data_mp);
2105 	ulp_hdr_len = UDPH_SIZE;
2106 	if (insert_spi)
2107 		ulp_hdr_len += sizeof (uint32_t);
2108 
2109 	/*
2110 	 * If we have source port hashing going on, determine the hash before
2111 	 * we modify the mblk_t.
2112 	 */
2113 	if (hash_srcport == B_TRUE) {
2114 		srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
2115 		    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
2116 		    ntohs(connp->conn_lport));
2117 	}
2118 
2119 	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2120 	    ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2121 	if (mp == NULL) {
2122 		ASSERT(*errorp != 0);
2123 		return (NULL);
2124 	}
2125 
2126 	data_len += ulp_hdr_len;
2127 	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2128 
2129 	udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2130 	if (hash_srcport == B_TRUE) {
2131 		udpha->uha_src_port = htons(srcport);
2132 	} else {
2133 		udpha->uha_src_port = connp->conn_lport;
2134 	}
2135 	udpha->uha_dst_port = dstport;
2136 	udpha->uha_checksum = 0;
2137 	udpha->uha_length = htons(data_len);
2138 
2139 	/*
2140 	 * If there was a routing option/header then conn_prepend_hdr
2141 	 * has massaged it and placed the pseudo-header checksum difference
2142 	 * in the cksum argument.
2143 	 *
2144 	 * Setup header length and prepare for ULP checksum done in IP.
2145 	 *
2146 	 * We make it easy for IP to include our pseudo header
2147 	 * by putting our length in uha_checksum.
2148 	 * The IP source, destination, and length have already been set by
2149 	 * conn_prepend_hdr.
2150 	 */
2151 	cksum += data_len;
2152 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
2153 	ASSERT(cksum < 0x10000);
2154 
2155 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2156 		ipha_t	*ipha = (ipha_t *)mp->b_rptr;
2157 
2158 		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2159 
2160 		/* IP does the checksum if uha_checksum is non-zero */
2161 		if (us->us_do_checksum) {
2162 			if (cksum == 0)
2163 				udpha->uha_checksum = 0xffff;
2164 			else
2165 				udpha->uha_checksum = htons(cksum);
2166 		} else {
2167 			udpha->uha_checksum = 0;
2168 		}
2169 	} else {
2170 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2171 
2172 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
2173 		if (cksum == 0)
2174 			udpha->uha_checksum = 0xffff;
2175 		else
2176 			udpha->uha_checksum = htons(cksum);
2177 	}
2178 
2179 	/* Insert all-0s SPI now. */
2180 	if (insert_spi)
2181 		*((uint32_t *)(udpha + 1)) = 0;
2182 
2183 	return (mp);
2184 }
2185 
2186 static int
2187 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2188     const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
2189 {
2190 	udpha_t		*udpha;
2191 	int		error;
2192 
2193 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2194 	/*
2195 	 * We clear lastdst to make sure we don't use the lastdst path
2196 	 * next time sending since we might not have set v6dst yet.
2197 	 */
2198 	connp->conn_v6lastdst = ipv6_all_zeros;
2199 
2200 	error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
2201 	    flowinfo);
2202 	if (error != 0)
2203 		return (error);
2204 
2205 	/*
2206 	 * Any routing header/option has been massaged. The checksum difference
2207 	 * is stored in conn_sum.
2208 	 */
2209 	udpha = (udpha_t *)connp->conn_ht_ulp;
2210 	udpha->uha_src_port = connp->conn_lport;
2211 	udpha->uha_dst_port = dstport;
2212 	udpha->uha_checksum = 0;
2213 	udpha->uha_length = htons(UDPH_SIZE);	/* Filled in later */
2214 	return (0);
2215 }
2216 
2217 static mblk_t *
2218 udp_queue_fallback(udp_t *udp, mblk_t *mp)
2219 {
2220 	ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
2221 	if (IPCL_IS_NONSTR(udp->udp_connp)) {
2222 		/*
2223 		 * fallback has started but messages have not been moved yet
2224 		 */
2225 		if (udp->udp_fallback_queue_head == NULL) {
2226 			ASSERT(udp->udp_fallback_queue_tail == NULL);
2227 			udp->udp_fallback_queue_head = mp;
2228 			udp->udp_fallback_queue_tail = mp;
2229 		} else {
2230 			ASSERT(udp->udp_fallback_queue_tail != NULL);
2231 			udp->udp_fallback_queue_tail->b_next = mp;
2232 			udp->udp_fallback_queue_tail = mp;
2233 		}
2234 		return (NULL);
2235 	} else {
2236 		/*
2237 		 * Fallback completed, let the caller putnext() the mblk.
2238 		 */
2239 		return (mp);
2240 	}
2241 }
2242 
2243 /*
2244  * Deliver data to ULP. In case we have a socket, and it's falling back to
2245  * TPI, then we'll queue the mp for later processing.
2246  */
2247 static void
2248 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
2249 {
2250 	if (IPCL_IS_NONSTR(connp)) {
2251 		udp_t *udp = connp->conn_udp;
2252 		int error;
2253 
2254 		ASSERT(len == msgdsize(mp));
2255 		if ((*connp->conn_upcalls->su_recv)
2256 		    (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2257 			mutex_enter(&udp->udp_recv_lock);
2258 			if (error == ENOSPC) {
2259 				/*
2260 				 * let's confirm while holding the lock
2261 				 */
2262 				if ((*connp->conn_upcalls->su_recv)
2263 				    (connp->conn_upper_handle, NULL, 0, 0,
2264 				    &error, NULL) < 0) {
2265 					ASSERT(error == ENOSPC);
2266 					if (error == ENOSPC) {
2267 						connp->conn_flow_cntrld =
2268 						    B_TRUE;
2269 					}
2270 				}
2271 				mutex_exit(&udp->udp_recv_lock);
2272 			} else {
2273 				ASSERT(error == EOPNOTSUPP);
2274 				mp = udp_queue_fallback(udp, mp);
2275 				mutex_exit(&udp->udp_recv_lock);
2276 				if (mp != NULL)
2277 					putnext(connp->conn_rq, mp);
2278 			}
2279 		}
2280 		ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
2281 	} else {
2282 		if (is_system_labeled()) {
2283 			ASSERT(ira->ira_cred != NULL);
2284 			/*
2285 			 * Provide for protocols above UDP such as RPC
2286 			 * NOPID leaves db_cpid unchanged.
2287 			 */
2288 			mblk_setcred(mp, ira->ira_cred, NOPID);
2289 		}
2290 
2291 		putnext(connp->conn_rq, mp);
2292 	}
2293 }
2294 
2295 /*
2296  * This is the inbound data path.
2297  * IP has already pulled up the IP plus UDP headers and verified alignment
2298  * etc.
2299  */
2300 /* ARGSUSED2 */
2301 static void
2302 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2303 {
2304 	conn_t			*connp = (conn_t *)arg1;
2305 	struct T_unitdata_ind	*tudi;
2306 	uchar_t			*rptr;		/* Pointer to IP header */
2307 	int			hdr_length;	/* Length of IP+UDP headers */
2308 	int			udi_size;	/* Size of T_unitdata_ind */
2309 	int			pkt_len;
2310 	udp_t			*udp;
2311 	udpha_t			*udpha;
2312 	ip_pkt_t		ipps;
2313 	ip6_t			*ip6h;
2314 	mblk_t			*mp1;
2315 	uint32_t		udp_ipv4_options_len;
2316 	crb_t			recv_ancillary;
2317 	udp_stack_t		*us;
2318 
2319 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
2320 
2321 	udp = connp->conn_udp;
2322 	us = udp->udp_us;
2323 	rptr = mp->b_rptr;
2324 
2325 	ASSERT(DB_TYPE(mp) == M_DATA);
2326 	ASSERT(OK_32PTR(rptr));
2327 	ASSERT(ira->ira_pktlen == msgdsize(mp));
2328 	pkt_len = ira->ira_pktlen;
2329 
2330 	/*
2331 	 * Get a snapshot of these and allow other threads to change
2332 	 * them after that. We need the same recv_ancillary when determining
2333 	 * the size as when adding the ancillary data items.
2334 	 */
2335 	mutex_enter(&connp->conn_lock);
2336 	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
2337 	recv_ancillary = connp->conn_recv_ancillary;
2338 	mutex_exit(&connp->conn_lock);
2339 
2340 	hdr_length = ira->ira_ip_hdr_length;
2341 
2342 	/*
2343 	 * IP inspected the UDP header thus all of it must be in the mblk.
2344 	 * UDP length check is performed for IPv6 packets and IPv4 packets
2345 	 * to check if the size of the packet as specified
2346 	 * by the UDP header is the same as the length derived from the IP
2347 	 * header.
2348 	 */
2349 	udpha = (udpha_t *)(rptr + hdr_length);
2350 	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
2351 		goto tossit;
2352 
2353 	hdr_length += UDPH_SIZE;
2354 	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */
2355 
2356 	/* Initialize regardless of IP version */
2357 	ipps.ipp_fields = 0;
2358 
2359 	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
2360 	    udp_ipv4_options_len > 0) &&
2361 	    connp->conn_family == AF_INET) {
2362 		int	err;
2363 
2364 		/*
2365 		 * Record/update udp_recv_ipp with the lock
2366 		 * held. Not needed for AF_INET6 sockets
2367 		 * since they don't support a getsockopt of IP_OPTIONS.
2368 		 */
2369 		mutex_enter(&connp->conn_lock);
2370 		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
2371 		    B_TRUE);
2372 		if (err != 0) {
2373 			/* Allocation failed. Drop packet */
2374 			mutex_exit(&connp->conn_lock);
2375 			freemsg(mp);
2376 			UDPS_BUMP_MIB(us, udpInErrors);
2377 			return;
2378 		}
2379 		mutex_exit(&connp->conn_lock);
2380 	}
2381 
2382 	if (recv_ancillary.crb_all != 0) {
2383 		/*
2384 		 * Record packet information in the ip_pkt_t
2385 		 */
2386 		if (ira->ira_flags & IRAF_IS_IPV4) {
2387 			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
2388 			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
2389 			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
2390 			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
2391 
2392 			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
2393 		} else {
2394 			uint8_t nexthdrp;
2395 
2396 			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
2397 			/*
2398 			 * IPv6 packets can only be received by applications
2399 			 * that are prepared to receive IPv6 addresses.
2400 			 * The IP fanout must ensure this.
2401 			 */
2402 			ASSERT(connp->conn_family == AF_INET6);
2403 
2404 			ip6h = (ip6_t *)rptr;
2405 
2406 			/* We don't care about the length, but need the ipp */
2407 			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
2408 			    &nexthdrp);
2409 			ASSERT(hdr_length == ira->ira_ip_hdr_length);
2410 			/* Restore */
2411 			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
2412 			ASSERT(nexthdrp == IPPROTO_UDP);
2413 		}
2414 	}
2415 
2416 	/*
2417 	 * This is the inbound data path.  Packets are passed upstream as
2418 	 * T_UNITDATA_IND messages.
2419 	 */
2420 	if (connp->conn_family == AF_INET) {
2421 		sin_t *sin;
2422 
2423 		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
2424 
2425 		/*
2426 		 * Normally only send up the source address.
2427 		 * If any ancillary data items are wanted we add those.
2428 		 */
2429 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
2430 		if (recv_ancillary.crb_all != 0) {
2431 			udi_size += conn_recvancillary_size(connp,
2432 			    recv_ancillary, ira, mp, &ipps);
2433 		}
2434 
2435 		/* Allocate a message block for the T_UNITDATA_IND structure. */
2436 		mp1 = allocb(udi_size, BPRI_MED);
2437 		if (mp1 == NULL) {
2438 			freemsg(mp);
2439 			UDPS_BUMP_MIB(us, udpInErrors);
2440 			return;
2441 		}
2442 		mp1->b_cont = mp;
2443 		mp1->b_datap->db_type = M_PROTO;
2444 		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2445 		mp1->b_wptr = (uchar_t *)tudi + udi_size;
2446 		tudi->PRIM_type = T_UNITDATA_IND;
2447 		tudi->SRC_length = sizeof (sin_t);
2448 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2449 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2450 		    sizeof (sin_t);
2451 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
2452 		tudi->OPT_length = udi_size;
2453 		sin = (sin_t *)&tudi[1];
2454 		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
2455 		sin->sin_port =	udpha->uha_src_port;
2456 		sin->sin_family = connp->conn_family;
2457 		*(uint32_t *)&sin->sin_zero[0] = 0;
2458 		*(uint32_t *)&sin->sin_zero[4] = 0;
2459 
2460 		/*
2461 		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA,
2462 		 * IP_RECVTTL or IP_RECVTOS has been set.
2463 		 */
2464 		if (udi_size != 0) {
2465 			conn_recvancillary_add(connp, recv_ancillary, ira,
2466 			    &ipps, (uchar_t *)&sin[1], udi_size);
2467 		}
2468 	} else {
2469 		sin6_t *sin6;
2470 
2471 		/*
2472 		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
2473 		 *
2474 		 * Normally we only send up the address. If receiving of any
2475 		 * optional receive side information is enabled, we also send
2476 		 * that up as options.
2477 		 */
2478 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
2479 
2480 		if (recv_ancillary.crb_all != 0) {
2481 			udi_size += conn_recvancillary_size(connp,
2482 			    recv_ancillary, ira, mp, &ipps);
2483 		}
2484 
2485 		mp1 = allocb(udi_size, BPRI_MED);
2486 		if (mp1 == NULL) {
2487 			freemsg(mp);
2488 			UDPS_BUMP_MIB(us, udpInErrors);
2489 			return;
2490 		}
2491 		mp1->b_cont = mp;
2492 		mp1->b_datap->db_type = M_PROTO;
2493 		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
2494 		mp1->b_wptr = (uchar_t *)tudi + udi_size;
2495 		tudi->PRIM_type = T_UNITDATA_IND;
2496 		tudi->SRC_length = sizeof (sin6_t);
2497 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2498 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
2499 		    sizeof (sin6_t);
2500 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
2501 		tudi->OPT_length = udi_size;
2502 		sin6 = (sin6_t *)&tudi[1];
2503 		if (ira->ira_flags & IRAF_IS_IPV4) {
2504 			in6_addr_t v6dst;
2505 
2506 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
2507 			    &sin6->sin6_addr);
2508 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
2509 			    &v6dst);
2510 			sin6->sin6_flowinfo = 0;
2511 			sin6->sin6_scope_id = 0;
2512 			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
2513 			    IPCL_ZONEID(connp), us->us_netstack);
2514 		} else {
2515 			ip6h = (ip6_t *)rptr;
2516 
2517 			sin6->sin6_addr = ip6h->ip6_src;
2518 			/* No sin6_flowinfo per API */
2519 			sin6->sin6_flowinfo = 0;
2520 			/* For link-scope pass up scope id */
2521 			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
2522 				sin6->sin6_scope_id = ira->ira_ruifindex;
2523 			else
2524 				sin6->sin6_scope_id = 0;
2525 			sin6->__sin6_src_id = ip_srcid_find_addr(
2526 			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
2527 			    us->us_netstack);
2528 		}
2529 		sin6->sin6_port = udpha->uha_src_port;
2530 		sin6->sin6_family = connp->conn_family;
2531 
2532 		if (udi_size != 0) {
2533 			conn_recvancillary_add(connp, recv_ancillary, ira,
2534 			    &ipps, (uchar_t *)&sin6[1], udi_size);
2535 		}
2536 	}
2537 
2538 	/*
2539 	 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
2540 	 * loopback traffic).
2541 	 */
2542 	DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
2543 	    void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);
2544 
2545 	/* Walk past the headers unless IP_RECVHDR was set. */
2546 	if (!udp->udp_rcvhdr) {
2547 		mp->b_rptr = rptr + hdr_length;
2548 		pkt_len -= hdr_length;
2549 	}
2550 
2551 	UDPS_BUMP_MIB(us, udpHCInDatagrams);
2552 	udp_ulp_recv(connp, mp1, pkt_len, ira);
2553 	return;
2554 
2555 tossit:
2556 	freemsg(mp);
2557 	UDPS_BUMP_MIB(us, udpInErrors);
2558 }
2559 
2560 /*
2561  * This routine creates a T_UDERROR_IND message and passes it upstream.
2562  * The address and options are copied from the T_UNITDATA_REQ message
2563  * passed in mp.  This message is freed.
2564  */
2565 static void
2566 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2567 {
2568 	struct T_unitdata_req *tudr;
2569 	mblk_t	*mp1;
2570 	uchar_t *destaddr;
2571 	t_scalar_t destlen;
2572 	uchar_t	*optaddr;
2573 	t_scalar_t optlen;
2574 
2575 	if ((mp->b_wptr < mp->b_rptr) ||
2576 	    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2577 		goto done;
2578 	}
2579 	tudr = (struct T_unitdata_req *)mp->b_rptr;
2580 	destaddr = mp->b_rptr + tudr->DEST_offset;
2581 	if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2582 	    destaddr + tudr->DEST_length < mp->b_rptr ||
2583 	    destaddr + tudr->DEST_length > mp->b_wptr) {
2584 		goto done;
2585 	}
2586 	optaddr = mp->b_rptr + tudr->OPT_offset;
2587 	if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2588 	    optaddr + tudr->OPT_length < mp->b_rptr ||
2589 	    optaddr + tudr->OPT_length > mp->b_wptr) {
2590 		goto done;
2591 	}
2592 	destlen = tudr->DEST_length;
2593 	optlen = tudr->OPT_length;
2594 
2595 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2596 	    (char *)optaddr, optlen, err);
2597 	if (mp1 != NULL)
2598 		qreply(q, mp1);
2599 
2600 done:
2601 	freemsg(mp);
2602 }
2603 
2604 /*
2605  * This routine removes a port number association from a stream.  It
2606  * is called by udp_wput to handle T_UNBIND_REQ messages.
2607  */
2608 static void
2609 udp_tpi_unbind(queue_t *q, mblk_t *mp)
2610 {
2611 	conn_t	*connp = Q_TO_CONN(q);
2612 	int	error;
2613 
2614 	error = udp_do_unbind(connp);
2615 	if (error) {
2616 		if (error < 0)
2617 			udp_err_ack(q, mp, -error, 0);
2618 		else
2619 			udp_err_ack(q, mp, TSYSERR, error);
2620 		return;
2621 	}
2622 
2623 	mp = mi_tpi_ok_ack_alloc(mp);
2624 	ASSERT(mp != NULL);
2625 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2626 	qreply(q, mp);
2627 }
2628 
2629 /*
2630  * Don't let port fall into the privileged range.
2631  * Since the extra privileged ports can be arbitrary we also
2632  * ensure that we exclude those from consideration.
2633  * us->us_epriv_ports is not sorted thus we loop over it until
2634  * there are no changes.
2635  */
2636 static in_port_t
2637 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
2638 {
2639 	int i, bump;
2640 	in_port_t nextport;
2641 	boolean_t restart = B_FALSE;
2642 	udp_stack_t *us = udp->udp_us;
2643 
2644 	if (random && udp_random_anon_port != 0) {
2645 		(void) random_get_pseudo_bytes((uint8_t *)&port,
2646 		    sizeof (in_port_t));
2647 		/*
2648 		 * Unless changed by a sys admin, the smallest anon port
2649 		 * is 32768 and the largest anon port is 65535.  It is
2650 		 * very likely (50%) for the random port to be smaller
2651 		 * than the smallest anon port.  When that happens,
2652 		 * add port % (anon port range) to the smallest anon
2653 		 * port to get the random port.  It should fall into the
2654 		 * valid anon port range.
2655 		 */
2656 		if ((port < us->us_smallest_anon_port) ||
2657 		    (port > us->us_largest_anon_port)) {
2658 			if (us->us_smallest_anon_port ==
2659 			    us->us_largest_anon_port) {
2660 				bump = 0;
2661 			} else {
2662 				bump = port % (us->us_largest_anon_port -
2663 				    us->us_smallest_anon_port);
2664 			}
2665 
2666 			port = us->us_smallest_anon_port + bump;
2667 		}
2668 	}
2669 
2670 retry:
2671 	if (port < us->us_smallest_anon_port)
2672 		port = us->us_smallest_anon_port;
2673 
2674 	if (port > us->us_largest_anon_port) {
2675 		port = us->us_smallest_anon_port;
2676 		if (restart)
2677 			return (0);
2678 		restart = B_TRUE;
2679 	}
2680 
2681 	if (port < us->us_smallest_nonpriv_port)
2682 		port = us->us_smallest_nonpriv_port;
2683 
2684 	for (i = 0; i < us->us_num_epriv_ports; i++) {
2685 		if (port == us->us_epriv_ports[i]) {
2686 			port++;
2687 			/*
2688 			 * Make sure that the port is in the
2689 			 * valid range.
2690 			 */
2691 			goto retry;
2692 		}
2693 	}
2694 
2695 	if (is_system_labeled() &&
2696 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
2697 	    port, IPPROTO_UDP, B_TRUE)) != 0) {
2698 		port = nextport;
2699 		goto retry;
2700 	}
2701 
2702 	return (port);
2703 }
2704 
2705 /*
2706  * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
2707  * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
2708  * the TPI options, otherwise we take them from msg_control.
2709  * If both sin and sin6 is set it is a connected socket and we use conn_faddr.
2710  * Always consumes mp; never consumes tudr_mp.
2711  */
2712 static int
2713 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
2714     mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
2715 {
2716 	udp_t		*udp = connp->conn_udp;
2717 	udp_stack_t	*us = udp->udp_us;
2718 	int		error;
2719 	ip_xmit_attr_t	*ixa;
2720 	ip_pkt_t	*ipp;
2721 	in6_addr_t	v6src;
2722 	in6_addr_t	v6dst;
2723 	in6_addr_t	v6nexthop;
2724 	in_port_t	dstport;
2725 	uint32_t	flowinfo;
2726 	uint_t		srcid;
2727 	int		is_absreq_failure = 0;
2728 	conn_opt_arg_t	coas, *coa;
2729 
2730 	ASSERT(tudr_mp != NULL || msg != NULL);
2731 
2732 	/*
2733 	 * Get ixa before checking state to handle a disconnect race.
2734 	 *
2735 	 * We need an exclusive copy of conn_ixa since the ancillary data
2736 	 * options might modify it. That copy has no pointers hence we
2737 	 * need to set them up once we've parsed the ancillary data.
2738 	 */
2739 	ixa = conn_get_ixa_exclusive(connp);
2740 	if (ixa == NULL) {
2741 		UDPS_BUMP_MIB(us, udpOutErrors);
2742 		freemsg(mp);
2743 		return (ENOMEM);
2744 	}
2745 	ASSERT(cr != NULL);
2746 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2747 	ixa->ixa_cred = cr;
2748 	ixa->ixa_cpid = pid;
2749 	if (is_system_labeled()) {
2750 		/* We need to restart with a label based on the cred */
2751 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
2752 	}
2753 
2754 	/* In case previous destination was multicast or multirt */
2755 	ip_attr_newdst(ixa);
2756 
2757 	/* Get a copy of conn_xmit_ipp since the options might change it */
2758 	ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
2759 	if (ipp == NULL) {
2760 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2761 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
2762 		ixa->ixa_cpid = connp->conn_cpid;
2763 		ixa_refrele(ixa);
2764 		UDPS_BUMP_MIB(us, udpOutErrors);
2765 		freemsg(mp);
2766 		return (ENOMEM);
2767 	}
2768 	mutex_enter(&connp->conn_lock);
2769 	error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
2770 	mutex_exit(&connp->conn_lock);
2771 	if (error != 0) {
2772 		UDPS_BUMP_MIB(us, udpOutErrors);
2773 		freemsg(mp);
2774 		goto done;
2775 	}
2776 
2777 	/*
2778 	 * Parse the options and update ixa and ipp as a result.
2779 	 * Note that ixa_tsl can be updated if SCM_UCRED.
2780 	 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
2781 	 */
2782 
2783 	coa = &coas;
2784 	coa->coa_connp = connp;
2785 	coa->coa_ixa = ixa;
2786 	coa->coa_ipp = ipp;
2787 	coa->coa_ancillary = B_TRUE;
2788 	coa->coa_changed = 0;
2789 
2790 	if (msg != NULL) {
2791 		error = process_auxiliary_options(connp, msg->msg_control,
2792 		    msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
2793 	} else {
2794 		struct T_unitdata_req *tudr;
2795 
2796 		tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
2797 		ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
2798 		error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
2799 		    &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
2800 		    coa, &is_absreq_failure);
2801 	}
2802 	if (error != 0) {
2803 		/*
2804 		 * Note: No special action needed in this
2805 		 * module for "is_absreq_failure"
2806 		 */
2807 		freemsg(mp);
2808 		UDPS_BUMP_MIB(us, udpOutErrors);
2809 		goto done;
2810 	}
2811 	ASSERT(is_absreq_failure == 0);
2812 
2813 	mutex_enter(&connp->conn_lock);
2814 	/*
2815 	 * If laddr is unspecified then we look at sin6_src_id.
2816 	 * We will give precedence to a source address set with IPV6_PKTINFO
2817 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2818 	 * want ip_attr_connect to select a source (since it can fail) when
2819 	 * IPV6_PKTINFO is specified.
2820 	 * If this doesn't result in a source address then we get a source
2821 	 * from ip_attr_connect() below.
2822 	 */
2823 	v6src = connp->conn_saddr_v6;
2824 	if (sin != NULL) {
2825 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
2826 		dstport = sin->sin_port;
2827 		flowinfo = 0;
2828 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2829 		ixa->ixa_flags |= IXAF_IS_IPV4;
2830 	} else if (sin6 != NULL) {
2831 		boolean_t v4mapped;
2832 
2833 		v6dst = sin6->sin6_addr;
2834 		dstport = sin6->sin6_port;
2835 		flowinfo = sin6->sin6_flowinfo;
2836 		srcid = sin6->__sin6_src_id;
2837 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
2838 			ixa->ixa_scopeid = sin6->sin6_scope_id;
2839 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
2840 		} else {
2841 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2842 		}
2843 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
2844 		if (v4mapped)
2845 			ixa->ixa_flags |= IXAF_IS_IPV4;
2846 		else
2847 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
2848 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
2849 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
2850 			    v4mapped, connp->conn_netstack)) {
2851 				/* Mismatch - v4mapped/v6 specified by srcid. */
2852 				mutex_exit(&connp->conn_lock);
2853 				error = EADDRNOTAVAIL;
2854 				goto failed;	/* Does freemsg() and mib. */
2855 			}
2856 		}
2857 	} else {
2858 		/* Connected case */
2859 		v6dst = connp->conn_faddr_v6;
2860 		dstport = connp->conn_fport;
2861 		flowinfo = connp->conn_flowinfo;
2862 	}
2863 	mutex_exit(&connp->conn_lock);
2864 
2865 	/* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
2866 	if (ipp->ipp_fields & IPPF_ADDR) {
2867 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
2868 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2869 				v6src = ipp->ipp_addr;
2870 		} else {
2871 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2872 				v6src = ipp->ipp_addr;
2873 		}
2874 	}
2875 
2876 	ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
2877 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
2878 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
2879 
2880 	switch (error) {
2881 	case 0:
2882 		break;
2883 	case EADDRNOTAVAIL:
2884 		/*
2885 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2886 		 * Don't have the application see that errno
2887 		 */
2888 		error = ENETUNREACH;
2889 		goto failed;
2890 	case ENETDOWN:
2891 		/*
2892 		 * Have !ipif_addr_ready address; drop packet silently
2893 		 * until we can get applications to not send until we
2894 		 * are ready.
2895 		 */
2896 		error = 0;
2897 		goto failed;
2898 	case EHOSTUNREACH:
2899 	case ENETUNREACH:
2900 		if (ixa->ixa_ire != NULL) {
2901 			/*
2902 			 * Let conn_ip_output/ire_send_noroute return
2903 			 * the error and send any local ICMP error.
2904 			 */
2905 			error = 0;
2906 			break;
2907 		}
2908 		/* FALLTHRU */
2909 	default:
2910 	failed:
2911 		freemsg(mp);
2912 		UDPS_BUMP_MIB(us, udpOutErrors);
2913 		goto done;
2914 	}
2915 
2916 	/*
2917 	 * We might be going to a different destination than last time,
2918 	 * thus check that TX allows the communication and compute any
2919 	 * needed label.
2920 	 *
2921 	 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
2922 	 * don't have to worry about concurrent threads.
2923 	 */
2924 	if (is_system_labeled()) {
2925 		/* Using UDP MLP requires SCM_UCRED from user */
2926 		if (connp->conn_mlp_type != mlptSingle &&
2927 		    !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
2928 			UDPS_BUMP_MIB(us, udpOutErrors);
2929 			error = ECONNREFUSED;
2930 			freemsg(mp);
2931 			goto done;
2932 		}
2933 		/*
2934 		 * Check whether Trusted Solaris policy allows communication
2935 		 * with this host, and pretend that the destination is
2936 		 * unreachable if not.
2937 		 * Compute any needed label and place it in ipp_label_v4/v6.
2938 		 *
2939 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
2940 		 * ipp_label_v4/v6 to form the packet.
2941 		 *
2942 		 * Tsol note: We have ipp structure local to this thread so
2943 		 * no locking is needed.
2944 		 */
2945 		error = conn_update_label(connp, ixa, &v6dst, ipp);
2946 		if (error != 0) {
2947 			freemsg(mp);
2948 			UDPS_BUMP_MIB(us, udpOutErrors);
2949 			goto done;
2950 		}
2951 	}
2952 	mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
2953 	    flowinfo, mp, &error);
2954 	if (mp == NULL) {
2955 		ASSERT(error != 0);
2956 		UDPS_BUMP_MIB(us, udpOutErrors);
2957 		goto done;
2958 	}
2959 	if (ixa->ixa_pktlen > IP_MAXPACKET) {
2960 		error = EMSGSIZE;
2961 		UDPS_BUMP_MIB(us, udpOutErrors);
2962 		freemsg(mp);
2963 		goto done;
2964 	}
2965 	/* We're done.  Pass the packet to ip. */
2966 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
2967 
2968 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
2969 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
2970 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
2971 
2972 	error = conn_ip_output(mp, ixa);
2973 	/* No udpOutErrors if an error since IP increases its error counter */
2974 	switch (error) {
2975 	case 0:
2976 		break;
2977 	case EWOULDBLOCK:
2978 		(void) ixa_check_drain_insert(connp, ixa);
2979 		error = 0;
2980 		break;
2981 	case EADDRNOTAVAIL:
2982 		/*
2983 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2984 		 * Don't have the application see that errno
2985 		 */
2986 		error = ENETUNREACH;
2987 		/* FALLTHRU */
2988 	default:
2989 		mutex_enter(&connp->conn_lock);
2990 		/*
2991 		 * Clear the source and v6lastdst so we call ip_attr_connect
2992 		 * for the next packet and try to pick a better source.
2993 		 */
2994 		if (connp->conn_mcbc_bind)
2995 			connp->conn_saddr_v6 = ipv6_all_zeros;
2996 		else
2997 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
2998 		connp->conn_v6lastdst = ipv6_all_zeros;
2999 		mutex_exit(&connp->conn_lock);
3000 		break;
3001 	}
3002 done:
3003 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3004 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3005 	ixa->ixa_cpid = connp->conn_cpid;
3006 	ixa_refrele(ixa);
3007 	ip_pkt_free(ipp);
3008 	kmem_free(ipp, sizeof (*ipp));
3009 	return (error);
3010 }
3011 
3012 /*
3013  * Handle sending an M_DATA for a connected socket.
3014  * Handles both IPv4 and IPv6.
3015  */
3016 static int
3017 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
3018 {
3019 	udp_t		*udp = connp->conn_udp;
3020 	udp_stack_t	*us = udp->udp_us;
3021 	int		error;
3022 	ip_xmit_attr_t	*ixa;
3023 
3024 	/*
3025 	 * If no other thread is using conn_ixa this just gets a reference to
3026 	 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
3027 	 */
3028 	ixa = conn_get_ixa(connp, B_FALSE);
3029 	if (ixa == NULL) {
3030 		UDPS_BUMP_MIB(us, udpOutErrors);
3031 		freemsg(mp);
3032 		return (ENOMEM);
3033 	}
3034 
3035 	ASSERT(cr != NULL);
3036 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3037 	ixa->ixa_cred = cr;
3038 	ixa->ixa_cpid = pid;
3039 
3040 	mutex_enter(&connp->conn_lock);
3041 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
3042 	    connp->conn_fport, connp->conn_flowinfo, &error);
3043 
3044 	if (mp == NULL) {
3045 		ASSERT(error != 0);
3046 		mutex_exit(&connp->conn_lock);
3047 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3048 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
3049 		ixa->ixa_cpid = connp->conn_cpid;
3050 		ixa_refrele(ixa);
3051 		UDPS_BUMP_MIB(us, udpOutErrors);
3052 		freemsg(mp);
3053 		return (error);
3054 	}
3055 
3056 	/*
3057 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3058 	 * safe copy, then we need to fill in any pointers in it.
3059 	 */
3060 	if (ixa->ixa_ire == NULL) {
3061 		in6_addr_t	faddr, saddr;
3062 		in6_addr_t	nexthop;
3063 		in_port_t	fport;
3064 
3065 		saddr = connp->conn_saddr_v6;
3066 		faddr = connp->conn_faddr_v6;
3067 		fport = connp->conn_fport;
3068 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
3069 		mutex_exit(&connp->conn_lock);
3070 
3071 		error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
3072 		    fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
3073 		    IPDF_IPSEC);
3074 		switch (error) {
3075 		case 0:
3076 			break;
3077 		case EADDRNOTAVAIL:
3078 			/*
3079 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3080 			 * Don't have the application see that errno
3081 			 */
3082 			error = ENETUNREACH;
3083 			goto failed;
3084 		case ENETDOWN:
3085 			/*
3086 			 * Have !ipif_addr_ready address; drop packet silently
3087 			 * until we can get applications to not send until we
3088 			 * are ready.
3089 			 */
3090 			error = 0;
3091 			goto failed;
3092 		case EHOSTUNREACH:
3093 		case ENETUNREACH:
3094 			if (ixa->ixa_ire != NULL) {
3095 				/*
3096 				 * Let conn_ip_output/ire_send_noroute return
3097 				 * the error and send any local ICMP error.
3098 				 */
3099 				error = 0;
3100 				break;
3101 			}
3102 			/* FALLTHRU */
3103 		default:
3104 		failed:
3105 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3106 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
3107 			ixa->ixa_cpid = connp->conn_cpid;
3108 			ixa_refrele(ixa);
3109 			freemsg(mp);
3110 			UDPS_BUMP_MIB(us, udpOutErrors);
3111 			return (error);
3112 		}
3113 	} else {
3114 		/* Done with conn_t */
3115 		mutex_exit(&connp->conn_lock);
3116 	}
3117 	ASSERT(ixa->ixa_ire != NULL);
3118 
3119 	/* We're done.  Pass the packet to ip. */
3120 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3121 
3122 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3123 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3124 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3125 
3126 	error = conn_ip_output(mp, ixa);
3127 	/* No udpOutErrors if an error since IP increases its error counter */
3128 	switch (error) {
3129 	case 0:
3130 		break;
3131 	case EWOULDBLOCK:
3132 		(void) ixa_check_drain_insert(connp, ixa);
3133 		error = 0;
3134 		break;
3135 	case EADDRNOTAVAIL:
3136 		/*
3137 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3138 		 * Don't have the application see that errno
3139 		 */
3140 		error = ENETUNREACH;
3141 		break;
3142 	}
3143 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3144 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3145 	ixa->ixa_cpid = connp->conn_cpid;
3146 	ixa_refrele(ixa);
3147 	return (error);
3148 }
3149 
3150 /*
3151  * Handle sending an M_DATA to the last destination.
3152  * Handles both IPv4 and IPv6.
3153  *
3154  * NOTE: The caller must hold conn_lock and we drop it here.
3155  */
3156 static int
3157 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3158     ip_xmit_attr_t *ixa)
3159 {
3160 	udp_t		*udp = connp->conn_udp;
3161 	udp_stack_t	*us = udp->udp_us;
3162 	int		error;
3163 
3164 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3165 	ASSERT(ixa != NULL);
3166 
3167 	ASSERT(cr != NULL);
3168 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3169 	ixa->ixa_cred = cr;
3170 	ixa->ixa_cpid = pid;
3171 
3172 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
3173 	    connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
3174 
3175 	if (mp == NULL) {
3176 		ASSERT(error != 0);
3177 		mutex_exit(&connp->conn_lock);
3178 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3179 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
3180 		ixa->ixa_cpid = connp->conn_cpid;
3181 		ixa_refrele(ixa);
3182 		UDPS_BUMP_MIB(us, udpOutErrors);
3183 		freemsg(mp);
3184 		return (error);
3185 	}
3186 
3187 	/*
3188 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3189 	 * safe copy, then we need to fill in any pointers in it.
3190 	 */
3191 	if (ixa->ixa_ire == NULL) {
3192 		in6_addr_t	lastdst, lastsrc;
3193 		in6_addr_t	nexthop;
3194 		in_port_t	lastport;
3195 
3196 		lastsrc = connp->conn_v6lastsrc;
3197 		lastdst = connp->conn_v6lastdst;
3198 		lastport = connp->conn_lastdstport;
3199 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3200 		mutex_exit(&connp->conn_lock);
3201 
3202 		error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3203 		    &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3204 		    IPDF_VERIFY_DST | IPDF_IPSEC);
3205 		switch (error) {
3206 		case 0:
3207 			break;
3208 		case EADDRNOTAVAIL:
3209 			/*
3210 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3211 			 * Don't have the application see that errno
3212 			 */
3213 			error = ENETUNREACH;
3214 			goto failed;
3215 		case ENETDOWN:
3216 			/*
3217 			 * Have !ipif_addr_ready address; drop packet silently
3218 			 * until we can get applications to not send until we
3219 			 * are ready.
3220 			 */
3221 			error = 0;
3222 			goto failed;
3223 		case EHOSTUNREACH:
3224 		case ENETUNREACH:
3225 			if (ixa->ixa_ire != NULL) {
3226 				/*
3227 				 * Let conn_ip_output/ire_send_noroute return
3228 				 * the error and send any local ICMP error.
3229 				 */
3230 				error = 0;
3231 				break;
3232 			}
3233 			/* FALLTHRU */
3234 		default:
3235 		failed:
3236 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3237 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
3238 			ixa->ixa_cpid = connp->conn_cpid;
3239 			ixa_refrele(ixa);
3240 			freemsg(mp);
3241 			UDPS_BUMP_MIB(us, udpOutErrors);
3242 			return (error);
3243 		}
3244 	} else {
3245 		/* Done with conn_t */
3246 		mutex_exit(&connp->conn_lock);
3247 	}
3248 
3249 	/* We're done.  Pass the packet to ip. */
3250 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3251 
3252 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3253 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3254 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3255 
3256 	error = conn_ip_output(mp, ixa);
3257 	/* No udpOutErrors if an error since IP increases its error counter */
3258 	switch (error) {
3259 	case 0:
3260 		break;
3261 	case EWOULDBLOCK:
3262 		(void) ixa_check_drain_insert(connp, ixa);
3263 		error = 0;
3264 		break;
3265 	case EADDRNOTAVAIL:
3266 		/*
3267 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3268 		 * Don't have the application see that errno
3269 		 */
3270 		error = ENETUNREACH;
3271 		/* FALLTHRU */
3272 	default:
3273 		mutex_enter(&connp->conn_lock);
3274 		/*
3275 		 * Clear the source and v6lastdst so we call ip_attr_connect
3276 		 * for the next packet and try to pick a better source.
3277 		 */
3278 		if (connp->conn_mcbc_bind)
3279 			connp->conn_saddr_v6 = ipv6_all_zeros;
3280 		else
3281 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3282 		connp->conn_v6lastdst = ipv6_all_zeros;
3283 		mutex_exit(&connp->conn_lock);
3284 		break;
3285 	}
3286 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3287 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3288 	ixa->ixa_cpid = connp->conn_cpid;
3289 	ixa_refrele(ixa);
3290 	return (error);
3291 }
3292 
3293 
3294 /*
3295  * Prepend the header template and then fill in the source and
3296  * flowinfo. The caller needs to handle the destination address since
3297  * it's setting is different if rthdr or source route.
3298  *
3299  * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET.
3300  * When it returns NULL it sets errorp.
3301  */
3302 static mblk_t *
3303 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3304     const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3305 {
3306 	udp_t		*udp = connp->conn_udp;
3307 	udp_stack_t	*us = udp->udp_us;
3308 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
3309 	boolean_t	hash_srcport = udp->udp_vxlanhash;
3310 	uint_t		pktlen;
3311 	uint_t		alloclen;
3312 	uint_t		copylen;
3313 	uint8_t		*iph;
3314 	uint_t		ip_hdr_length;
3315 	udpha_t		*udpha;
3316 	uint32_t	cksum;
3317 	ip_pkt_t	*ipp;
3318 	uint16_t	srcport;
3319 
3320 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3321 
3322 	/*
3323 	 * If we have source port hashing going on, determine the hash before
3324 	 * we modify the mblk_t.
3325 	 */
3326 	if (hash_srcport == B_TRUE) {
3327 		srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
3328 		    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
3329 		    ntohs(connp->conn_lport));
3330 	}
3331 
3332 	/*
3333 	 * Copy the header template and leave space for an SPI
3334 	 */
3335 	copylen = connp->conn_ht_iphc_len;
3336 	alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3337 	pktlen = alloclen + msgdsize(mp);
3338 	if (pktlen > IP_MAXPACKET) {
3339 		freemsg(mp);
3340 		*errorp = EMSGSIZE;
3341 		return (NULL);
3342 	}
3343 	ixa->ixa_pktlen = pktlen;
3344 
3345 	/* check/fix buffer config, setup pointers into it */
3346 	iph = mp->b_rptr - alloclen;
3347 	if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3348 		mblk_t *mp1;
3349 
3350 		mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
3351 		if (mp1 == NULL) {
3352 			freemsg(mp);
3353 			*errorp = ENOMEM;
3354 			return (NULL);
3355 		}
3356 		mp1->b_wptr = DB_LIM(mp1);
3357 		mp1->b_cont = mp;
3358 		mp = mp1;
3359 		iph = (mp->b_wptr - alloclen);
3360 	}
3361 	mp->b_rptr = iph;
3362 	bcopy(connp->conn_ht_iphc, iph, copylen);
3363 	ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3364 
3365 	ixa->ixa_ip_hdr_length = ip_hdr_length;
3366 	udpha = (udpha_t *)(iph + ip_hdr_length);
3367 
3368 	/*
3369 	 * Setup header length and prepare for ULP checksum done in IP.
3370 	 * udp_build_hdr_template has already massaged any routing header
3371 	 * and placed the result in conn_sum.
3372 	 *
3373 	 * We make it easy for IP to include our pseudo header
3374 	 * by putting our length in uha_checksum.
3375 	 */
3376 	cksum = pktlen - ip_hdr_length;
3377 	udpha->uha_length = htons(cksum);
3378 
3379 	cksum += connp->conn_sum;
3380 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
3381 	ASSERT(cksum < 0x10000);
3382 
3383 	ipp = &connp->conn_xmit_ipp;
3384 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
3385 		ipha_t	*ipha = (ipha_t *)iph;
3386 
3387 		ipha->ipha_length = htons((uint16_t)pktlen);
3388 
3389 		/* IP does the checksum if uha_checksum is non-zero */
3390 		if (us->us_do_checksum)
3391 			udpha->uha_checksum = htons(cksum);
3392 
3393 		/* if IP_PKTINFO specified an addres it wins over bind() */
3394 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3395 		    IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3396 			ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
3397 			ipha->ipha_src = ipp->ipp_addr_v4;
3398 		} else {
3399 			IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
3400 		}
3401 	} else {
3402 		ip6_t *ip6h = (ip6_t *)iph;
3403 
3404 		ip6h->ip6_plen =  htons((uint16_t)(pktlen - IPV6_HDR_LEN));
3405 		udpha->uha_checksum = htons(cksum);
3406 
3407 		/* if IP_PKTINFO specified an addres it wins over bind() */
3408 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3409 		    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3410 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
3411 			ip6h->ip6_src = ipp->ipp_addr;
3412 		} else {
3413 			ip6h->ip6_src = *v6src;
3414 		}
3415 		ip6h->ip6_vcf =
3416 		    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
3417 		    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
3418 		if (ipp->ipp_fields & IPPF_TCLASS) {
3419 			/* Overrides the class part of flowinfo */
3420 			ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3421 			    ipp->ipp_tclass);
3422 		}
3423 	}
3424 
3425 	/* Insert all-0s SPI now. */
3426 	if (insert_spi)
3427 		*((uint32_t *)(udpha + 1)) = 0;
3428 
3429 	udpha->uha_dst_port = dstport;
3430 	if (hash_srcport == B_TRUE)
3431 		udpha->uha_src_port = htons(srcport);
3432 
3433 	return (mp);
3434 }
3435 
3436 /*
3437  * Send a T_UDERR_IND in response to an M_DATA
3438  */
3439 static void
3440 udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3441 {
3442 	struct sockaddr_storage ss;
3443 	sin_t		*sin;
3444 	sin6_t		*sin6;
3445 	struct sockaddr	*addr;
3446 	socklen_t	addrlen;
3447 	mblk_t		*mp1;
3448 
3449 	mutex_enter(&connp->conn_lock);
3450 	/* Initialize addr and addrlen as if they're passed in */
3451 	if (connp->conn_family == AF_INET) {
3452 		sin = (sin_t *)&ss;
3453 		*sin = sin_null;
3454 		sin->sin_family = AF_INET;
3455 		sin->sin_port = connp->conn_fport;
3456 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
3457 		addr = (struct sockaddr *)sin;
3458 		addrlen = sizeof (*sin);
3459 	} else {
3460 		sin6 = (sin6_t *)&ss;
3461 		*sin6 = sin6_null;
3462 		sin6->sin6_family = AF_INET6;
3463 		sin6->sin6_port = connp->conn_fport;
3464 		sin6->sin6_flowinfo = connp->conn_flowinfo;
3465 		sin6->sin6_addr = connp->conn_faddr_v6;
3466 		if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
3467 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
3468 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
3469 		} else {
3470 			sin6->sin6_scope_id = 0;
3471 		}
3472 		sin6->__sin6_src_id = 0;
3473 		addr = (struct sockaddr *)sin6;
3474 		addrlen = sizeof (*sin6);
3475 	}
3476 	mutex_exit(&connp->conn_lock);
3477 
3478 	mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
3479 	if (mp1 != NULL)
3480 		putnext(connp->conn_rq, mp1);
3481 }
3482 
3483 /*
3484  * This routine handles all messages passed downstream.  It either
3485  * consumes the message or passes it downstream; it never queues a
3486  * a message.
3487  *
3488  * Also entry point for sockfs when udp is in "direct sockfs" mode.  This mode
3489  * is valid when we are directly beneath the stream head, and thus sockfs
3490  * is able to bypass STREAMS and directly call us, passing along the sockaddr
3491  * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3492  * connected endpoints.
3493  */
3494 int
3495 udp_wput(queue_t *q, mblk_t *mp)
3496 {
3497 	sin6_t		*sin6;
3498 	sin_t		*sin = NULL;
3499 	uint_t		srcid;
3500 	conn_t		*connp = Q_TO_CONN(q);
3501 	udp_t		*udp = connp->conn_udp;
3502 	int		error = 0;
3503 	struct sockaddr	*addr = NULL;
3504 	socklen_t	addrlen;
3505 	udp_stack_t	*us = udp->udp_us;
3506 	struct T_unitdata_req *tudr;
3507 	mblk_t		*data_mp;
3508 	ushort_t	ipversion;
3509 	cred_t		*cr;
3510 	pid_t		pid;
3511 
3512 	/*
3513 	 * We directly handle several cases here: T_UNITDATA_REQ message
3514 	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3515 	 * socket.
3516 	 */
3517 	switch (DB_TYPE(mp)) {
3518 	case M_DATA:
3519 		if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
3520 			/* Not connected; address is required */
3521 			UDPS_BUMP_MIB(us, udpOutErrors);
3522 			UDP_DBGSTAT(us, udp_data_notconn);
3523 			UDP_STAT(us, udp_out_err_notconn);
3524 			freemsg(mp);
3525 			return (0);
3526 		}
3527 		/*
3528 		 * All Solaris components should pass a db_credp
3529 		 * for this message, hence we ASSERT.
3530 		 * On production kernels we return an error to be robust against
3531 		 * random streams modules sitting on top of us.
3532 		 */
3533 		cr = msg_getcred(mp, &pid);
3534 		ASSERT(cr != NULL);
3535 		if (cr == NULL) {
3536 			UDPS_BUMP_MIB(us, udpOutErrors);
3537 			freemsg(mp);
3538 			return (0);
3539 		}
3540 		ASSERT(udp->udp_issocket);
3541 		UDP_DBGSTAT(us, udp_data_conn);
3542 		error = udp_output_connected(connp, mp, cr, pid);
3543 		if (error != 0) {
3544 			UDP_STAT(us, udp_out_err_output);
3545 			if (connp->conn_rq != NULL)
3546 				udp_ud_err_connected(connp, (t_scalar_t)error);
3547 #ifdef DEBUG
3548 			printf("udp_output_connected returned %d\n", error);
3549 #endif
3550 		}
3551 		return (0);
3552 
3553 	case M_PROTO:
3554 	case M_PCPROTO:
3555 		tudr = (struct T_unitdata_req *)mp->b_rptr;
3556 		if (MBLKL(mp) < sizeof (*tudr) ||
3557 		    ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
3558 			udp_wput_other(q, mp);
3559 			return (0);
3560 		}
3561 		break;
3562 
3563 	default:
3564 		udp_wput_other(q, mp);
3565 		return (0);
3566 	}
3567 
3568 	/* Handle valid T_UNITDATA_REQ here */
3569 	data_mp = mp->b_cont;
3570 	if (data_mp == NULL) {
3571 		error = EPROTO;
3572 		goto ud_error2;
3573 	}
3574 	mp->b_cont = NULL;
3575 
3576 	if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
3577 		error = EADDRNOTAVAIL;
3578 		goto ud_error2;
3579 	}
3580 
3581 	/*
3582 	 * All Solaris components should pass a db_credp
3583 	 * for this TPI message, hence we should ASSERT.
3584 	 * However, RPC (svc_clts_ksend) does this odd thing where it
3585 	 * passes the options from a T_UNITDATA_IND unchanged in a
3586 	 * T_UNITDATA_REQ. While that is the right thing to do for
3587 	 * some options, SCM_UCRED being the key one, this also makes it
3588 	 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3589 	 */
3590 	cr = msg_getcred(mp, &pid);
3591 	if (cr == NULL) {
3592 		cr = connp->conn_cred;
3593 		pid = connp->conn_cpid;
3594 	}
3595 
3596 	/*
3597 	 * If a port has not been bound to the stream, fail.
3598 	 * This is not a problem when sockfs is directly
3599 	 * above us, because it will ensure that the socket
3600 	 * is first bound before allowing data to be sent.
3601 	 */
3602 	if (udp->udp_state == TS_UNBND) {
3603 		error = EPROTO;
3604 		goto ud_error2;
3605 	}
3606 	addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
3607 	addrlen = tudr->DEST_length;
3608 
3609 	switch (connp->conn_family) {
3610 	case AF_INET6:
3611 		sin6 = (sin6_t *)addr;
3612 		if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
3613 		    (sin6->sin6_family != AF_INET6)) {
3614 			error = EADDRNOTAVAIL;
3615 			goto ud_error2;
3616 		}
3617 
3618 		srcid = sin6->__sin6_src_id;
3619 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
3620 			/*
3621 			 * Destination is a non-IPv4-compatible IPv6 address.
3622 			 * Send out an IPv6 format packet.
3623 			 */
3624 
3625 			/*
3626 			 * If the local address is a mapped address return
3627 			 * an error.
3628 			 * It would be possible to send an IPv6 packet but the
3629 			 * response would never make it back to the application
3630 			 * since it is bound to a mapped address.
3631 			 */
3632 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
3633 				error = EADDRNOTAVAIL;
3634 				goto ud_error2;
3635 			}
3636 
3637 			UDP_DBGSTAT(us, udp_out_ipv6);
3638 
3639 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
3640 				sin6->sin6_addr = ipv6_loopback;
3641 			ipversion = IPV6_VERSION;
3642 		} else {
3643 			if (connp->conn_ipv6_v6only) {
3644 				error = EADDRNOTAVAIL;
3645 				goto ud_error2;
3646 			}
3647 
3648 			/*
3649 			 * If the local address is not zero or a mapped address
3650 			 * return an error.  It would be possible to send an
3651 			 * IPv4 packet but the response would never make it
3652 			 * back to the application since it is bound to a
3653 			 * non-mapped address.
3654 			 */
3655 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
3656 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
3657 				error = EADDRNOTAVAIL;
3658 				goto ud_error2;
3659 			}
3660 			UDP_DBGSTAT(us, udp_out_mapped);
3661 
3662 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
3663 				V4_PART_OF_V6(sin6->sin6_addr) =
3664 				    htonl(INADDR_LOOPBACK);
3665 			}
3666 			ipversion = IPV4_VERSION;
3667 		}
3668 
3669 		if (tudr->OPT_length != 0) {
3670 			/*
3671 			 * If we are connected then the destination needs to be
3672 			 * the same as the connected one.
3673 			 */
3674 			if (udp->udp_state == TS_DATA_XFER &&
3675 			    !conn_same_as_last_v6(connp, sin6)) {
3676 				error = EISCONN;
3677 				goto ud_error2;
3678 			}
3679 			UDP_STAT(us, udp_out_opt);
3680 			error = udp_output_ancillary(connp, NULL, sin6,
3681 			    data_mp, mp, NULL, cr, pid);
3682 		} else {
3683 			ip_xmit_attr_t *ixa;
3684 
3685 			/*
3686 			 * We have to allocate an ip_xmit_attr_t before we grab
3687 			 * conn_lock and we need to hold conn_lock once we've
3688 			 * checked conn_same_as_last_v6 to handle concurrent
3689 			 * send* calls on a socket.
3690 			 */
3691 			ixa = conn_get_ixa(connp, B_FALSE);
3692 			if (ixa == NULL) {
3693 				error = ENOMEM;
3694 				goto ud_error2;
3695 			}
3696 			mutex_enter(&connp->conn_lock);
3697 
3698 			if (conn_same_as_last_v6(connp, sin6) &&
3699 			    connp->conn_lastsrcid == srcid &&
3700 			    ipsec_outbound_policy_current(ixa)) {
3701 				UDP_DBGSTAT(us, udp_out_lastdst);
3702 				/* udp_output_lastdst drops conn_lock */
3703 				error = udp_output_lastdst(connp, data_mp, cr,
3704 				    pid, ixa);
3705 			} else {
3706 				UDP_DBGSTAT(us, udp_out_diffdst);
3707 				/* udp_output_newdst drops conn_lock */
3708 				error = udp_output_newdst(connp, data_mp, NULL,
3709 				    sin6, ipversion, cr, pid, ixa);
3710 			}
3711 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3712 		}
3713 		if (error == 0) {
3714 			freeb(mp);
3715 			return (0);
3716 		}
3717 		break;
3718 
3719 	case AF_INET:
3720 		sin = (sin_t *)addr;
3721 		if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
3722 		    (sin->sin_family != AF_INET)) {
3723 			error = EADDRNOTAVAIL;
3724 			goto ud_error2;
3725 		}
3726 		UDP_DBGSTAT(us, udp_out_ipv4);
3727 		if (sin->sin_addr.s_addr == INADDR_ANY)
3728 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
3729 		ipversion = IPV4_VERSION;
3730 
3731 		srcid = 0;
3732 		if (tudr->OPT_length != 0) {
3733 			/*
3734 			 * If we are connected then the destination needs to be
3735 			 * the same as the connected one.
3736 			 */
3737 			if (udp->udp_state == TS_DATA_XFER &&
3738 			    !conn_same_as_last_v4(connp, sin)) {
3739 				error = EISCONN;
3740 				goto ud_error2;
3741 			}
3742 			UDP_STAT(us, udp_out_opt);
3743 			error = udp_output_ancillary(connp, sin, NULL,
3744 			    data_mp, mp, NULL, cr, pid);
3745 		} else {
3746 			ip_xmit_attr_t *ixa;
3747 
3748 			/*
3749 			 * We have to allocate an ip_xmit_attr_t before we grab
3750 			 * conn_lock and we need to hold conn_lock once we've
3751 			 * checked conn_same_as_last_v4 to handle concurrent
3752 			 * send* calls on a socket.
3753 			 */
3754 			ixa = conn_get_ixa(connp, B_FALSE);
3755 			if (ixa == NULL) {
3756 				error = ENOMEM;
3757 				goto ud_error2;
3758 			}
3759 			mutex_enter(&connp->conn_lock);
3760 
3761 			if (conn_same_as_last_v4(connp, sin) &&
3762 			    ipsec_outbound_policy_current(ixa)) {
3763 				UDP_DBGSTAT(us, udp_out_lastdst);
3764 				/* udp_output_lastdst drops conn_lock */
3765 				error = udp_output_lastdst(connp, data_mp, cr,
3766 				    pid, ixa);
3767 			} else {
3768 				UDP_DBGSTAT(us, udp_out_diffdst);
3769 				/* udp_output_newdst drops conn_lock */
3770 				error = udp_output_newdst(connp, data_mp, sin,
3771 				    NULL, ipversion, cr, pid, ixa);
3772 			}
3773 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3774 		}
3775 		if (error == 0) {
3776 			freeb(mp);
3777 			return (0);
3778 		}
3779 		break;
3780 	}
3781 	UDP_STAT(us, udp_out_err_output);
3782 	ASSERT(mp != NULL);
3783 	/* mp is freed by the following routine */
3784 	udp_ud_err(q, mp, (t_scalar_t)error);
3785 	return (0);
3786 
3787 ud_error2:
3788 	UDPS_BUMP_MIB(us, udpOutErrors);
3789 	freemsg(data_mp);
3790 	UDP_STAT(us, udp_out_err_output);
3791 	ASSERT(mp != NULL);
3792 	/* mp is freed by the following routine */
3793 	udp_ud_err(q, mp, (t_scalar_t)error);
3794 	return (0);
3795 }
3796 
3797 /*
3798  * Handle the case of the IP address, port, flow label being different
3799  * for both IPv4 and IPv6.
3800  *
3801  * NOTE: The caller must hold conn_lock and we drop it here.
3802  */
3803 static int
3804 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
3805     ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
3806 {
3807 	uint_t		srcid;
3808 	uint32_t	flowinfo;
3809 	udp_t		*udp = connp->conn_udp;
3810 	int		error = 0;
3811 	ip_xmit_attr_t	*oldixa;
3812 	udp_stack_t	*us = udp->udp_us;
3813 	in6_addr_t	v6src;
3814 	in6_addr_t	v6dst;
3815 	in6_addr_t	v6nexthop;
3816 	in_port_t	dstport;
3817 
3818 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3819 	ASSERT(ixa != NULL);
3820 	/*
3821 	 * We hold conn_lock across all the use and modifications of
3822 	 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3823 	 * stay consistent.
3824 	 */
3825 
3826 	ASSERT(cr != NULL);
3827 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3828 	ixa->ixa_cred = cr;
3829 	ixa->ixa_cpid = pid;
3830 	if (is_system_labeled()) {
3831 		/* We need to restart with a label based on the cred */
3832 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3833 	}
3834 
3835 	/*
3836 	 * If we are connected then the destination needs to be the
3837 	 * same as the connected one, which is not the case here since we
3838 	 * checked for that above.
3839 	 */
3840 	if (udp->udp_state == TS_DATA_XFER) {
3841 		mutex_exit(&connp->conn_lock);
3842 		error = EISCONN;
3843 		goto ud_error;
3844 	}
3845 
3846 	/* In case previous destination was multicast or multirt */
3847 	ip_attr_newdst(ixa);
3848 
3849 	/*
3850 	 * If laddr is unspecified then we look at sin6_src_id.
3851 	 * We will give precedence to a source address set with IPV6_PKTINFO
3852 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3853 	 * want ip_attr_connect to select a source (since it can fail) when
3854 	 * IPV6_PKTINFO is specified.
3855 	 * If this doesn't result in a source address then we get a source
3856 	 * from ip_attr_connect() below.
3857 	 */
3858 	v6src = connp->conn_saddr_v6;
3859 	if (sin != NULL) {
3860 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3861 		dstport = sin->sin_port;
3862 		flowinfo = 0;
3863 		/* Don't bother with ip_srcid_find_id(), but indicate anyway. */
3864 		srcid = 0;
3865 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3866 		ixa->ixa_flags |= IXAF_IS_IPV4;
3867 	} else {
3868 		boolean_t v4mapped;
3869 
3870 		v6dst = sin6->sin6_addr;
3871 		dstport = sin6->sin6_port;
3872 		flowinfo = sin6->sin6_flowinfo;
3873 		srcid = sin6->__sin6_src_id;
3874 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3875 			ixa->ixa_scopeid = sin6->sin6_scope_id;
3876 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
3877 		} else {
3878 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3879 		}
3880 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
3881 		if (v4mapped)
3882 			ixa->ixa_flags |= IXAF_IS_IPV4;
3883 		else
3884 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
3885 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3886 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3887 			    v4mapped, connp->conn_netstack)) {
3888 				/* Mismatched v4mapped/v6 specified by srcid. */
3889 				mutex_exit(&connp->conn_lock);
3890 				error = EADDRNOTAVAIL;
3891 				goto ud_error;
3892 			}
3893 		}
3894 	}
3895 	/* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3896 	if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) {
3897 		ip_pkt_t *ipp = &connp->conn_xmit_ipp;
3898 
3899 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
3900 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3901 				v6src = ipp->ipp_addr;
3902 		} else {
3903 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3904 				v6src = ipp->ipp_addr;
3905 		}
3906 	}
3907 
3908 	ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
3909 	mutex_exit(&connp->conn_lock);
3910 
3911 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3912 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
3913 	switch (error) {
3914 	case 0:
3915 		break;
3916 	case EADDRNOTAVAIL:
3917 		/*
3918 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3919 		 * Don't have the application see that errno
3920 		 */
3921 		error = ENETUNREACH;
3922 		goto failed;
3923 	case ENETDOWN:
3924 		/*
3925 		 * Have !ipif_addr_ready address; drop packet silently
3926 		 * until we can get applications to not send until we
3927 		 * are ready.
3928 		 */
3929 		error = 0;
3930 		goto failed;
3931 	case EHOSTUNREACH:
3932 	case ENETUNREACH:
3933 		if (ixa->ixa_ire != NULL) {
3934 			/*
3935 			 * Let conn_ip_output/ire_send_noroute return
3936 			 * the error and send any local ICMP error.
3937 			 */
3938 			error = 0;
3939 			break;
3940 		}
3941 		/* FALLTHRU */
3942 	failed:
3943 	default:
3944 		goto ud_error;
3945 	}
3946 
3947 
3948 	/*
3949 	 * Cluster note: we let the cluster hook know that we are sending to a
3950 	 * new address and/or port.
3951 	 */
3952 	if (cl_inet_connect2 != NULL) {
3953 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
3954 		if (error != 0) {
3955 			error = EHOSTUNREACH;
3956 			goto ud_error;
3957 		}
3958 	}
3959 
3960 	mutex_enter(&connp->conn_lock);
3961 	/*
3962 	 * While we dropped the lock some other thread might have connected
3963 	 * this socket. If so we bail out with EISCONN to ensure that the
3964 	 * connecting thread is the one that updates conn_ixa, conn_ht_*
3965 	 * and conn_*last*.
3966 	 */
3967 	if (udp->udp_state == TS_DATA_XFER) {
3968 		mutex_exit(&connp->conn_lock);
3969 		error = EISCONN;
3970 		goto ud_error;
3971 	}
3972 
3973 	/*
3974 	 * We need to rebuild the headers if
3975 	 *  - we are labeling packets (could be different for different
3976 	 *    destinations)
3977 	 *  - we have a source route (or routing header) since we need to
3978 	 *    massage that to get the pseudo-header checksum
3979 	 *  - the IP version is different than the last time
3980 	 *  - a socket option with COA_HEADER_CHANGED has been set which
3981 	 *    set conn_v6lastdst to zero.
3982 	 *
3983 	 * Otherwise the prepend function will just update the src, dst,
3984 	 * dstport, and flow label.
3985 	 */
3986 	if (is_system_labeled()) {
3987 		/* TX MLP requires SCM_UCRED and don't have that here */
3988 		if (connp->conn_mlp_type != mlptSingle) {
3989 			mutex_exit(&connp->conn_lock);
3990 			error = ECONNREFUSED;
3991 			goto ud_error;
3992 		}
3993 		/*
3994 		 * Check whether Trusted Solaris policy allows communication
3995 		 * with this host, and pretend that the destination is
3996 		 * unreachable if not.
3997 		 * Compute any needed label and place it in ipp_label_v4/v6.
3998 		 *
3999 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
4000 		 * ipp_label_v4/v6 to form the packet.
4001 		 *
4002 		 * Tsol note: Since we hold conn_lock we know no other
4003 		 * thread manipulates conn_xmit_ipp.
4004 		 */
4005 		error = conn_update_label(connp, ixa, &v6dst,
4006 		    &connp->conn_xmit_ipp);
4007 		if (error != 0) {
4008 			mutex_exit(&connp->conn_lock);
4009 			goto ud_error;
4010 		}
4011 		/* Rebuild the header template */
4012 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4013 		    flowinfo);
4014 		if (error != 0) {
4015 			mutex_exit(&connp->conn_lock);
4016 			goto ud_error;
4017 		}
4018 	} else if ((connp->conn_xmit_ipp.ipp_fields &
4019 	    (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
4020 	    ipversion != connp->conn_lastipversion ||
4021 	    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
4022 		/* Rebuild the header template */
4023 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4024 		    flowinfo);
4025 		if (error != 0) {
4026 			mutex_exit(&connp->conn_lock);
4027 			goto ud_error;
4028 		}
4029 	} else {
4030 		/* Simply update the destination address if no source route */
4031 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
4032 			ipha_t	*ipha = (ipha_t *)connp->conn_ht_iphc;
4033 
4034 			IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
4035 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
4036 				ipha->ipha_fragment_offset_and_flags |=
4037 				    IPH_DF_HTONS;
4038 			} else {
4039 				ipha->ipha_fragment_offset_and_flags &=
4040 				    ~IPH_DF_HTONS;
4041 			}
4042 		} else {
4043 			ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
4044 			ip6h->ip6_dst = v6dst;
4045 		}
4046 	}
4047 
4048 	/*
4049 	 * Remember the dst/dstport etc which corresponds to the built header
4050 	 * template and conn_ixa.
4051 	 */
4052 	oldixa = conn_replace_ixa(connp, ixa);
4053 	connp->conn_v6lastdst = v6dst;
4054 	connp->conn_lastipversion = ipversion;
4055 	connp->conn_lastdstport = dstport;
4056 	connp->conn_lastflowinfo = flowinfo;
4057 	connp->conn_lastscopeid = ixa->ixa_scopeid;
4058 	connp->conn_lastsrcid = srcid;
4059 	/* Also remember a source to use together with lastdst */
4060 	connp->conn_v6lastsrc = v6src;
4061 
4062 	data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
4063 	    dstport, flowinfo, &error);
4064 
4065 	/* Done with conn_t */
4066 	mutex_exit(&connp->conn_lock);
4067 	ixa_refrele(oldixa);
4068 
4069 	if (data_mp == NULL) {
4070 		ASSERT(error != 0);
4071 		goto ud_error;
4072 	}
4073 
4074 	/* We're done.  Pass the packet to ip. */
4075 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
4076 
4077 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
4078 	    void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *,
4079 	    &data_mp->b_rptr[ixa->ixa_ip_hdr_length]);
4080 
4081 	error = conn_ip_output(data_mp, ixa);
4082 	/* No udpOutErrors if an error since IP increases its error counter */
4083 	switch (error) {
4084 	case 0:
4085 		break;
4086 	case EWOULDBLOCK:
4087 		(void) ixa_check_drain_insert(connp, ixa);
4088 		error = 0;
4089 		break;
4090 	case EADDRNOTAVAIL:
4091 		/*
4092 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4093 		 * Don't have the application see that errno
4094 		 */
4095 		error = ENETUNREACH;
4096 		/* FALLTHRU */
4097 	default:
4098 		mutex_enter(&connp->conn_lock);
4099 		/*
4100 		 * Clear the source and v6lastdst so we call ip_attr_connect
4101 		 * for the next packet and try to pick a better source.
4102 		 */
4103 		if (connp->conn_mcbc_bind)
4104 			connp->conn_saddr_v6 = ipv6_all_zeros;
4105 		else
4106 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
4107 		connp->conn_v6lastdst = ipv6_all_zeros;
4108 		mutex_exit(&connp->conn_lock);
4109 		break;
4110 	}
4111 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4112 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
4113 	ixa->ixa_cpid = connp->conn_cpid;
4114 	ixa_refrele(ixa);
4115 	return (error);
4116 
4117 ud_error:
4118 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4119 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
4120 	ixa->ixa_cpid = connp->conn_cpid;
4121 	ixa_refrele(ixa);
4122 
4123 	freemsg(data_mp);
4124 	UDPS_BUMP_MIB(us, udpOutErrors);
4125 	UDP_STAT(us, udp_out_err_output);
4126 	return (error);
4127 }
4128 
4129 /* ARGSUSED */
4130 static int
4131 udp_wput_fallback(queue_t *wq, mblk_t *mp)
4132 {
4133 #ifdef DEBUG
4134 	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
4135 #endif
4136 	freemsg(mp);
4137 	return (0);
4138 }
4139 
4140 
4141 /*
4142  * Handle special out-of-band ioctl requests (see PSARC/2008/265).
4143  */
4144 static void
4145 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
4146 {
4147 	void	*data;
4148 	mblk_t	*datamp = mp->b_cont;
4149 	conn_t	*connp = Q_TO_CONN(q);
4150 	udp_t	*udp = connp->conn_udp;
4151 	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
4152 
4153 	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
4154 		cmdp->cb_error = EPROTO;
4155 		qreply(q, mp);
4156 		return;
4157 	}
4158 	data = datamp->b_rptr;
4159 
4160 	mutex_enter(&connp->conn_lock);
4161 	switch (cmdp->cb_cmd) {
4162 	case TI_GETPEERNAME:
4163 		if (udp->udp_state != TS_DATA_XFER)
4164 			cmdp->cb_error = ENOTCONN;
4165 		else
4166 			cmdp->cb_error = conn_getpeername(connp, data,
4167 			    &cmdp->cb_len);
4168 		break;
4169 	case TI_GETMYNAME:
4170 		cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
4171 		break;
4172 	default:
4173 		cmdp->cb_error = EINVAL;
4174 		break;
4175 	}
4176 	mutex_exit(&connp->conn_lock);
4177 
4178 	qreply(q, mp);
4179 }
4180 
4181 static void
4182 udp_use_pure_tpi(udp_t *udp)
4183 {
4184 	conn_t	*connp = udp->udp_connp;
4185 
4186 	mutex_enter(&connp->conn_lock);
4187 	udp->udp_issocket = B_FALSE;
4188 	mutex_exit(&connp->conn_lock);
4189 	UDP_STAT(udp->udp_us, udp_sock_fallback);
4190 }
4191 
/*
 * Dispatch a message according to its STREAMS type (db_type).
 *
 *	M_CMD		handed to udp_wput_cmdblk().
 *	M_PROTO /
 *	M_PCPROTO	treated as a TPI primitive and routed to the matching
 *			udp_* handler; too-short messages are freed.
 *	M_FLUSH		the queue is flushed when FLUSHW is set.
 *	M_IOCTL		TI_GETPEERNAME, TI_GETMYNAME and _SIOCSOCKFALLBACK
 *			are handled here.
 *	M_IOCDATA	handed to udp_wput_iocdata().
 *
 * Every case that fully consumes the message returns; any path that
 * merely breaks out of the switch ends up passing the message to
 * ip_wput_nondata().
 */
static void
udp_wput_other(queue_t *q, mblk_t *mp)
{
	uchar_t	*rptr = mp->b_rptr;
	struct iocblk *iocp;
	conn_t	*connp = Q_TO_CONN(q);
	udp_t	*udp = connp->conn_udp;
	cred_t	*cr;

	switch (mp->b_datap->db_type) {
	case M_CMD:
		udp_wput_cmdblk(q, mp);
		return;

	case M_PROTO:
	case M_PCPROTO:
		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
			/*
			 * If the message does not contain a PRIM_type,
			 * throw it away.
			 */
			freemsg(mp);
			return;
		}
		switch (((t_primp_t)rptr)->type) {
		case T_ADDR_REQ:
			udp_addr_req(q, mp);
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			udp_tpi_bind(q, mp);
			return;
		case T_CONN_REQ:
			udp_tpi_connect(q, mp);
			return;
		case T_CAPABILITY_REQ:
			udp_capability_req(q, mp);
			return;
		case T_INFO_REQ:
			udp_info_req(q, mp);
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are handled
			 * in udp_wput.
			 */
			udp_ud_err(q, mp, EADDRNOTAVAIL);
			return;
		case T_UNBIND_REQ:
			udp_tpi_unbind(q, mp);
			return;
		case T_SVR4_OPTMGMT_REQ:
			/*
			 * All Solaris components should pass a db_credp
			 * for this TPI message, hence we ASSERT.
			 * But in case there is some other M_PROTO that looks
			 * like a TPI message sent by some other kernel
			 * component, we check and return an error.
			 */
			cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			if (cr == NULL) {
				udp_err_ack(q, mp, TSYSERR, EINVAL);
				return;
			}
			/*
			 * Give snmpcom_req() the first chance at the request;
			 * only when it returns false is the message handed to
			 * the generic SVR4 option processing.
			 */
			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
			    cr)) {
				svr4_optcom_req(q, mp, cr, &udp_opt_obj);
			}
			return;

		case T_OPTMGMT_REQ:
			/*
			 * All Solaris components should pass a db_credp
			 * for this TPI message, hence we ASSERT.
			 * But in case there is some other M_PROTO that looks
			 * like a TPI message sent by some other kernel
			 * component, we check and return an error.
			 */
			cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			if (cr == NULL) {
				udp_err_ack(q, mp, TSYSERR, EINVAL);
				return;
			}
			tpi_optcom_req(q, mp, cr, &udp_opt_obj);
			return;

		case T_DISCON_REQ:
			udp_tpi_disconnect(q, mp);
			return;

		/* The following TPI messages are not supported by udp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			return;

		/* The following 3 TPI requests are illegal for udp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			return;
		default:
			/* Unknown primitive: fall out to ip_wput_nondata(). */
			break;
		}
		break;
	case M_FLUSH:
		/* Flush our queue, then let ip_wput_nondata() see it too. */
		if (*rptr & FLUSHW)
			flushq(q, FLUSHDATA);
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (udp->udp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				qreply(q, mp);
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME:
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			return;
		case _SIOCSOCKFALLBACK:
			/*
			 * Either sockmod is about to be popped and the
			 * socket would now be treated as a plain stream,
			 * or a module is about to be pushed so we have
			 * to follow pure TPI semantics.
			 */
			if (!udp->udp_issocket) {
				/* Already not a socket: reject the request. */
				DB_TYPE(mp) = M_IOCNAK;
				iocp->ioc_error = EINVAL;
			} else {
				udp_use_pure_tpi(udp);

				DB_TYPE(mp) = M_IOCACK;
				iocp->ioc_error = 0;
			}
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			qreply(q, mp);
			return;
		default:
			break;
		}
		break;
	case M_IOCDATA:
		udp_wput_iocdata(q, mp);
		return;
	default:
		/* Unrecognized messages are passed through without change. */
		break;
	}
	ip_wput_nondata(q, mp);
}
4365 
/*
 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
 * messages.
 *
 * Only the TI_GETMYNAME and TI_GETPEERNAME ioctls are processed here; an
 * M_IOCDATA for any other command is passed to ip_wput_nondata().  The
 * state returned by mi_copy_state() selects the step:
 *	copy-in 1	the user's strbuf has arrived; validate it and start
 *			copying out the address.
 *	copy-out 1	the address is out; copy out the updated strbuf.
 *	copy-out 2	everything is out; acknowledge the ioctl.
 *	-1		nothing further is done here.
 *	anything else	the ioctl is completed with EPROTO.
 */
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
	mblk_t		*mp1;
	struct	iocblk *iocp = (struct iocblk *)mp->b_rptr;
	STRUCT_HANDLE(strbuf, sb);
	uint_t		addrlen;
	conn_t		*connp = Q_TO_CONN(q);
	udp_t		*udp = connp->conn_udp;

	/* Make sure it is one of ours. */
	switch (iocp->ioc_cmd) {
	case TI_GETMYNAME:
	case TI_GETPEERNAME:
		break;
	default:
		ip_wput_nondata(q, mp);
		return;
	}

	switch (mi_copy_state(q, mp, &mp1)) {
	case -1:
		return;
	case MI_COPY_CASE(MI_COPY_IN, 1):
		break;
	case MI_COPY_CASE(MI_COPY_OUT, 1):
		/*
		 * The address has been copied out, so now
		 * copyout the strbuf.
		 */
		mi_copyout(q, mp);
		return;
	case MI_COPY_CASE(MI_COPY_OUT, 2):
		/*
		 * The address and strbuf have been copied out.
		 * We're done, so just acknowledge the original
		 * M_IOCTL.
		 */
		mi_copy_done(q, mp, 0);
		return;
	default:
		/*
		 * Something strange has happened, so acknowledge
		 * the original M_IOCTL with an EPROTO error.
		 */
		mi_copy_done(q, mp, EPROTO);
		return;
	}

	/*
	 * Now we have the strbuf structure for TI_GETMYNAME
	 * and TI_GETPEERNAME.  Next we copyout the requested
	 * address and then we'll copyout the strbuf.
	 */
	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);

	/* The address size depends only on the endpoint's family. */
	if (connp->conn_family == AF_INET)
		addrlen = sizeof (sin_t);
	else
		addrlen = sizeof (sin6_t);

	/* The caller's buffer must be able to hold the whole address. */
	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}

	switch (iocp->ioc_cmd) {
	case TI_GETMYNAME:
		break;
	case TI_GETPEERNAME:
		/* Re-check the state: there is no peer unless connected. */
		if (udp->udp_state != TS_DATA_XFER) {
			mi_copy_done(q, mp, ENOTCONN);
			return;
		}
		break;
	}
	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
	if (!mp1)
		return;

	/* Record the address length in the strbuf that is copied out later. */
	STRUCT_FSET(sb, len, addrlen);
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
		(void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
		    &addrlen);
		break;
	case TI_GETPEERNAME:
		(void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
		    &addrlen);
		break;
	}
	mp1->b_wptr += addrlen;
	/* Copy out the address */
	mi_copyout(q, mp);
}
4465 
4466 void
4467 udp_ddi_g_init(void)
4468 {
4469 	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
4470 	    udp_opt_obj.odb_opt_arr_cnt);
4471 
4472 	/*
4473 	 * We want to be informed each time a stack is created or
4474 	 * destroyed in the kernel, so we can maintain the
4475 	 * set of udp_stack_t's.
4476 	 */
4477 	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
4478 }
4479 
4480 void
4481 udp_ddi_g_destroy(void)
4482 {
4483 	netstack_unregister(NS_UDP);
4484 }
4485 
4486 #define	INET_NAME	"ip"
4487 
/*
 * Initialize the UDP stack instance.
 *
 * Allocates a zeroed udp_stack_t for netstack `ns' and sets up, in order:
 * the extra-privileged-port table and its lock, the bind fanout table
 * (one mutex per bucket), a private copy of udp_propinfo_tbl, the per-CPU
 * statistics array, the kstats, and the LDI identifier.  All allocations
 * use KM_SLEEP.  Returns the new udp_stack_t.
 */
static void *
udp_stack_init(netstackid_t stackid, netstack_t *ns)
{
	udp_stack_t	*us;
	int		i;
	int		error = 0;
	major_t		major;
	size_t		arrsz;

	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
	us->us_netstack = ns;

	mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
	us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
	us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;

	/*
	 * The smallest anonymous port in the privileged port range which UDP
	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
	 */
	us->us_min_anonpriv_port = 512;

	us->us_bind_fanout_size = udp_bind_fanout_size;

	/* Roundup variable that might have been modified in /etc/system */
	if (!ISP2(us->us_bind_fanout_size)) {
		/* Not a power of two. Round up to nearest power of two */
		/*
		 * NOTE(review): if the value is larger than 1 << 30 the loop
		 * finishes with i == 31 and the shift below no longer fits in
		 * a signed int -- confirm the tunable is bounded elsewhere.
		 */
		for (i = 0; i < 31; i++) {
			if (us->us_bind_fanout_size < (1 << i))
				break;
		}
		us->us_bind_fanout_size = 1 << i;
	}
	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
	    sizeof (udp_fanout_t), KM_SLEEP);
	for (i = 0; i < us->us_bind_fanout_size; i++) {
		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
		    NULL);
	}

	/* Each stack gets its own copy of the property table. */
	arrsz = udp_propinfo_count * sizeof (mod_prop_info_t);
	us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
	    KM_SLEEP);
	bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);

	/* Allocate the per netstack stats */
	mutex_enter(&cpu_lock);
	us->us_sc_cnt = MAX(ncpus, boot_ncpus);
	mutex_exit(&cpu_lock);
	/*
	 * The pointer array has max_ncpus slots, but only the first
	 * us_sc_cnt entries are populated here; udp_stack_cpu_add() fills
	 * in further entries.
	 */
	us->us_sc = kmem_zalloc(max_ncpus  * sizeof (udp_stats_cpu_t *),
	    KM_SLEEP);
	for (i = 0; i < us->us_sc_cnt; i++) {
		us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
		    KM_SLEEP);
	}

	us->us_kstat = udp_kstat2_init(stackid);
	us->us_mibkp = udp_kstat_init(stackid);

	/* The LDI identifier is derived from the INET_NAME driver's major. */
	major = mod_name_to_major(INET_NAME);
	error = ldi_ident_from_major(major, &us->us_ldi_ident);
	ASSERT(error == 0);
	return (us);
}
4556 
4557 /*
4558  * Free the UDP stack instance.
4559  */
4560 static void
4561 udp_stack_fini(netstackid_t stackid, void *arg)
4562 {
4563 	udp_stack_t *us = (udp_stack_t *)arg;
4564 	int i;
4565 
4566 	for (i = 0; i < us->us_bind_fanout_size; i++) {
4567 		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
4568 	}
4569 
4570 	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
4571 	    sizeof (udp_fanout_t));
4572 
4573 	us->us_bind_fanout = NULL;
4574 
4575 	for (i = 0; i < us->us_sc_cnt; i++)
4576 		kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t));
4577 	kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *));
4578 
4579 	kmem_free(us->us_propinfo_tbl,
4580 	    udp_propinfo_count * sizeof (mod_prop_info_t));
4581 	us->us_propinfo_tbl = NULL;
4582 
4583 	udp_kstat_fini(stackid, us->us_mibkp);
4584 	us->us_mibkp = NULL;
4585 
4586 	udp_kstat2_fini(stackid, us->us_kstat);
4587 	us->us_kstat = NULL;
4588 
4589 	mutex_destroy(&us->us_epriv_port_lock);
4590 	ldi_ident_release(us->us_ldi_ident);
4591 	kmem_free(us, sizeof (*us));
4592 }
4593 
4594 static size_t
4595 udp_set_rcv_hiwat(udp_t *udp, size_t size)
4596 {
4597 	udp_stack_t *us = udp->udp_us;
4598 
4599 	/* We add a bit of extra buffering */
4600 	size += size >> 1;
4601 	if (size > us->us_max_buf)
4602 		size = us->us_max_buf;
4603 
4604 	udp->udp_rcv_hiwat = size;
4605 	return (size);
4606 }
4607 
4608 /*
4609  * For the lower queue so that UDP can be a dummy mux.
4610  * Nobody should be sending
4611  * packets up this stream
4612  */
4613 static int
4614 udp_lrput(queue_t *q, mblk_t *mp)
4615 {
4616 	switch (mp->b_datap->db_type) {
4617 	case M_FLUSH:
4618 		/* Turn around */
4619 		if (*mp->b_rptr & FLUSHW) {
4620 			*mp->b_rptr &= ~FLUSHR;
4621 			qreply(q, mp);
4622 			return (0);
4623 		}
4624 		break;
4625 	}
4626 	freemsg(mp);
4627 	return (0);
4628 }
4629 
4630 /*
4631  * For the lower queue so that UDP can be a dummy mux.
4632  * Nobody should be sending packets down this stream.
4633  */
4634 /* ARGSUSED */
4635 int
4636 udp_lwput(queue_t *q, mblk_t *mp)
4637 {
4638 	freemsg(mp);
4639 	return (0);
4640 }
4641 
4642 /*
4643  * When a CPU is added, we need to allocate the per CPU stats struct.
4644  */
4645 void
4646 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid)
4647 {
4648 	int i;
4649 
4650 	if (cpu_seqid < us->us_sc_cnt)
4651 		return;
4652 	for (i = us->us_sc_cnt; i <= cpu_seqid; i++) {
4653 		ASSERT(us->us_sc[i] == NULL);
4654 		us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4655 		    KM_SLEEP);
4656 	}
4657 	membar_producer();
4658 	us->us_sc_cnt = cpu_seqid + 1;
4659 }
4660 
4661 /*
4662  * Below routines for UDP socket module.
4663  */
4664 
/*
 * Allocate and initialize the conn_t/udp_t pair for a new UDP endpoint.
 *
 *	credp	credentials of the opener; a hold is taken on them and they
 *		are stored in conn_cred.
 *	isv6	B_TRUE sets the endpoint up as AF_INET6/IPV6_VERSION,
 *		otherwise as AF_INET/IPV4_VERSION.
 *	flags	KM_SLEEP or KM_NOSLEEP (asserted); handed to
 *		ipcl_conn_create().
 *	errorp	must not be NULL; receives the error code.
 *
 * Returns the new conn_t with udp_state set to TS_UNBND, or NULL with
 * *errorp set to the secpolicy_basic_net_access() error or to ENOMEM when
 * ipcl_conn_create() fails.  On success *errorp is 0.
 */
static conn_t *
udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
{
	udp_t		*udp;
	conn_t		*connp;
	zoneid_t	zoneid;
	netstack_t	*ns;
	udp_stack_t	*us;
	int		len;

	ASSERT(errorp != NULL);

	if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
		return (NULL);

	ns = netstack_find_by_cred(credp);
	ASSERT(ns != NULL);
	us = ns->netstack_udp;
	ASSERT(us != NULL);

	/*
	 * For exclusive stacks we set the zoneid to zero
	 * to make UDP operate as if in the global zone.
	 */
	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = crgetzoneid(credp);

	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);

	connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
	if (connp == NULL) {
		/* Drop the hold obtained from netstack_find_by_cred(). */
		netstack_rele(ns);
		*errorp = ENOMEM;
		return (NULL);
	}
	udp = connp->conn_udp;

	/*
	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
	 * done by netstack_find_by_cred()
	 */
	netstack_rele(ns);

	/*
	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
	 * need to lock anything.
	 */
	ASSERT(connp->conn_proto == IPPROTO_UDP);
	ASSERT(connp->conn_udp == udp);
	ASSERT(udp->udp_connp == connp);

	/* Set the initial state of the stream and the privilege status. */
	udp->udp_state = TS_UNBND;
	connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
	/* `len' is the IP plus UDP header size used for conn_wroff below. */
	if (isv6) {
		connp->conn_family = AF_INET6;
		connp->conn_ipversion = IPV6_VERSION;
		connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
		connp->conn_default_ttl = us->us_ipv6_hoplimit;
		len = sizeof (ip6_t) + UDPH_SIZE;
	} else {
		connp->conn_family = AF_INET;
		connp->conn_ipversion = IPV4_VERSION;
		connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
		connp->conn_default_ttl = us->us_ipv4_ttl;
		len = sizeof (ipha_t) + UDPH_SIZE;
	}

	ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
	connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;

	connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
	connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
	/* conn_allzones can not be set this early, hence no IPCL_ZONEID */
	connp->conn_ixa->ixa_zoneid = zoneid;

	connp->conn_zoneid = zoneid;

	/*
	 * If the caller has the process-wide flag set, then default to MAC
	 * exempt mode.  This allows read-down to unlabeled hosts.
	 */
	if (getpflags(NET_MAC_AWARE, credp) != 0)
		connp->conn_mac_mode = CONN_MAC_AWARE;

	/*
	 * This uses the zone of the credentials themselves, not the
	 * `zoneid' computed above.
	 */
	connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);

	udp->udp_us = us;

	/* Buffer sizes and water marks start from the per-stack defaults. */
	connp->conn_rcvbuf = us->us_recv_hiwat;
	connp->conn_sndbuf = us->us_xmit_hiwat;
	connp->conn_sndlowat = us->us_xmit_lowat;
	connp->conn_rcvlowat = udp_mod_info.mi_lowat;

	connp->conn_wroff = len + us->us_wroff_extra;
	connp->conn_so_type = SOCK_DGRAM;

	connp->conn_recv = udp_input;
	connp->conn_recvicmp = udp_icmp_input;
	crhold(credp);
	connp->conn_cred = credp;
	connp->conn_cpid = curproc->p_pid;
	connp->conn_open_time = ddi_get_lbolt64();
	/* Cache things in ixa without an extra refhold */
	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
	connp->conn_ixa->ixa_cred = connp->conn_cred;
	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
	if (is_system_labeled())
		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);

	/* Reset udp_delayed_addr to the null sin6_t. */
	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;

	if (us->us_pmtu_discovery)
		connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;

	return (connp);
}
4784 
4785 sock_lower_handle_t
4786 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
4787     uint_t *smodep, int *errorp, int flags, cred_t *credp)
4788 {
4789 	udp_t		*udp = NULL;
4790 	udp_stack_t	*us;
4791 	conn_t		*connp;
4792 	boolean_t	isv6;
4793 
4794 	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
4795 	    (proto != 0 && proto != IPPROTO_UDP)) {
4796 		*errorp = EPROTONOSUPPORT;
4797 		return (NULL);
4798 	}
4799 
4800 	if (family == AF_INET6)
4801 		isv6 = B_TRUE;
4802 	else
4803 		isv6 = B_FALSE;
4804 
4805 	connp = udp_do_open(credp, isv6, flags, errorp);
4806 	if (connp == NULL)
4807 		return (NULL);
4808 
4809 	udp = connp->conn_udp;
4810 	ASSERT(udp != NULL);
4811 	us = udp->udp_us;
4812 	ASSERT(us != NULL);
4813 
4814 	udp->udp_issocket = B_TRUE;
4815 	connp->conn_flags |= IPCL_NONSTR;
4816 
4817 	/*
4818 	 * Set flow control
4819 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4820 	 * need to lock anything.
4821 	 */
4822 	(void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
4823 	udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
4824 
4825 	connp->conn_flow_cntrld = B_FALSE;
4826 
4827 	mutex_enter(&connp->conn_lock);
4828 	connp->conn_state_flags &= ~CONN_INCIPIENT;
4829 	mutex_exit(&connp->conn_lock);
4830 
4831 	*errorp = 0;
4832 	*smodep = SM_ATOMIC;
4833 	*sock_downcalls = &sock_udp_downcalls;
4834 	return ((sock_lower_handle_t)connp);
4835 }
4836 
4837 /* ARGSUSED3 */
4838 void
4839 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
4840     sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
4841 {
4842 	conn_t		*connp = (conn_t *)proto_handle;
4843 	struct sock_proto_props sopp;
4844 
4845 	/* All Solaris components should pass a cred for this operation. */
4846 	ASSERT(cr != NULL);
4847 
4848 	connp->conn_upcalls = sock_upcalls;
4849 	connp->conn_upper_handle = sock_handle;
4850 
4851 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
4852 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
4853 	sopp.sopp_wroff = connp->conn_wroff;
4854 	sopp.sopp_maxblk = INFPSZ;
4855 	sopp.sopp_rxhiwat = connp->conn_rcvbuf;
4856 	sopp.sopp_rxlowat = connp->conn_rcvlowat;
4857 	sopp.sopp_maxaddrlen = sizeof (sin6_t);
4858 	sopp.sopp_maxpsz =
4859 	    (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
4860 	    UDP_MAXPACKET_IPV6;
4861 	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
4862 	    udp_mod_info.mi_minpsz;
4863 
4864 	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
4865 	    &sopp);
4866 }
4867 
/*
 * Tear down a UDP endpoint and destroy its conn_t.
 *
 * In order: notify the cluster unbind hook (only when the hook is set and
 * the endpoint is in TS_IDLE), remove the endpoint from the bind hash,
 * quiesce the conn, turn off queue processing for STREAMS endpoints, free
 * the close-time state, release either the minor number (STREAMS) or the
 * helper stream (non-STREAMS), and finally drop the last reference and
 * destroy the conn_t.
 */
static void
udp_do_close(conn_t *connp)
{
	udp_t	*udp;

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
	udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (connp->conn_ipversion == IPV4_VERSION) {
			/* Pass only the IPv4 part of the local address. */
			(*cl_inet_unbind)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
			    (in_port_t)connp->conn_lport, NULL);
		} else {
			(*cl_inet_unbind)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(connp->conn_laddr_v6),
			    (in_port_t)connp->conn_lport, NULL);
		}
	}

	udp_bind_hash_remove(udp, B_FALSE);

	ip_quiesce_conn(connp);

	/* Only STREAMS endpoints have queues to turn off. */
	if (!IPCL_IS_NONSTR(connp)) {
		ASSERT(connp->conn_wq != NULL);
		ASSERT(connp->conn_rq != NULL);
		qprocsoff(connp->conn_rq);
	}

	udp_close_free(connp);

	/*
	 * Now we are truly single threaded on this stream, and can
	 * delete the things hanging off the connp, and finally the connp.
	 * We removed this connp from the fanout list, it cannot be
	 * accessed thru the fanouts, and we already waited for the
	 * conn_ref to drop to 0. We are already in close, so
	 * there cannot be any other thread from the top. qprocsoff
	 * has completed, and service has completed or won't run in
	 * future.
	 */
	ASSERT(connp->conn_ref == 1);

	if (!IPCL_IS_NONSTR(connp)) {
		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
	} else {
		ip_free_helper_stream(connp);
	}

	/* Drop the final reference before handing the conn_t back. */
	connp->conn_ref--;
	ipcl_conn_destroy(connp);
}
4928 
4929 /* ARGSUSED1 */
4930 int
4931 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
4932 {
4933 	conn_t	*connp = (conn_t *)proto_handle;
4934 
4935 	/* All Solaris components should pass a cred for this operation. */
4936 	ASSERT(cr != NULL);
4937 
4938 	udp_do_close(connp);
4939 	return (0);
4940 }
4941 
4942 static int
4943 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
4944     boolean_t bind_to_req_port_only)
4945 {
4946 	sin_t		*sin;
4947 	sin6_t		*sin6;
4948 	udp_t		*udp = connp->conn_udp;
4949 	int		error = 0;
4950 	ip_laddr_t	laddr_type = IPVL_UNICAST_UP;	/* INADDR_ANY */
4951 	in_port_t	port;		/* Host byte order */
4952 	in_port_t	requested_port;	/* Host byte order */
4953 	int		count;
4954 	ipaddr_t	v4src;		/* Set if AF_INET */
4955 	in6_addr_t	v6src;
4956 	int		loopmax;
4957 	udp_fanout_t	*udpf;
4958 	in_port_t	lport;		/* Network byte order */
4959 	uint_t		scopeid = 0;
4960 	zoneid_t	zoneid = IPCL_ZONEID(connp);
4961 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
4962 	boolean_t	is_inaddr_any;
4963 	mlp_type_t	addrtype, mlptype;
4964 	udp_stack_t	*us = udp->udp_us;
4965 
4966 	sin = NULL;
4967 	sin6 = NULL;
4968 	switch (len) {
4969 	case sizeof (sin_t):	/* Complete IPv4 address */
4970 		sin = (sin_t *)sa;
4971 
4972 		if (sin == NULL || !OK_32PTR((char *)sin))
4973 			return (EINVAL);
4974 
4975 		if (connp->conn_family != AF_INET ||
4976 		    sin->sin_family != AF_INET) {
4977 			return (EAFNOSUPPORT);
4978 		}
4979 		v4src = sin->sin_addr.s_addr;
4980 		IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
4981 		if (v4src != INADDR_ANY) {
4982 			laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
4983 			    B_TRUE);
4984 		}
4985 		port = ntohs(sin->sin_port);
4986 		break;
4987 
4988 	case sizeof (sin6_t):	/* complete IPv6 address */
4989 		sin6 = (sin6_t *)sa;
4990 
4991 		if (sin6 == NULL || !OK_32PTR((char *)sin6))
4992 			return (EINVAL);
4993 
4994 		if (connp->conn_family != AF_INET6 ||
4995 		    sin6->sin6_family != AF_INET6) {
4996 			return (EAFNOSUPPORT);
4997 		}
4998 		v6src = sin6->sin6_addr;
4999 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5000 			if (connp->conn_ipv6_v6only)
5001 				return (EADDRNOTAVAIL);
5002 
5003 			IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
5004 			if (v4src != INADDR_ANY) {
5005 				laddr_type = ip_laddr_verify_v4(v4src,
5006 				    zoneid, ipst, B_FALSE);
5007 			}
5008 		} else {
5009 			if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5010 				if (IN6_IS_ADDR_LINKSCOPE(&v6src))
5011 					scopeid = sin6->sin6_scope_id;
5012 				laddr_type = ip_laddr_verify_v6(&v6src,
5013 				    zoneid, ipst, B_TRUE, scopeid);
5014 			}
5015 		}
5016 		port = ntohs(sin6->sin6_port);
5017 		break;
5018 
5019 	default:		/* Invalid request */
5020 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5021 		    "udp_bind: bad ADDR_length length %u", len);
5022 		return (-TBADADDR);
5023 	}
5024 
5025 	/* Is the local address a valid unicast, multicast, or broadcast? */
5026 	if (laddr_type == IPVL_BAD)
5027 		return (EADDRNOTAVAIL);
5028 
5029 	requested_port = port;
5030 
5031 	if (requested_port == 0 || !bind_to_req_port_only)
5032 		bind_to_req_port_only = B_FALSE;
5033 	else		/* T_BIND_REQ and requested_port != 0 */
5034 		bind_to_req_port_only = B_TRUE;
5035 
5036 	if (requested_port == 0) {
5037 		/*
5038 		 * If the application passed in zero for the port number, it
5039 		 * doesn't care which port number we bind to. Get one in the
5040 		 * valid range.
5041 		 */
5042 		if (connp->conn_anon_priv_bind) {
5043 			port = udp_get_next_priv_port(udp);
5044 		} else {
5045 			port = udp_update_next_port(udp,
5046 			    us->us_next_port_to_try, B_TRUE);
5047 		}
5048 	} else {
5049 		/*
5050 		 * If the port is in the well-known privileged range,
5051 		 * make sure the caller was privileged.
5052 		 */
5053 		int i;
5054 		boolean_t priv = B_FALSE;
5055 
5056 		if (port < us->us_smallest_nonpriv_port) {
5057 			priv = B_TRUE;
5058 		} else {
5059 			for (i = 0; i < us->us_num_epriv_ports; i++) {
5060 				if (port == us->us_epriv_ports[i]) {
5061 					priv = B_TRUE;
5062 					break;
5063 				}
5064 			}
5065 		}
5066 
5067 		if (priv) {
5068 			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
5069 				return (-TACCES);
5070 		}
5071 	}
5072 
5073 	if (port == 0)
5074 		return (-TNOADDR);
5075 
5076 	/*
5077 	 * The state must be TS_UNBND. TPI mandates that users must send
5078 	 * TPI primitives only 1 at a time and wait for the response before
5079 	 * sending the next primitive.
5080 	 */
5081 	mutex_enter(&connp->conn_lock);
5082 	if (udp->udp_state != TS_UNBND) {
5083 		mutex_exit(&connp->conn_lock);
5084 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5085 		    "udp_bind: bad state, %u", udp->udp_state);
5086 		return (-TOUTSTATE);
5087 	}
5088 	/*
5089 	 * Copy the source address into our udp structure. This address
5090 	 * may still be zero; if so, IP will fill in the correct address
5091 	 * each time an outbound packet is passed to it. Since the udp is
5092 	 * not yet in the bind hash list, we don't grab the uf_lock to
5093 	 * change conn_ipversion
5094 	 */
5095 	if (connp->conn_family == AF_INET) {
5096 		ASSERT(sin != NULL);
5097 		ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
5098 	} else {
5099 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5100 			/*
5101 			 * no need to hold the uf_lock to set the conn_ipversion
5102 			 * since we are not yet in the fanout list
5103 			 */
5104 			connp->conn_ipversion = IPV4_VERSION;
5105 			connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
5106 		} else {
5107 			connp->conn_ipversion = IPV6_VERSION;
5108 			connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
5109 		}
5110 	}
5111 
5112 	/*
5113 	 * If conn_reuseaddr is not set, then we have to make sure that
5114 	 * the IP address and port number the application requested
5115 	 * (or we selected for the application) is not being used by
5116 	 * another stream.  If another stream is already using the
5117 	 * requested IP address and port, the behavior depends on
5118 	 * "bind_to_req_port_only". If set the bind fails; otherwise we
5119 	 * search for any unused port to bind to the stream.
5120 	 *
5121 	 * As per the BSD semantics, as modified by the Deering multicast
5122 	 * changes, if conn_reuseaddr is set, then we allow multiple binds
5123 	 * to the same port independent of the local IP address.
5124 	 *
5125 	 * This is slightly different than in SunOS 4.X which did not
5126 	 * support IP multicast. Note that the change implemented by the
5127 	 * Deering multicast code effects all binds - not only binding
5128 	 * to IP multicast addresses.
5129 	 *
5130 	 * Note that when binding to port zero we ignore SO_REUSEADDR in
5131 	 * order to guarantee a unique port.
5132 	 */
5133 
5134 	count = 0;
5135 	if (connp->conn_anon_priv_bind) {
5136 		/*
5137 		 * loopmax = (IPPORT_RESERVED-1) -
5138 		 *    us->us_min_anonpriv_port + 1
5139 		 */
5140 		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
5141 	} else {
5142 		loopmax = us->us_largest_anon_port -
5143 		    us->us_smallest_anon_port + 1;
5144 	}
5145 
5146 	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
5147 
5148 	for (;;) {
5149 		udp_t		*udp1;
5150 		boolean_t	found_exclbind = B_FALSE;
5151 		conn_t		*connp1;
5152 
5153 		/*
5154 		 * Walk through the list of udp streams bound to
5155 		 * requested port with the same IP address.
5156 		 */
5157 		lport = htons(port);
5158 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
5159 		    us->us_bind_fanout_size)];
5160 		mutex_enter(&udpf->uf_lock);
5161 		for (udp1 = udpf->uf_udp; udp1 != NULL;
5162 		    udp1 = udp1->udp_bind_hash) {
5163 			connp1 = udp1->udp_connp;
5164 
5165 			if (lport != connp1->conn_lport)
5166 				continue;
5167 
5168 			/*
5169 			 * On a labeled system, we must treat bindings to ports
5170 			 * on shared IP addresses by sockets with MAC exemption
5171 			 * privilege as being in all zones, as there's
5172 			 * otherwise no way to identify the right receiver.
5173 			 */
5174 			if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
5175 				continue;
5176 
5177 			/*
5178 			 * If UDP_EXCLBIND is set for either the bound or
5179 			 * binding endpoint, the semantics of bind
5180 			 * is changed according to the following chart.
5181 			 *
5182 			 * spec = specified address (v4 or v6)
5183 			 * unspec = unspecified address (v4 or v6)
5184 			 * A = specified addresses are different for endpoints
5185 			 *
5186 			 * bound	bind to		allowed?
5187 			 * -------------------------------------
5188 			 * unspec	unspec		no
5189 			 * unspec	spec		no
5190 			 * spec		unspec		no
5191 			 * spec		spec		yes if A
5192 			 *
5193 			 * For labeled systems, SO_MAC_EXEMPT behaves the same
5194 			 * as UDP_EXCLBIND, except that zoneid is ignored.
5195 			 */
5196 			if (connp1->conn_exclbind || connp->conn_exclbind ||
5197 			    IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
5198 				if (V6_OR_V4_INADDR_ANY(
5199 				    connp1->conn_bound_addr_v6) ||
5200 				    is_inaddr_any ||
5201 				    IN6_ARE_ADDR_EQUAL(
5202 				    &connp1->conn_bound_addr_v6,
5203 				    &v6src)) {
5204 					found_exclbind = B_TRUE;
5205 					break;
5206 				}
5207 				continue;
5208 			}
5209 
5210 			/*
5211 			 * Check ipversion to allow IPv4 and IPv6 sockets to
5212 			 * have disjoint port number spaces.
5213 			 */
5214 			if (connp->conn_ipversion != connp1->conn_ipversion) {
5215 
				/*
				 * On the first time through the loop, if the
				 * user intentionally specified a particular
				 * port number, then ignore any bindings of
				 * the other protocol that may conflict. This
				 * allows the user to bind IPv6 alone and get
				 * both v4 and v6, or bind both and get each
				 * separately. On subsequent times through the
				 * loop, we're checking a port that we chose
				 * (not the user) and thus we do not allow
				 * casual duplicate bindings.
				 */
5228 				if (count == 0 && requested_port != 0)
5229 					continue;
5230 			}
5231 
5232 			/*
5233 			 * No difference depending on SO_REUSEADDR.
5234 			 *
5235 			 * If existing port is bound to a
5236 			 * non-wildcard IP address and
5237 			 * the requesting stream is bound to
5238 			 * a distinct different IP addresses
5239 			 * (non-wildcard, also), keep going.
5240 			 */
5241 			if (!is_inaddr_any &&
5242 			    !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
5243 			    !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
5244 			    &v6src)) {
5245 				continue;
5246 			}
5247 			break;
5248 		}
5249 
5250 		if (!found_exclbind &&
5251 		    (connp->conn_reuseaddr && requested_port != 0)) {
5252 			break;
5253 		}
5254 
5255 		if (udp1 == NULL) {
5256 			/*
5257 			 * No other stream has this IP address
5258 			 * and port number. We can use it.
5259 			 */
5260 			break;
5261 		}
5262 		mutex_exit(&udpf->uf_lock);
5263 		if (bind_to_req_port_only) {
5264 			/*
5265 			 * We get here only when requested port
5266 			 * is bound (and only first  of the for()
5267 			 * loop iteration).
5268 			 *
5269 			 * The semantics of this bind request
5270 			 * require it to fail so we return from
5271 			 * the routine (and exit the loop).
5272 			 *
5273 			 */
5274 			mutex_exit(&connp->conn_lock);
5275 			return (-TADDRBUSY);
5276 		}
5277 
5278 		if (connp->conn_anon_priv_bind) {
5279 			port = udp_get_next_priv_port(udp);
5280 		} else {
5281 			if ((count == 0) && (requested_port != 0)) {
5282 				/*
5283 				 * If the application wants us to find
5284 				 * a port, get one to start with. Set
5285 				 * requested_port to 0, so that we will
5286 				 * update us->us_next_port_to_try below.
5287 				 */
5288 				port = udp_update_next_port(udp,
5289 				    us->us_next_port_to_try, B_TRUE);
5290 				requested_port = 0;
5291 			} else {
5292 				port = udp_update_next_port(udp, port + 1,
5293 				    B_FALSE);
5294 			}
5295 		}
5296 
5297 		if (port == 0 || ++count >= loopmax) {
5298 			/*
5299 			 * We've tried every possible port number and
5300 			 * there are none available, so send an error
5301 			 * to the user.
5302 			 */
5303 			mutex_exit(&connp->conn_lock);
5304 			return (-TNOADDR);
5305 		}
5306 	}
5307 
5308 	/*
5309 	 * Copy the source address into our udp structure.  This address
5310 	 * may still be zero; if so, ip_attr_connect will fill in the correct
5311 	 * address when a packet is about to be sent.
5312 	 * If we are binding to a broadcast or multicast address then
5313 	 * we just set the conn_bound_addr since we don't want to use
5314 	 * that as the source address when sending.
5315 	 */
5316 	connp->conn_bound_addr_v6 = v6src;
5317 	connp->conn_laddr_v6 = v6src;
5318 	if (scopeid != 0) {
5319 		connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
5320 		connp->conn_ixa->ixa_scopeid = scopeid;
5321 		connp->conn_incoming_ifindex = scopeid;
5322 	} else {
5323 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5324 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5325 	}
5326 
5327 	switch (laddr_type) {
5328 	case IPVL_UNICAST_UP:
5329 	case IPVL_UNICAST_DOWN:
5330 		connp->conn_saddr_v6 = v6src;
5331 		connp->conn_mcbc_bind = B_FALSE;
5332 		break;
5333 	case IPVL_MCAST:
5334 	case IPVL_BCAST:
5335 		/* ip_set_destination will pick a source address later */
5336 		connp->conn_saddr_v6 = ipv6_all_zeros;
5337 		connp->conn_mcbc_bind = B_TRUE;
5338 		break;
5339 	}
5340 
5341 	/* Any errors after this point should use late_error */
5342 	connp->conn_lport = lport;
5343 
5344 	/*
5345 	 * Now reset the next anonymous port if the application requested
5346 	 * an anonymous port, or we handed out the next anonymous port.
5347 	 */
5348 	if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
5349 		us->us_next_port_to_try = port + 1;
5350 	}
5351 
5352 	/* Initialize the T_BIND_ACK. */
5353 	if (connp->conn_family == AF_INET) {
5354 		sin->sin_port = connp->conn_lport;
5355 	} else {
5356 		sin6->sin6_port = connp->conn_lport;
5357 	}
5358 	udp->udp_state = TS_IDLE;
5359 	udp_bind_hash_insert(udpf, udp);
5360 	mutex_exit(&udpf->uf_lock);
5361 	mutex_exit(&connp->conn_lock);
5362 
5363 	if (cl_inet_bind) {
5364 		/*
5365 		 * Running in cluster mode - register bind information
5366 		 */
5367 		if (connp->conn_ipversion == IPV4_VERSION) {
5368 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5369 			    IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
5370 			    (in_port_t)connp->conn_lport, NULL);
5371 		} else {
5372 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5373 			    IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
5374 			    (in_port_t)connp->conn_lport, NULL);
5375 		}
5376 	}
5377 
5378 	mutex_enter(&connp->conn_lock);
5379 	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
5380 	if (is_system_labeled() && (!connp->conn_anon_port ||
5381 	    connp->conn_anon_mlp)) {
5382 		uint16_t mlpport;
5383 		zone_t *zone;
5384 
5385 		zone = crgetzone(cr);
5386 		connp->conn_mlp_type =
5387 		    connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
5388 		    mlptSingle;
5389 		addrtype = tsol_mlp_addr_type(
5390 		    connp->conn_allzones ? ALL_ZONES : zone->zone_id,
5391 		    IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
5392 		if (addrtype == mlptSingle) {
5393 			error = -TNOADDR;
5394 			mutex_exit(&connp->conn_lock);
5395 			goto late_error;
5396 		}
5397 		mlpport = connp->conn_anon_port ? PMAPPORT : port;
5398 		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
5399 		    addrtype);
5400 
5401 		/*
5402 		 * It is a coding error to attempt to bind an MLP port
5403 		 * without first setting SOL_SOCKET/SCM_UCRED.
5404 		 */
5405 		if (mlptype != mlptSingle &&
5406 		    connp->conn_mlp_type == mlptSingle) {
5407 			error = EINVAL;
5408 			mutex_exit(&connp->conn_lock);
5409 			goto late_error;
5410 		}
5411 
5412 		/*
5413 		 * It is an access violation to attempt to bind an MLP port
5414 		 * without NET_BINDMLP privilege.
5415 		 */
5416 		if (mlptype != mlptSingle &&
5417 		    secpolicy_net_bindmlp(cr) != 0) {
5418 			if (connp->conn_debug) {
5419 				(void) strlog(UDP_MOD_ID, 0, 1,
5420 				    SL_ERROR|SL_TRACE,
5421 				    "udp_bind: no priv for multilevel port %d",
5422 				    mlpport);
5423 			}
5424 			error = -TACCES;
5425 			mutex_exit(&connp->conn_lock);
5426 			goto late_error;
5427 		}
5428 
5429 		/*
5430 		 * If we're specifically binding a shared IP address and the
5431 		 * port is MLP on shared addresses, then check to see if this
5432 		 * zone actually owns the MLP.  Reject if not.
5433 		 */
5434 		if (mlptype == mlptShared && addrtype == mlptShared) {
5435 			/*
5436 			 * No need to handle exclusive-stack zones since
5437 			 * ALL_ZONES only applies to the shared stack.
5438 			 */
5439 			zoneid_t mlpzone;
5440 
5441 			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
5442 			    htons(mlpport));
5443 			if (connp->conn_zoneid != mlpzone) {
5444 				if (connp->conn_debug) {
5445 					(void) strlog(UDP_MOD_ID, 0, 1,
5446 					    SL_ERROR|SL_TRACE,
5447 					    "udp_bind: attempt to bind port "
5448 					    "%d on shared addr in zone %d "
5449 					    "(should be %d)",
5450 					    mlpport, connp->conn_zoneid,
5451 					    mlpzone);
5452 				}
5453 				error = -TACCES;
5454 				mutex_exit(&connp->conn_lock);
5455 				goto late_error;
5456 			}
5457 		}
5458 		if (connp->conn_anon_port) {
5459 			error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
5460 			    port, B_TRUE);
5461 			if (error != 0) {
5462 				if (connp->conn_debug) {
5463 					(void) strlog(UDP_MOD_ID, 0, 1,
5464 					    SL_ERROR|SL_TRACE,
5465 					    "udp_bind: cannot establish anon "
5466 					    "MLP for port %d", port);
5467 				}
5468 				error = -TACCES;
5469 				mutex_exit(&connp->conn_lock);
5470 				goto late_error;
5471 			}
5472 		}
5473 		connp->conn_mlp_type = mlptype;
5474 	}
5475 
5476 	/*
5477 	 * We create an initial header template here to make a subsequent
5478 	 * sendto have a starting point. Since conn_last_dst is zero the
5479 	 * first sendto will always follow the 'dst changed' code path.
5480 	 * Note that we defer massaging options and the related checksum
5481 	 * adjustment until we have a destination address.
5482 	 */
5483 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5484 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5485 	if (error != 0) {
5486 		mutex_exit(&connp->conn_lock);
5487 		goto late_error;
5488 	}
5489 	/* Just in case */
5490 	connp->conn_faddr_v6 = ipv6_all_zeros;
5491 	connp->conn_fport = 0;
5492 	connp->conn_v6lastdst = ipv6_all_zeros;
5493 	mutex_exit(&connp->conn_lock);
5494 
5495 	error = ip_laddr_fanout_insert(connp);
5496 	if (error != 0)
5497 		goto late_error;
5498 
5499 	/* Bind succeeded */
5500 	return (0);
5501 
5502 late_error:
5503 	/* We had already picked the port number, and then the bind failed */
5504 	mutex_enter(&connp->conn_lock);
5505 	udpf = &us->us_bind_fanout[
5506 	    UDP_BIND_HASH(connp->conn_lport,
5507 	    us->us_bind_fanout_size)];
5508 	mutex_enter(&udpf->uf_lock);
5509 	connp->conn_saddr_v6 = ipv6_all_zeros;
5510 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
5511 	connp->conn_laddr_v6 = ipv6_all_zeros;
5512 	if (scopeid != 0) {
5513 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5514 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5515 	}
5516 	udp->udp_state = TS_UNBND;
5517 	udp_bind_hash_remove(udp, B_TRUE);
5518 	connp->conn_lport = 0;
5519 	mutex_exit(&udpf->uf_lock);
5520 	connp->conn_anon_port = B_FALSE;
5521 	connp->conn_mlp_type = mlptSingle;
5522 
5523 	connp->conn_v6lastdst = ipv6_all_zeros;
5524 
5525 	/* Restore the header that was built above - different source address */
5526 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5527 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5528 	mutex_exit(&connp->conn_lock);
5529 	return (error);
5530 }
5531 
5532 int
5533 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5534     socklen_t len, cred_t *cr)
5535 {
5536 	int		error;
5537 	conn_t		*connp;
5538 
5539 	/* All Solaris components should pass a cred for this operation. */
5540 	ASSERT(cr != NULL);
5541 
5542 	connp = (conn_t *)proto_handle;
5543 
5544 	if (sa == NULL)
5545 		error = udp_do_unbind(connp);
5546 	else
5547 		error = udp_do_bind(connp, sa, len, cr, B_TRUE);
5548 
5549 	if (error < 0) {
5550 		if (error == -TOUTSTATE)
5551 			error = EINVAL;
5552 		else
5553 			error = proto_tlitosyserr(-error);
5554 	}
5555 
5556 	return (error);
5557 }
5558 
5559 static int
5560 udp_implicit_bind(conn_t *connp, cred_t *cr)
5561 {
5562 	sin6_t sin6addr;
5563 	sin_t *sin;
5564 	sin6_t *sin6;
5565 	socklen_t len;
5566 	int error;
5567 
5568 	/* All Solaris components should pass a cred for this operation. */
5569 	ASSERT(cr != NULL);
5570 
5571 	if (connp->conn_family == AF_INET) {
5572 		len = sizeof (struct sockaddr_in);
5573 		sin = (sin_t *)&sin6addr;
5574 		*sin = sin_null;
5575 		sin->sin_family = AF_INET;
5576 		sin->sin_addr.s_addr = INADDR_ANY;
5577 	} else {
5578 		ASSERT(connp->conn_family == AF_INET6);
5579 		len = sizeof (sin6_t);
5580 		sin6 = (sin6_t *)&sin6addr;
5581 		*sin6 = sin6_null;
5582 		sin6->sin6_family = AF_INET6;
5583 		V6_SET_ZERO(sin6->sin6_addr);
5584 	}
5585 
5586 	error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
5587 	    cr, B_FALSE);
5588 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
5589 }
5590 
5591 /*
5592  * This routine removes a port number association from a stream. It
5593  * is called by udp_unbind and udp_tpi_unbind.
5594  */
5595 static int
5596 udp_do_unbind(conn_t *connp)
5597 {
5598 	udp_t		*udp = connp->conn_udp;
5599 	udp_fanout_t	*udpf;
5600 	udp_stack_t	*us = udp->udp_us;
5601 
5602 	if (cl_inet_unbind != NULL) {
5603 		/*
5604 		 * Running in cluster mode - register unbind information
5605 		 */
5606 		if (connp->conn_ipversion == IPV4_VERSION) {
5607 			(*cl_inet_unbind)(
5608 			    connp->conn_netstack->netstack_stackid,
5609 			    IPPROTO_UDP, AF_INET,
5610 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5611 			    (in_port_t)connp->conn_lport, NULL);
5612 		} else {
5613 			(*cl_inet_unbind)(
5614 			    connp->conn_netstack->netstack_stackid,
5615 			    IPPROTO_UDP, AF_INET6,
5616 			    (uint8_t *)&(connp->conn_laddr_v6),
5617 			    (in_port_t)connp->conn_lport, NULL);
5618 		}
5619 	}
5620 
5621 	mutex_enter(&connp->conn_lock);
5622 	/* If a bind has not been done, we can't unbind. */
5623 	if (udp->udp_state == TS_UNBND) {
5624 		mutex_exit(&connp->conn_lock);
5625 		return (-TOUTSTATE);
5626 	}
5627 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5628 	    us->us_bind_fanout_size)];
5629 	mutex_enter(&udpf->uf_lock);
5630 	udp_bind_hash_remove(udp, B_TRUE);
5631 	connp->conn_saddr_v6 = ipv6_all_zeros;
5632 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
5633 	connp->conn_laddr_v6 = ipv6_all_zeros;
5634 	connp->conn_mcbc_bind = B_FALSE;
5635 	connp->conn_lport = 0;
5636 	/* In case we were also connected */
5637 	connp->conn_faddr_v6 = ipv6_all_zeros;
5638 	connp->conn_fport = 0;
5639 	mutex_exit(&udpf->uf_lock);
5640 
5641 	connp->conn_v6lastdst = ipv6_all_zeros;
5642 	udp->udp_state = TS_UNBND;
5643 
5644 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5645 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5646 	mutex_exit(&connp->conn_lock);
5647 
5648 	ip_unbind(connp);
5649 
5650 	return (0);
5651 }
5652 
5653 /*
5654  * It associates a default destination address with the stream.
5655  */
5656 static int
5657 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
5658     cred_t *cr, pid_t pid)
5659 {
5660 	sin6_t		*sin6;
5661 	sin_t		*sin;
5662 	in6_addr_t	v6dst;
5663 	ipaddr_t	v4dst;
5664 	uint16_t	dstport;
5665 	uint32_t	flowinfo;
5666 	udp_fanout_t	*udpf;
5667 	udp_t		*udp, *udp1;
5668 	ushort_t	ipversion;
5669 	udp_stack_t	*us;
5670 	int		error;
5671 	conn_t		*connp1;
5672 	ip_xmit_attr_t	*ixa;
5673 	ip_xmit_attr_t	*oldixa;
5674 	uint_t		scopeid = 0;
5675 	uint_t		srcid = 0;
5676 	in6_addr_t	v6src = connp->conn_saddr_v6;
5677 	boolean_t	v4mapped;
5678 
5679 	udp = connp->conn_udp;
5680 	us = udp->udp_us;
5681 	sin = NULL;
5682 	sin6 = NULL;
5683 	v4dst = INADDR_ANY;
5684 	flowinfo = 0;
5685 
5686 	/*
5687 	 * Address has been verified by the caller
5688 	 */
5689 	switch (len) {
5690 	default:
5691 		/*
5692 		 * Should never happen
5693 		 */
5694 		return (EINVAL);
5695 
5696 	case sizeof (sin_t):
5697 		sin = (sin_t *)sa;
5698 		v4dst = sin->sin_addr.s_addr;
5699 		dstport = sin->sin_port;
5700 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5701 		ASSERT(connp->conn_ipversion == IPV4_VERSION);
5702 		ipversion = IPV4_VERSION;
5703 		break;
5704 
5705 	case sizeof (sin6_t):
5706 		sin6 = (sin6_t *)sa;
5707 		v6dst = sin6->sin6_addr;
5708 		dstport = sin6->sin6_port;
5709 		srcid = sin6->__sin6_src_id;
5710 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
5711 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5712 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
5713 			    v4mapped, connp->conn_netstack)) {
5714 				/* Mismatch v4mapped/v6 specified by srcid. */
5715 				return (EADDRNOTAVAIL);
5716 			}
5717 		}
5718 		if (v4mapped) {
5719 			if (connp->conn_ipv6_v6only)
5720 				return (EADDRNOTAVAIL);
5721 
5722 			/*
5723 			 * Destination adress is mapped IPv6 address.
5724 			 * Source bound address should be unspecified or
5725 			 * IPv6 mapped address as well.
5726 			 */
5727 			if (!IN6_IS_ADDR_UNSPECIFIED(
5728 			    &connp->conn_bound_addr_v6) &&
5729 			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
5730 				return (EADDRNOTAVAIL);
5731 			}
5732 			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
5733 			ipversion = IPV4_VERSION;
5734 			flowinfo = 0;
5735 		} else {
5736 			ipversion = IPV6_VERSION;
5737 			flowinfo = sin6->sin6_flowinfo;
5738 			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
5739 				scopeid = sin6->sin6_scope_id;
5740 		}
5741 		break;
5742 	}
5743 
5744 	if (dstport == 0)
5745 		return (-TBADADDR);
5746 
5747 	/*
5748 	 * If there is a different thread using conn_ixa then we get a new
5749 	 * copy and cut the old one loose from conn_ixa. Otherwise we use
5750 	 * conn_ixa and prevent any other thread from using/changing it.
5751 	 * Once connect() is done other threads can use conn_ixa since the
5752 	 * refcnt will be back at one.
5753 	 * We defer updating conn_ixa until later to handle any concurrent
5754 	 * conn_ixa_cleanup thread.
5755 	 */
5756 	ixa = conn_get_ixa(connp, B_FALSE);
5757 	if (ixa == NULL)
5758 		return (ENOMEM);
5759 
5760 	mutex_enter(&connp->conn_lock);
5761 	/*
5762 	 * This udp_t must have bound to a port already before doing a connect.
5763 	 * Reject if a connect is in progress (we drop conn_lock during
5764 	 * udp_do_connect).
5765 	 */
5766 	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
5767 		mutex_exit(&connp->conn_lock);
5768 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5769 		    "udp_connect: bad state, %u", udp->udp_state);
5770 		ixa_refrele(ixa);
5771 		return (-TOUTSTATE);
5772 	}
5773 	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
5774 
5775 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5776 	    us->us_bind_fanout_size)];
5777 
5778 	mutex_enter(&udpf->uf_lock);
5779 	if (udp->udp_state == TS_DATA_XFER) {
5780 		/* Already connected - clear out state */
5781 		if (connp->conn_mcbc_bind)
5782 			connp->conn_saddr_v6 = ipv6_all_zeros;
5783 		else
5784 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5785 		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5786 		connp->conn_faddr_v6 = ipv6_all_zeros;
5787 		connp->conn_fport = 0;
5788 		udp->udp_state = TS_IDLE;
5789 	}
5790 
5791 	connp->conn_fport = dstport;
5792 	connp->conn_ipversion = ipversion;
5793 	if (ipversion == IPV4_VERSION) {
5794 		/*
5795 		 * Interpret a zero destination to mean loopback.
5796 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
5797 		 * generate the T_CONN_CON.
5798 		 */
5799 		if (v4dst == INADDR_ANY) {
5800 			v4dst = htonl(INADDR_LOOPBACK);
5801 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5802 			if (connp->conn_family == AF_INET) {
5803 				sin->sin_addr.s_addr = v4dst;
5804 			} else {
5805 				sin6->sin6_addr = v6dst;
5806 			}
5807 		}
5808 		connp->conn_faddr_v6 = v6dst;
5809 		connp->conn_flowinfo = 0;
5810 	} else {
5811 		ASSERT(connp->conn_ipversion == IPV6_VERSION);
5812 		/*
5813 		 * Interpret a zero destination to mean loopback.
5814 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
5815 		 * generate the T_CONN_CON.
5816 		 */
5817 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
5818 			v6dst = ipv6_loopback;
5819 			sin6->sin6_addr = v6dst;
5820 		}
5821 		connp->conn_faddr_v6 = v6dst;
5822 		connp->conn_flowinfo = flowinfo;
5823 	}
5824 	mutex_exit(&udpf->uf_lock);
5825 
5826 	/*
5827 	 * We update our cred/cpid based on the caller of connect
5828 	 */
5829 	if (connp->conn_cred != cr) {
5830 		crhold(cr);
5831 		crfree(connp->conn_cred);
5832 		connp->conn_cred = cr;
5833 	}
5834 	connp->conn_cpid = pid;
5835 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
5836 	ixa->ixa_cred = cr;
5837 	ixa->ixa_cpid = pid;
5838 	if (is_system_labeled()) {
5839 		/* We need to restart with a label based on the cred */
5840 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
5841 	}
5842 
5843 	if (scopeid != 0) {
5844 		ixa->ixa_flags |= IXAF_SCOPEID_SET;
5845 		ixa->ixa_scopeid = scopeid;
5846 		connp->conn_incoming_ifindex = scopeid;
5847 	} else {
5848 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5849 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5850 	}
5851 	/*
5852 	 * conn_connect will drop conn_lock and reacquire it.
5853 	 * To prevent a send* from messing with this udp_t while the lock
5854 	 * is dropped we set udp_state and clear conn_v6lastdst.
5855 	 * That will make all send* fail with EISCONN.
5856 	 */
5857 	connp->conn_v6lastdst = ipv6_all_zeros;
5858 	udp->udp_state = TS_WCON_CREQ;
5859 
5860 	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
5861 	mutex_exit(&connp->conn_lock);
5862 	if (error != 0)
5863 		goto connect_failed;
5864 
5865 	/*
5866 	 * The addresses have been verified. Time to insert in
5867 	 * the correct fanout list.
5868 	 */
5869 	error = ipcl_conn_insert(connp);
5870 	if (error != 0)
5871 		goto connect_failed;
5872 
5873 	mutex_enter(&connp->conn_lock);
5874 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5875 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5876 	if (error != 0) {
5877 		mutex_exit(&connp->conn_lock);
5878 		goto connect_failed;
5879 	}
5880 
5881 	udp->udp_state = TS_DATA_XFER;
5882 	/* Record this as the "last" send even though we haven't sent any */
5883 	connp->conn_v6lastdst = connp->conn_faddr_v6;
5884 	connp->conn_lastipversion = connp->conn_ipversion;
5885 	connp->conn_lastdstport = connp->conn_fport;
5886 	connp->conn_lastflowinfo = connp->conn_flowinfo;
5887 	connp->conn_lastscopeid = scopeid;
5888 	connp->conn_lastsrcid = srcid;
5889 	/* Also remember a source to use together with lastdst */
5890 	connp->conn_v6lastsrc = v6src;
5891 
5892 	oldixa = conn_replace_ixa(connp, ixa);
5893 	mutex_exit(&connp->conn_lock);
5894 	ixa_refrele(oldixa);
5895 
5896 	/*
5897 	 * We've picked a source address above. Now we can
5898 	 * verify that the src/port/dst/port is unique for all
5899 	 * connections in TS_DATA_XFER, skipping ourselves.
5900 	 */
5901 	mutex_enter(&udpf->uf_lock);
5902 	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
5903 		if (udp1->udp_state != TS_DATA_XFER)
5904 			continue;
5905 
5906 		if (udp1 == udp)
5907 			continue;
5908 
5909 		connp1 = udp1->udp_connp;
5910 		if (connp->conn_lport != connp1->conn_lport ||
5911 		    connp->conn_ipversion != connp1->conn_ipversion ||
5912 		    dstport != connp1->conn_fport ||
5913 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
5914 		    &connp1->conn_laddr_v6) ||
5915 		    !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
5916 		    !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
5917 		    IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
5918 			continue;
5919 		mutex_exit(&udpf->uf_lock);
5920 		error = -TBADADDR;
5921 		goto connect_failed;
5922 	}
5923 	if (cl_inet_connect2 != NULL) {
5924 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
5925 		if (error != 0) {
5926 			mutex_exit(&udpf->uf_lock);
5927 			error = -TBADADDR;
5928 			goto connect_failed;
5929 		}
5930 	}
5931 	mutex_exit(&udpf->uf_lock);
5932 
5933 	ixa_refrele(ixa);
5934 	return (0);
5935 
5936 connect_failed:
5937 	if (ixa != NULL)
5938 		ixa_refrele(ixa);
5939 	mutex_enter(&connp->conn_lock);
5940 	mutex_enter(&udpf->uf_lock);
5941 	udp->udp_state = TS_IDLE;
5942 	connp->conn_faddr_v6 = ipv6_all_zeros;
5943 	connp->conn_fport = 0;
5944 	/* In case the source address was set above */
5945 	if (connp->conn_mcbc_bind)
5946 		connp->conn_saddr_v6 = ipv6_all_zeros;
5947 	else
5948 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5949 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5950 	mutex_exit(&udpf->uf_lock);
5951 
5952 	connp->conn_v6lastdst = ipv6_all_zeros;
5953 	connp->conn_flowinfo = 0;
5954 
5955 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5956 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5957 	mutex_exit(&connp->conn_lock);
5958 	return (error);
5959 }
5960 
5961 static int
5962 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
5963     socklen_t len, sock_connid_t *id, cred_t *cr)
5964 {
5965 	conn_t	*connp = (conn_t *)proto_handle;
5966 	udp_t	*udp = connp->conn_udp;
5967 	int	error;
5968 	boolean_t did_bind = B_FALSE;
5969 	pid_t	pid = curproc->p_pid;
5970 
5971 	/* All Solaris components should pass a cred for this operation. */
5972 	ASSERT(cr != NULL);
5973 
5974 	if (sa == NULL) {
5975 		/*
5976 		 * Disconnect
5977 		 * Make sure we are connected
5978 		 */
5979 		if (udp->udp_state != TS_DATA_XFER)
5980 			return (EINVAL);
5981 
5982 		error = udp_disconnect(connp);
5983 		return (error);
5984 	}
5985 
5986 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
5987 	if (error != 0)
5988 		goto done;
5989 
5990 	/* do an implicit bind if necessary */
5991 	if (udp->udp_state == TS_UNBND) {
5992 		error = udp_implicit_bind(connp, cr);
5993 		/*
5994 		 * We could be racing with an actual bind, in which case
5995 		 * we would see EPROTO. We cross our fingers and try
5996 		 * to connect.
5997 		 */
5998 		if (!(error == 0 || error == EPROTO))
5999 			goto done;
6000 		did_bind = B_TRUE;
6001 	}
6002 	/*
6003 	 * set SO_DGRAM_ERRIND
6004 	 */
6005 	connp->conn_dgram_errind = B_TRUE;
6006 
6007 	error = udp_do_connect(connp, sa, len, cr, pid);
6008 
6009 	if (error != 0 && did_bind) {
6010 		int unbind_err;
6011 
6012 		unbind_err = udp_do_unbind(connp);
6013 		ASSERT(unbind_err == 0);
6014 	}
6015 
6016 	if (error == 0) {
6017 		*id = 0;
6018 		(*connp->conn_upcalls->su_connected)
6019 		    (connp->conn_upper_handle, 0, NULL, -1);
6020 	} else if (error < 0) {
6021 		error = proto_tlitosyserr(-error);
6022 	}
6023 
6024 done:
6025 	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
6026 		/*
6027 		 * No need to hold locks to set state
6028 		 * after connect failure socket state is undefined
6029 		 * We set the state only to imitate old sockfs behavior
6030 		 */
6031 		udp->udp_state = TS_IDLE;
6032 	}
6033 	return (error);
6034 }
6035 
6036 int
6037 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
6038     cred_t *cr)
6039 {
6040 	sin6_t		*sin6;
6041 	sin_t		*sin = NULL;
6042 	uint_t		srcid;
6043 	conn_t		*connp = (conn_t *)proto_handle;
6044 	udp_t		*udp = connp->conn_udp;
6045 	int		error = 0;
6046 	udp_stack_t	*us = udp->udp_us;
6047 	ushort_t	ipversion;
6048 	pid_t		pid = curproc->p_pid;
6049 	ip_xmit_attr_t	*ixa;
6050 
6051 	ASSERT(DB_TYPE(mp) == M_DATA);
6052 
6053 	/* All Solaris components should pass a cred for this operation. */
6054 	ASSERT(cr != NULL);
6055 
6056 	/* do an implicit bind if necessary */
6057 	if (udp->udp_state == TS_UNBND) {
6058 		error = udp_implicit_bind(connp, cr);
6059 		/*
6060 		 * We could be racing with an actual bind, in which case
6061 		 * we would see EPROTO. We cross our fingers and try
6062 		 * to connect.
6063 		 */
6064 		if (!(error == 0 || error == EPROTO)) {
6065 			freemsg(mp);
6066 			return (error);
6067 		}
6068 	}
6069 
6070 	/* Connected? */
6071 	if (msg->msg_name == NULL) {
6072 		if (udp->udp_state != TS_DATA_XFER) {
6073 			UDPS_BUMP_MIB(us, udpOutErrors);
6074 			return (EDESTADDRREQ);
6075 		}
6076 		if (msg->msg_controllen != 0) {
6077 			error = udp_output_ancillary(connp, NULL, NULL, mp,
6078 			    NULL, msg, cr, pid);
6079 		} else {
6080 			error = udp_output_connected(connp, mp, cr, pid);
6081 		}
6082 		if (us->us_sendto_ignerr)
6083 			return (0);
6084 		else
6085 			return (error);
6086 	}
6087 	if (udp->udp_state == TS_DATA_XFER) {
6088 		UDPS_BUMP_MIB(us, udpOutErrors);
6089 		return (EISCONN);
6090 	}
6091 	error = proto_verify_ip_addr(connp->conn_family,
6092 	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6093 	if (error != 0) {
6094 		UDPS_BUMP_MIB(us, udpOutErrors);
6095 		return (error);
6096 	}
6097 	switch (connp->conn_family) {
6098 	case AF_INET6:
6099 		sin6 = (sin6_t *)msg->msg_name;
6100 
6101 		srcid = sin6->__sin6_src_id;
6102 
6103 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
6104 			/*
6105 			 * Destination is a non-IPv4-compatible IPv6 address.
6106 			 * Send out an IPv6 format packet.
6107 			 */
6108 
6109 			/*
6110 			 * If the local address is a mapped address return
6111 			 * an error.
6112 			 * It would be possible to send an IPv6 packet but the
6113 			 * response would never make it back to the application
6114 			 * since it is bound to a mapped address.
6115 			 */
6116 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
6117 				UDPS_BUMP_MIB(us, udpOutErrors);
6118 				return (EADDRNOTAVAIL);
6119 			}
6120 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
6121 				sin6->sin6_addr = ipv6_loopback;
6122 			ipversion = IPV6_VERSION;
6123 		} else {
6124 			if (connp->conn_ipv6_v6only) {
6125 				UDPS_BUMP_MIB(us, udpOutErrors);
6126 				return (EADDRNOTAVAIL);
6127 			}
6128 
6129 			/*
6130 			 * If the local address is not zero or a mapped address
6131 			 * return an error.  It would be possible to send an
6132 			 * IPv4 packet but the response would never make it
6133 			 * back to the application since it is bound to a
6134 			 * non-mapped address.
6135 			 */
6136 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
6137 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
6138 				UDPS_BUMP_MIB(us, udpOutErrors);
6139 				return (EADDRNOTAVAIL);
6140 			}
6141 
6142 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
6143 				V4_PART_OF_V6(sin6->sin6_addr) =
6144 				    htonl(INADDR_LOOPBACK);
6145 			}
6146 			ipversion = IPV4_VERSION;
6147 		}
6148 
6149 		/*
6150 		 * We have to allocate an ip_xmit_attr_t before we grab
6151 		 * conn_lock and we need to hold conn_lock once we've check
6152 		 * conn_same_as_last_v6 to handle concurrent send* calls on a
6153 		 * socket.
6154 		 */
6155 		if (msg->msg_controllen == 0) {
6156 			ixa = conn_get_ixa(connp, B_FALSE);
6157 			if (ixa == NULL) {
6158 				UDPS_BUMP_MIB(us, udpOutErrors);
6159 				return (ENOMEM);
6160 			}
6161 		} else {
6162 			ixa = NULL;
6163 		}
6164 		mutex_enter(&connp->conn_lock);
6165 		if (udp->udp_delayed_error != 0) {
6166 			sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;
6167 
6168 			error = udp->udp_delayed_error;
6169 			udp->udp_delayed_error = 0;
6170 
6171 			/* Compare IP address, port, and family */
6172 
6173 			if (sin6->sin6_port == sin2->sin6_port &&
6174 			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
6175 			    &sin2->sin6_addr) &&
6176 			    sin6->sin6_family == sin2->sin6_family) {
6177 				mutex_exit(&connp->conn_lock);
6178 				UDPS_BUMP_MIB(us, udpOutErrors);
6179 				if (ixa != NULL)
6180 					ixa_refrele(ixa);
6181 				return (error);
6182 			}
6183 		}
6184 
6185 		if (msg->msg_controllen != 0) {
6186 			mutex_exit(&connp->conn_lock);
6187 			ASSERT(ixa == NULL);
6188 			error = udp_output_ancillary(connp, NULL, sin6, mp,
6189 			    NULL, msg, cr, pid);
6190 		} else if (conn_same_as_last_v6(connp, sin6) &&
6191 		    connp->conn_lastsrcid == srcid &&
6192 		    ipsec_outbound_policy_current(ixa)) {
6193 			/* udp_output_lastdst drops conn_lock */
6194 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6195 		} else {
6196 			/* udp_output_newdst drops conn_lock */
6197 			error = udp_output_newdst(connp, mp, NULL, sin6,
6198 			    ipversion, cr, pid, ixa);
6199 		}
6200 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6201 		if (us->us_sendto_ignerr)
6202 			return (0);
6203 		else
6204 			return (error);
6205 	case AF_INET:
6206 		sin = (sin_t *)msg->msg_name;
6207 
6208 		ipversion = IPV4_VERSION;
6209 
6210 		if (sin->sin_addr.s_addr == INADDR_ANY)
6211 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
6212 
6213 		/*
6214 		 * We have to allocate an ip_xmit_attr_t before we grab
6215 		 * conn_lock and we need to hold conn_lock once we've check
6216 		 * conn_same_as_last_v6 to handle concurrent send* on a socket.
6217 		 */
6218 		if (msg->msg_controllen == 0) {
6219 			ixa = conn_get_ixa(connp, B_FALSE);
6220 			if (ixa == NULL) {
6221 				UDPS_BUMP_MIB(us, udpOutErrors);
6222 				return (ENOMEM);
6223 			}
6224 		} else {
6225 			ixa = NULL;
6226 		}
6227 		mutex_enter(&connp->conn_lock);
6228 		if (udp->udp_delayed_error != 0) {
6229 			sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;
6230 
6231 			error = udp->udp_delayed_error;
6232 			udp->udp_delayed_error = 0;
6233 
6234 			/* Compare IP address and port */
6235 
6236 			if (sin->sin_port == sin2->sin_port &&
6237 			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
6238 				mutex_exit(&connp->conn_lock);
6239 				UDPS_BUMP_MIB(us, udpOutErrors);
6240 				if (ixa != NULL)
6241 					ixa_refrele(ixa);
6242 				return (error);
6243 			}
6244 		}
6245 		if (msg->msg_controllen != 0) {
6246 			mutex_exit(&connp->conn_lock);
6247 			ASSERT(ixa == NULL);
6248 			error = udp_output_ancillary(connp, sin, NULL, mp,
6249 			    NULL, msg, cr, pid);
6250 		} else if (conn_same_as_last_v4(connp, sin) &&
6251 		    ipsec_outbound_policy_current(ixa)) {
6252 			/* udp_output_lastdst drops conn_lock */
6253 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
6254 		} else {
6255 			/* udp_output_newdst drops conn_lock */
6256 			error = udp_output_newdst(connp, mp, sin, NULL,
6257 			    ipversion, cr, pid, ixa);
6258 		}
6259 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
6260 		if (us->us_sendto_ignerr)
6261 			return (0);
6262 		else
6263 			return (error);
6264 	default:
6265 		return (EINVAL);
6266 	}
6267 }
6268 
6269 int
6270 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
6271     boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
6272     sock_quiesce_arg_t *arg)
6273 {
6274 	conn_t	*connp = (conn_t *)proto_handle;
6275 	udp_t	*udp;
6276 	struct T_capability_ack tca;
6277 	struct sockaddr_in6 laddr, faddr;
6278 	socklen_t laddrlen, faddrlen;
6279 	short opts;
6280 	struct stroptions *stropt;
6281 	mblk_t *mp, *stropt_mp;
6282 	int error;
6283 
6284 	udp = connp->conn_udp;
6285 
6286 	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
6287 
6288 	/*
6289 	 * setup the fallback stream that was allocated
6290 	 */
6291 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
6292 	connp->conn_minor_arena = WR(q)->q_ptr;
6293 
6294 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
6295 
6296 	WR(q)->q_qinfo = &udp_winit;
6297 
6298 	connp->conn_rq = RD(q);
6299 	connp->conn_wq = WR(q);
6300 
6301 	/* Notify stream head about options before sending up data */
6302 	stropt_mp->b_datap->db_type = M_SETOPTS;
6303 	stropt_mp->b_wptr += sizeof (*stropt);
6304 	stropt = (struct stroptions *)stropt_mp->b_rptr;
6305 	stropt->so_flags = SO_WROFF | SO_HIWAT;
6306 	stropt->so_wroff = connp->conn_wroff;
6307 	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
6308 	putnext(RD(q), stropt_mp);
6309 
6310 	/*
6311 	 * Free the helper stream
6312 	 */
6313 	ip_free_helper_stream(connp);
6314 
6315 	if (!issocket)
6316 		udp_use_pure_tpi(udp);
6317 
6318 	/*
6319 	 * Collect the information needed to sync with the sonode
6320 	 */
6321 	udp_do_capability_ack(udp, &tca, TC1_INFO);
6322 
6323 	laddrlen = faddrlen = sizeof (sin6_t);
6324 	(void) udp_getsockname((sock_lower_handle_t)connp,
6325 	    (struct sockaddr *)&laddr, &laddrlen, CRED());
6326 	error = udp_getpeername((sock_lower_handle_t)connp,
6327 	    (struct sockaddr *)&faddr, &faddrlen, CRED());
6328 	if (error != 0)
6329 		faddrlen = 0;
6330 
6331 	opts = 0;
6332 	if (connp->conn_dgram_errind)
6333 		opts |= SO_DGRAM_ERRIND;
6334 	if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
6335 		opts |= SO_DONTROUTE;
6336 
6337 	mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
6338 	    (struct sockaddr *)&laddr, laddrlen,
6339 	    (struct sockaddr *)&faddr, faddrlen, opts);
6340 
6341 	mutex_enter(&udp->udp_recv_lock);
6342 	/*
6343 	 * Attempts to send data up during fallback will result in it being
6344 	 * queued in udp_t. First push up the datagrams obtained from the
6345 	 * socket, then any packets queued in udp_t.
6346 	 */
6347 	if (mp != NULL) {
6348 		mp->b_next = udp->udp_fallback_queue_head;
6349 		udp->udp_fallback_queue_head = mp;
6350 	}
6351 	while (udp->udp_fallback_queue_head != NULL) {
6352 		mp = udp->udp_fallback_queue_head;
6353 		udp->udp_fallback_queue_head = mp->b_next;
6354 		mutex_exit(&udp->udp_recv_lock);
6355 		mp->b_next = NULL;
6356 		putnext(RD(q), mp);
6357 		mutex_enter(&udp->udp_recv_lock);
6358 	}
6359 	udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
6360 	/*
6361 	 * No longer a streams less socket
6362 	 */
6363 	mutex_enter(&connp->conn_lock);
6364 	connp->conn_flags &= ~IPCL_NONSTR;
6365 	mutex_exit(&connp->conn_lock);
6366 
6367 	mutex_exit(&udp->udp_recv_lock);
6368 
6369 	ASSERT(connp->conn_ref >= 1);
6370 
6371 	return (0);
6372 }
6373 
6374 /* ARGSUSED3 */
6375 int
6376 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6377     socklen_t *salenp, cred_t *cr)
6378 {
6379 	conn_t	*connp = (conn_t *)proto_handle;
6380 	udp_t	*udp = connp->conn_udp;
6381 	int error;
6382 
6383 	/* All Solaris components should pass a cred for this operation. */
6384 	ASSERT(cr != NULL);
6385 
6386 	mutex_enter(&connp->conn_lock);
6387 	if (udp->udp_state != TS_DATA_XFER)
6388 		error = ENOTCONN;
6389 	else
6390 		error = conn_getpeername(connp, sa, salenp);
6391 	mutex_exit(&connp->conn_lock);
6392 	return (error);
6393 }
6394 
6395 /* ARGSUSED3 */
6396 int
6397 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6398     socklen_t *salenp, cred_t *cr)
6399 {
6400 	conn_t	*connp = (conn_t *)proto_handle;
6401 	int error;
6402 
6403 	/* All Solaris components should pass a cred for this operation. */
6404 	ASSERT(cr != NULL);
6405 
6406 	mutex_enter(&connp->conn_lock);
6407 	error = conn_getsockname(connp, sa, salenp);
6408 	mutex_exit(&connp->conn_lock);
6409 	return (error);
6410 }
6411 
6412 int
6413 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6414     void *optvalp, socklen_t *optlen, cred_t *cr)
6415 {
6416 	conn_t		*connp = (conn_t *)proto_handle;
6417 	int		error;
6418 	t_uscalar_t	max_optbuf_len;
6419 	void		*optvalp_buf;
6420 	int		len;
6421 
6422 	/* All Solaris components should pass a cred for this operation. */
6423 	ASSERT(cr != NULL);
6424 
6425 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6426 	    udp_opt_obj.odb_opt_des_arr,
6427 	    udp_opt_obj.odb_opt_arr_cnt,
6428 	    B_FALSE, B_TRUE, cr);
6429 	if (error != 0) {
6430 		if (error < 0)
6431 			error = proto_tlitosyserr(-error);
6432 		return (error);
6433 	}
6434 
6435 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6436 	len = udp_opt_get(connp, level, option_name, optvalp_buf);
6437 	if (len == -1) {
6438 		kmem_free(optvalp_buf, max_optbuf_len);
6439 		return (EINVAL);
6440 	}
6441 
6442 	/*
6443 	 * update optlen and copy option value
6444 	 */
6445 	t_uscalar_t size = MIN(len, *optlen);
6446 
6447 	bcopy(optvalp_buf, optvalp, size);
6448 	bcopy(&size, optlen, sizeof (size));
6449 
6450 	kmem_free(optvalp_buf, max_optbuf_len);
6451 	return (0);
6452 }
6453 
6454 int
6455 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6456     const void *optvalp, socklen_t optlen, cred_t *cr)
6457 {
6458 	conn_t		*connp = (conn_t *)proto_handle;
6459 	int		error;
6460 
6461 	/* All Solaris components should pass a cred for this operation. */
6462 	ASSERT(cr != NULL);
6463 
6464 	error = proto_opt_check(level, option_name, optlen, NULL,
6465 	    udp_opt_obj.odb_opt_des_arr,
6466 	    udp_opt_obj.odb_opt_arr_cnt,
6467 	    B_TRUE, B_FALSE, cr);
6468 
6469 	if (error != 0) {
6470 		if (error < 0)
6471 			error = proto_tlitosyserr(-error);
6472 		return (error);
6473 	}
6474 
6475 	error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
6476 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
6477 	    NULL, cr);
6478 
6479 	ASSERT(error >= 0);
6480 
6481 	return (error);
6482 }
6483 
6484 void
6485 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
6486 {
6487 	conn_t	*connp = (conn_t *)proto_handle;
6488 	udp_t	*udp = connp->conn_udp;
6489 
6490 	mutex_enter(&udp->udp_recv_lock);
6491 	connp->conn_flow_cntrld = B_FALSE;
6492 	mutex_exit(&udp->udp_recv_lock);
6493 }
6494 
6495 /* ARGSUSED2 */
6496 int
6497 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6498 {
6499 	conn_t	*connp = (conn_t *)proto_handle;
6500 
6501 	/* All Solaris components should pass a cred for this operation. */
6502 	ASSERT(cr != NULL);
6503 
6504 	/* shut down the send side */
6505 	if (how != SHUT_RD)
6506 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6507 		    SOCK_OPCTL_SHUT_SEND, 0);
6508 	/* shut down the recv side */
6509 	if (how != SHUT_WR)
6510 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6511 		    SOCK_OPCTL_SHUT_RECV, 0);
6512 	return (0);
6513 }
6514 
6515 int
6516 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6517     int mode, int32_t *rvalp, cred_t *cr)
6518 {
6519 	conn_t		*connp = (conn_t *)proto_handle;
6520 	int		error;
6521 
6522 	/* All Solaris components should pass a cred for this operation. */
6523 	ASSERT(cr != NULL);
6524 
6525 	/*
6526 	 * If we don't have a helper stream then create one.
6527 	 * ip_create_helper_stream takes care of locking the conn_t,
6528 	 * so this check for NULL is just a performance optimization.
6529 	 */
6530 	if (connp->conn_helper_info == NULL) {
6531 		udp_stack_t *us = connp->conn_udp->udp_us;
6532 
6533 		ASSERT(us->us_ldi_ident != NULL);
6534 
6535 		/*
6536 		 * Create a helper stream for non-STREAMS socket.
6537 		 */
6538 		error = ip_create_helper_stream(connp, us->us_ldi_ident);
6539 		if (error != 0) {
6540 			ip0dbg(("udp_ioctl: create of IP helper stream "
6541 			    "failed %d\n", error));
6542 			return (error);
6543 		}
6544 	}
6545 
6546 	switch (cmd) {
6547 		case _SIOCSOCKFALLBACK:
6548 		case TI_GETPEERNAME:
6549 		case TI_GETMYNAME:
6550 			ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6551 			    cmd));
6552 			error = EINVAL;
6553 			break;
6554 		default:
6555 			/*
6556 			 * Pass on to IP using helper stream
6557 			 */
6558 			error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6559 			    cmd, arg, mode, cr, rvalp);
6560 			break;
6561 	}
6562 	return (error);
6563 }
6564 
6565 /* ARGSUSED */
6566 int
6567 udp_accept(sock_lower_handle_t lproto_handle,
6568     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
6569     cred_t *cr)
6570 {
6571 	return (EOPNOTSUPP);
6572 }
6573 
6574 /* ARGSUSED */
6575 int
6576 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
6577 {
6578 	return (EOPNOTSUPP);
6579 }
6580 
/*
 * Downcall vector for UDP sockets: the set of UDP entry points, listed
 * positionally, one per slot of sock_downcalls_t.  The trailing comment on
 * each line names the slot it fills.  Slots set to NULL (sd_send_uio,
 * sd_recv_uio, sd_poll) have no UDP implementation here.
 *
 * NOTE(review): the initializer is positional, so the entries must stay in
 * the order of the sock_downcalls_t members.  The slot holding
 * udp_clr_flowctrl is labelled sd_setflowctrl - confirm that label against
 * the structure declaration.
 */
sock_downcalls_t sock_udp_downcalls = {
	udp_activate,		/* sd_activate */
	udp_accept,		/* sd_accept */
	udp_bind,		/* sd_bind */
	udp_listen,		/* sd_listen */
	udp_connect,		/* sd_connect */
	udp_getpeername,	/* sd_getpeername */
	udp_getsockname,	/* sd_getsockname */
	udp_getsockopt,		/* sd_getsockopt */
	udp_setsockopt,		/* sd_setsockopt */
	udp_send,		/* sd_send */
	NULL,			/* sd_send_uio */
	NULL,			/* sd_recv_uio */
	NULL,			/* sd_poll */
	udp_shutdown,		/* sd_shutdown */
	udp_clr_flowctrl,	/* sd_setflowctrl */
	udp_ioctl,		/* sd_ioctl */
	udp_close		/* sd_close */
};
6600