xref: /illumos-gate/usr/src/uts/common/inet/udp/udp.c (revision 36589d6b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
25  * Copyright 2018, Joyent, Inc.
26  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
27  */
28 /* Copyright (c) 1990 Mentat Inc. */
29 
30 #include <sys/sysmacros.h>
31 #include <sys/types.h>
32 #include <sys/stream.h>
33 #include <sys/stropts.h>
34 #include <sys/strlog.h>
35 #include <sys/strsun.h>
36 #define	_SUN_TPI_VERSION 2
37 #include <sys/tihdr.h>
38 #include <sys/timod.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/strsubr.h>
42 #include <sys/suntpi.h>
43 #include <sys/xti_inet.h>
44 #include <sys/kmem.h>
45 #include <sys/cred_impl.h>
46 #include <sys/policy.h>
47 #include <sys/priv.h>
48 #include <sys/ucred.h>
49 #include <sys/zone.h>
50 
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/sockio.h>
54 #include <sys/vtrace.h>
55 #include <sys/sdt.h>
56 #include <sys/debug.h>
57 #include <sys/isa_defs.h>
58 #include <sys/random.h>
59 #include <netinet/in.h>
60 #include <netinet/ip6.h>
61 #include <netinet/icmp6.h>
62 #include <netinet/udp.h>
63 
64 #include <inet/common.h>
65 #include <inet/ip.h>
66 #include <inet/ip_impl.h>
67 #include <inet/ipsec_impl.h>
68 #include <inet/ip6.h>
69 #include <inet/ip_ire.h>
70 #include <inet/ip_if.h>
71 #include <inet/ip_multi.h>
72 #include <inet/ip_ndp.h>
73 #include <inet/proto_set.h>
74 #include <inet/mib2.h>
75 #include <inet/optcom.h>
76 #include <inet/snmpcom.h>
77 #include <inet/kstatcom.h>
78 #include <inet/ipclassifier.h>
79 #include <sys/squeue_impl.h>
80 #include <inet/ipnet.h>
81 #include <sys/vxlan.h>
82 #include <inet/inet_hash.h>
83 
84 #include <sys/tsol/label.h>
85 #include <sys/tsol/tnet.h>
86 #include <rpc/pmap_prot.h>
87 
88 #include <inet/udp_impl.h>
89 
90 /*
91  * Synchronization notes:
92  *
93  * UDP is MT and uses the usual kernel synchronization primitives. There are 2
94  * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
95  * protects the contents of the udp_t. uf_lock protects the address and the
96  * fanout information.
97  * The lock order is conn_lock -> uf_lock.
98  *
99  * The fanout lock uf_lock:
100  * When a UDP endpoint is bound to a local port, it is inserted into
101  * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
102  * The size of the array is controlled by the udp_bind_fanout_size variable.
103  * This variable can be changed in /etc/system if the default value is
104  * not large enough.  Each bind hash bucket is protected by a per bucket
105  * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
106  * structure and a few other fields in the udp_t. A UDP endpoint is removed
107  * from the bind hash list only when it is being unbound or being closed.
108  * The per bucket lock also protects a UDP endpoint's state changes.
109  *
110  * Plumbing notes:
111  * UDP is always a device driver. For compatibility with mibopen() code
112  * it is possible to I_PUSH "udp", but that results in pushing a passthrough
113  * dummy module.
114  *
115  * The above implies that we don't support any intermediate module to
116  * reside in between /dev/ip and udp -- in fact, we never supported such
117  * scenario in the past as the inter-layer communication semantics have
118  * always been private.
119  */
120 
121 /* For /etc/system control */
122 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
123 
124 static void	udp_addr_req(queue_t *q, mblk_t *mp);
125 static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
126 static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
127 static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
128 static int	udp_build_hdr_template(conn_t *, const in6_addr_t *,
129     const in6_addr_t *, in_port_t, uint32_t);
130 static void	udp_capability_req(queue_t *q, mblk_t *mp);
131 static int	udp_tpi_close(queue_t *q, int flags, cred_t *);
132 static void	udp_close_free(conn_t *);
133 static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
134 static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
135 static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
136     int sys_error);
137 static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
138     t_scalar_t tlierr, int sys_error);
139 static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
140 		    cred_t *cr);
141 static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
142 		    char *value, caddr_t cp, cred_t *cr);
143 static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
144 		    char *value, caddr_t cp, cred_t *cr);
145 static void	udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
146 static void	udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
147     ip_recv_attr_t *ira);
148 static void	udp_info_req(queue_t *q, mblk_t *mp);
149 static void	udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
150 static int	udp_lrput(queue_t *, mblk_t *);
151 static int	udp_lwput(queue_t *, mblk_t *);
152 static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
153 		    cred_t *credp, boolean_t isv6);
154 static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
155 		    cred_t *credp);
156 static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
157 		    cred_t *credp);
158 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
159 int		udp_opt_set(conn_t *connp, uint_t optset_context,
160 		    int level, int name, uint_t inlen,
161 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
162 		    void *thisdg_attrs, cred_t *cr);
163 int		udp_opt_get(conn_t *connp, int level, int name,
164 		    uchar_t *ptr);
165 static int	udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
166 		    pid_t pid);
167 static int	udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
168     pid_t pid, ip_xmit_attr_t *ixa);
169 static int	udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
170 		    sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
171 		    ip_xmit_attr_t *ixa);
172 static mblk_t	*udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
173     const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
174     int *);
175 static mblk_t	*udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
176     mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
177 static void	udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
178 static void	udp_ud_err_connected(conn_t *, t_scalar_t);
179 static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
180 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
181     boolean_t random);
182 static void	udp_wput_other(queue_t *q, mblk_t *mp);
183 static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
184 static int	udp_wput_fallback(queue_t *q, mblk_t *mp);
185 static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
186 
187 static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
188 static void	udp_stack_fini(netstackid_t stackid, void *arg);
189 
190 /* Common routines for TPI and socket module */
191 static void	udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
192 
193 /* Common routine for TPI and socket module */
194 static conn_t	*udp_do_open(cred_t *, boolean_t, int, int *);
195 static void	udp_do_close(conn_t *);
196 static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
197     boolean_t);
198 static int	udp_do_unbind(conn_t *);
199 
200 int		udp_getsockname(sock_lower_handle_t,
201     struct sockaddr *, socklen_t *, cred_t *);
202 int		udp_getpeername(sock_lower_handle_t,
203     struct sockaddr *, socklen_t *, cred_t *);
204 static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
205     cred_t *, pid_t);
206 
207 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
208 
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_newdst().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * The result of the hook is stored in err, which is first reset to 0.
 * Every argument is parenthesized in the expansion so that arbitrary
 * expressions (not just plain identifiers) can be passed safely.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 */
#define	CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((cp)->conn_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET,		\
		    (uint8_t *)&((cp)->conn_laddr_v4),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)&(V4_PART_OF_V6(*(faddrp))),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, (is_outgoing), AF_INET6,		\
		    (uint8_t *)&((cp)->conn_laddr_v6),			\
		    (cp)->conn_lport,					\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}
246 
247 static struct module_info udp_mod_info =  {
248 	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
249 };
250 
251 /*
252  * Entry points for UDP as a device.
253  * We have separate open functions for the /dev/udp and /dev/udp6 devices.
254  */
255 static struct qinit udp_rinitv4 = {
256 	NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
257 };
258 
259 static struct qinit udp_rinitv6 = {
260 	NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
261 };
262 
263 static struct qinit udp_winit = {
264 	udp_wput, ip_wsrv, NULL, NULL, NULL, &udp_mod_info
265 };
266 
267 /* UDP entry point during fallback */
268 struct qinit udp_fallback_sock_winit = {
269 	udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
270 };
271 
272 /*
273  * UDP needs to handle I_LINK and I_PLINK since ifconfig
274  * likes to use it as a place to hang the various streams.
275  */
276 static struct qinit udp_lrinit = {
277 	udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
278 };
279 
280 static struct qinit udp_lwinit = {
281 	udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
282 };
283 
284 /* For AF_INET aka /dev/udp */
285 struct streamtab udpinfov4 = {
286 	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
287 };
288 
289 /* For AF_INET6 aka /dev/udp6 */
290 struct streamtab udpinfov6 = {
291 	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
292 };
293 
294 #define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
295 
296 /* Default structure copied into T_INFO_ACK messages */
297 static struct T_info_ack udp_g_t_info_ack_ipv4 = {
298 	T_INFO_ACK,
299 	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
300 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
301 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
302 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
303 	sizeof (sin_t),	/* ADDR_size. */
304 	0,		/* OPT_size - not initialized here */
305 	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
306 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
307 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
308 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
309 };
310 
311 #define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
312 
313 static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
314 	T_INFO_ACK,
315 	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
316 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
317 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
318 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
319 	sizeof (sin6_t), /* ADDR_size. */
320 	0,		/* OPT_size - not initialized here */
321 	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
322 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
323 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
324 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
325 };
326 
327 /*
328  * UDP tunables related declarations. Definitions are in udp_tunables.c
329  */
330 extern mod_prop_info_t udp_propinfo_tbl[];
331 extern int udp_propinfo_count;
332 
333 /* Setable in /etc/system */
334 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
335 uint32_t udp_random_anon_port = 1;
336 
337 /*
338  * Hook functions to enable cluster networking.
339  * On non-clustered systems these vectors must always be NULL
340  */
341 
342 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
343     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
344     void *args) = NULL;
345 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
346     sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
347     void *args) = NULL;
348 
349 typedef union T_primitives *t_primp_t;
350 
351 /*
352  * Various protocols that encapsulate UDP have no real use for the source port.
353  * Instead, they want to vary the source port to provide better equal-cost
354  * multipathing and other systems that use fanout. Consider something like
355  * VXLAN. If you're actually sending multiple different streams to a single
356  * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP,
357  * SRC Port, DST Port) will always be the same.
358  *
359  * Here, we return a port to hash this to, if we know how to hash it. If for
360  * some reason we can't perform an L4 hash, then we just return the default
361  * value, usually the default port. After we determine the hash we transform it
362  * so that it's in the range of [ min, max ].
363  *
364  * We'd like to avoid a pull up for the sake of performing the hash. If the
365  * first mblk_t doesn't have the full protocol header, then we just send it to
366  * the default. If for some reason we have an encapsulated packet that has its
367  * protocol header in different parts of an mblk_t, then we'll go with the
368  * default port. This means that that if a driver isn't consistent about how it
369  * generates the frames for a given flow, it will not always be consistently
370  * hashed. That should be an uncommon event.
371  */
372 uint16_t
udp_srcport_hash(mblk_t * mp,int type,uint16_t min,uint16_t max,uint16_t def)373 udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max,
374     uint16_t def)
375 {
376 	size_t szused = 0;
377 	ip6_t *ip6h;
378 	ipha_t *ipha;
379 	uint16_t sap;
380 	uint64_t hash;
381 	uint32_t mod;
382 
383 	ASSERT(min <= max);
384 
385 	if (type != UDP_HASH_VXLAN)
386 		return (def);
387 
388 	if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)))
389 		return (def);
390 
391 	/*
392 	 * The following logic is VXLAN specific to get at the header, if we
393 	 * have formats, eg. GENEVE, then we should ignore this.
394 	 *
395 	 * The kernel overlay device often puts a first mblk_t for the data
396 	 * which is just the encap. If so, then we're going to use that and try
397 	 * to avoid a pull up.
398 	 */
399 	if (MBLKL(mp) == VXLAN_HDR_LEN) {
400 		if (mp->b_cont == NULL)
401 			return (def);
402 		mp = mp->b_cont;
403 	} else if (MBLKL(mp) < VXLAN_HDR_LEN) {
404 		return (def);
405 	} else {
406 		szused = VXLAN_HDR_LEN;
407 	}
408 
409 	/* Can we hold a MAC header? */
410 	if (MBLKL(mp) + szused < sizeof (struct ether_header))
411 		return (def);
412 
413 	/*
414 	 * We need to lie about the starting offset into the message block for
415 	 * convenience. Undo it at the end. We know that inet_pkt_hash() won't
416 	 * modify the mblk_t.
417 	 */
418 	mp->b_rptr += szused;
419 	hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 |
420 	    INET_PKT_HASH_L3 | INET_PKT_HASH_L4);
421 	mp->b_rptr -= szused;
422 
423 	if (hash == 0)
424 		return (def);
425 
426 	mod = max - min + 1;
427 	return ((hash % mod) + min);
428 }
429 
430 /*
431  * Return the next anonymous port in the privileged port range for
432  * bind checking.
433  *
434  * Trusted Extension (TX) notes: TX allows administrator to mark or
435  * reserve ports as Multilevel ports (MLP). MLP has special function
436  * on TX systems. Once a port is made MLP, it's not available as
437  * ordinary port. This creates "holes" in the port name space. It
438  * may be necessary to skip the "holes" find a suitable anon port.
439  */
440 static in_port_t
udp_get_next_priv_port(udp_t * udp)441 udp_get_next_priv_port(udp_t *udp)
442 {
443 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
444 	in_port_t nextport;
445 	boolean_t restart = B_FALSE;
446 	udp_stack_t *us = udp->udp_us;
447 
448 retry:
449 	if (next_priv_port < us->us_min_anonpriv_port ||
450 	    next_priv_port >= IPPORT_RESERVED) {
451 		next_priv_port = IPPORT_RESERVED - 1;
452 		if (restart)
453 			return (0);
454 		restart = B_TRUE;
455 	}
456 
457 	if (is_system_labeled() &&
458 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
459 	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
460 		next_priv_port = nextport;
461 		goto retry;
462 	}
463 
464 	return (next_priv_port--);
465 }
466 
467 /*
468  * Hash list removal routine for udp_t structures.
469  */
470 static void
udp_bind_hash_remove(udp_t * udp,boolean_t caller_holds_lock)471 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
472 {
473 	udp_t		*udpnext;
474 	kmutex_t	*lockp;
475 	udp_stack_t	*us = udp->udp_us;
476 	conn_t		*connp = udp->udp_connp;
477 
478 	if (udp->udp_ptpbhn == NULL)
479 		return;
480 
481 	/*
482 	 * Extract the lock pointer in case there are concurrent
483 	 * hash_remove's for this instance.
484 	 */
485 	ASSERT(connp->conn_lport != 0);
486 	if (!caller_holds_lock) {
487 		lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
488 		    us->us_bind_fanout_size)].uf_lock;
489 		ASSERT(lockp != NULL);
490 		mutex_enter(lockp);
491 	}
492 	if (udp->udp_ptpbhn != NULL) {
493 		udpnext = udp->udp_bind_hash;
494 		if (udpnext != NULL) {
495 			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
496 			udp->udp_bind_hash = NULL;
497 		}
498 		*udp->udp_ptpbhn = udpnext;
499 		udp->udp_ptpbhn = NULL;
500 	}
501 	if (!caller_holds_lock) {
502 		mutex_exit(lockp);
503 	}
504 }
505 
506 static void
udp_bind_hash_insert(udp_fanout_t * uf,udp_t * udp)507 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
508 {
509 	conn_t	*connp = udp->udp_connp;
510 	udp_t	**udpp;
511 	udp_t	*udpnext;
512 	conn_t	*connext;
513 
514 	ASSERT(MUTEX_HELD(&uf->uf_lock));
515 	ASSERT(udp->udp_ptpbhn == NULL);
516 	udpp = &uf->uf_udp;
517 	udpnext = udpp[0];
518 	if (udpnext != NULL) {
519 		/*
520 		 * If the new udp bound to the INADDR_ANY address
521 		 * and the first one in the list is not bound to
522 		 * INADDR_ANY we skip all entries until we find the
523 		 * first one bound to INADDR_ANY.
524 		 * This makes sure that applications binding to a
525 		 * specific address get preference over those binding to
526 		 * INADDR_ANY.
527 		 */
528 		connext = udpnext->udp_connp;
529 		if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
530 		    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
531 			while ((udpnext = udpp[0]) != NULL &&
532 			    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
533 				udpp = &(udpnext->udp_bind_hash);
534 			}
535 			if (udpnext != NULL)
536 				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
537 		} else {
538 			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
539 		}
540 	}
541 	udp->udp_bind_hash = udpnext;
542 	udp->udp_ptpbhn = udpp;
543 	udpp[0] = udp;
544 }
545 
546 /*
547  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
548  * passed to udp_wput.
549  * It associates a port number and local address with the stream.
550  * It calls IP to verify the local IP address, and calls IP to insert
551  * the conn_t in the fanout table.
552  * If everything is ok it then sends the T_BIND_ACK back up.
553  *
554  * Note that UDP over IPv4 and IPv6 sockets can use the same port number
555  * without setting SO_REUSEADDR. This is needed so that they
556  * can be viewed as two independent transport protocols.
557  * However, anonymouns ports are allocated from the same range to avoid
558  * duplicating the us->us_next_port_to_try.
559  */
560 static void
udp_tpi_bind(queue_t * q,mblk_t * mp)561 udp_tpi_bind(queue_t *q, mblk_t *mp)
562 {
563 	sin_t		*sin;
564 	sin6_t		*sin6;
565 	mblk_t		*mp1;
566 	struct T_bind_req *tbr;
567 	conn_t		*connp;
568 	udp_t		*udp;
569 	int		error;
570 	struct sockaddr	*sa;
571 	cred_t		*cr;
572 
573 	/*
574 	 * All Solaris components should pass a db_credp
575 	 * for this TPI message, hence we ASSERT.
576 	 * But in case there is some other M_PROTO that looks
577 	 * like a TPI message sent by some other kernel
578 	 * component, we check and return an error.
579 	 */
580 	cr = msg_getcred(mp, NULL);
581 	ASSERT(cr != NULL);
582 	if (cr == NULL) {
583 		udp_err_ack(q, mp, TSYSERR, EINVAL);
584 		return;
585 	}
586 
587 	connp = Q_TO_CONN(q);
588 	udp = connp->conn_udp;
589 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
590 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
591 		    "udp_bind: bad req, len %u",
592 		    (uint_t)(mp->b_wptr - mp->b_rptr));
593 		udp_err_ack(q, mp, TPROTO, 0);
594 		return;
595 	}
596 	if (udp->udp_state != TS_UNBND) {
597 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
598 		    "udp_bind: bad state, %u", udp->udp_state);
599 		udp_err_ack(q, mp, TOUTSTATE, 0);
600 		return;
601 	}
602 	/*
603 	 * Reallocate the message to make sure we have enough room for an
604 	 * address.
605 	 */
606 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
607 	if (mp1 == NULL) {
608 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
609 		return;
610 	}
611 
612 	mp = mp1;
613 
614 	/* Reset the message type in preparation for shipping it back. */
615 	DB_TYPE(mp) = M_PCPROTO;
616 
617 	tbr = (struct T_bind_req *)mp->b_rptr;
618 	switch (tbr->ADDR_length) {
619 	case 0:			/* Request for a generic port */
620 		tbr->ADDR_offset = sizeof (struct T_bind_req);
621 		if (connp->conn_family == AF_INET) {
622 			tbr->ADDR_length = sizeof (sin_t);
623 			sin = (sin_t *)&tbr[1];
624 			*sin = sin_null;
625 			sin->sin_family = AF_INET;
626 			mp->b_wptr = (uchar_t *)&sin[1];
627 			sa = (struct sockaddr *)sin;
628 		} else {
629 			ASSERT(connp->conn_family == AF_INET6);
630 			tbr->ADDR_length = sizeof (sin6_t);
631 			sin6 = (sin6_t *)&tbr[1];
632 			*sin6 = sin6_null;
633 			sin6->sin6_family = AF_INET6;
634 			mp->b_wptr = (uchar_t *)&sin6[1];
635 			sa = (struct sockaddr *)sin6;
636 		}
637 		break;
638 
639 	case sizeof (sin_t):	/* Complete IPv4 address */
640 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
641 		    sizeof (sin_t));
642 		if (sa == NULL || !OK_32PTR((char *)sa)) {
643 			udp_err_ack(q, mp, TSYSERR, EINVAL);
644 			return;
645 		}
646 		if (connp->conn_family != AF_INET ||
647 		    sa->sa_family != AF_INET) {
648 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
649 			return;
650 		}
651 		break;
652 
653 	case sizeof (sin6_t):	/* complete IPv6 address */
654 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
655 		    sizeof (sin6_t));
656 		if (sa == NULL || !OK_32PTR((char *)sa)) {
657 			udp_err_ack(q, mp, TSYSERR, EINVAL);
658 			return;
659 		}
660 		if (connp->conn_family != AF_INET6 ||
661 		    sa->sa_family != AF_INET6) {
662 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
663 			return;
664 		}
665 		break;
666 
667 	default:		/* Invalid request */
668 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
669 		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
670 		udp_err_ack(q, mp, TBADADDR, 0);
671 		return;
672 	}
673 
674 	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
675 	    tbr->PRIM_type != O_T_BIND_REQ);
676 
677 	if (error != 0) {
678 		if (error > 0) {
679 			udp_err_ack(q, mp, TSYSERR, error);
680 		} else {
681 			udp_err_ack(q, mp, -error, 0);
682 		}
683 	} else {
684 		tbr->PRIM_type = T_BIND_ACK;
685 		qreply(q, mp);
686 	}
687 }
688 
689 /*
690  * This routine handles each T_CONN_REQ message passed to udp.  It
691  * associates a default destination address with the stream.
692  *
693  * After various error checks are completed, udp_connect() lays
694  * the target address and port into the composite header template.
695  * Then we ask IP for information, including a source address if we didn't
696  * already have one. Finally we send up the T_OK_ACK reply message.
697  */
698 static void
udp_tpi_connect(queue_t * q,mblk_t * mp)699 udp_tpi_connect(queue_t *q, mblk_t *mp)
700 {
701 	conn_t	*connp = Q_TO_CONN(q);
702 	int	error;
703 	socklen_t	len;
704 	struct sockaddr		*sa;
705 	struct T_conn_req	*tcr;
706 	cred_t		*cr;
707 	pid_t		pid;
708 	/*
709 	 * All Solaris components should pass a db_credp
710 	 * for this TPI message, hence we ASSERT.
711 	 * But in case there is some other M_PROTO that looks
712 	 * like a TPI message sent by some other kernel
713 	 * component, we check and return an error.
714 	 */
715 	cr = msg_getcred(mp, &pid);
716 	ASSERT(cr != NULL);
717 	if (cr == NULL) {
718 		udp_err_ack(q, mp, TSYSERR, EINVAL);
719 		return;
720 	}
721 
722 	tcr = (struct T_conn_req *)mp->b_rptr;
723 
724 	/* A bit of sanity checking */
725 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
726 		udp_err_ack(q, mp, TPROTO, 0);
727 		return;
728 	}
729 
730 	if (tcr->OPT_length != 0) {
731 		udp_err_ack(q, mp, TBADOPT, 0);
732 		return;
733 	}
734 
735 	/*
736 	 * Determine packet type based on type of address passed in
737 	 * the request should contain an IPv4 or IPv6 address.
738 	 * Make sure that address family matches the type of
739 	 * family of the address passed down.
740 	 */
741 	len = tcr->DEST_length;
742 	switch (tcr->DEST_length) {
743 	default:
744 		udp_err_ack(q, mp, TBADADDR, 0);
745 		return;
746 
747 	case sizeof (sin_t):
748 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
749 		    sizeof (sin_t));
750 		break;
751 
752 	case sizeof (sin6_t):
753 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
754 		    sizeof (sin6_t));
755 		break;
756 	}
757 
758 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
759 	if (error != 0) {
760 		udp_err_ack(q, mp, TSYSERR, error);
761 		return;
762 	}
763 
764 	error = udp_do_connect(connp, sa, len, cr, pid);
765 	if (error != 0) {
766 		if (error < 0)
767 			udp_err_ack(q, mp, -error, 0);
768 		else
769 			udp_err_ack(q, mp, TSYSERR, error);
770 	} else {
771 		mblk_t	*mp1;
772 		/*
773 		 * We have to send a connection confirmation to
774 		 * keep TLI happy.
775 		 */
776 		if (connp->conn_family == AF_INET) {
777 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
778 			    sizeof (sin_t), NULL, 0);
779 		} else {
780 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
781 			    sizeof (sin6_t), NULL, 0);
782 		}
783 		if (mp1 == NULL) {
784 			udp_err_ack(q, mp, TSYSERR, ENOMEM);
785 			return;
786 		}
787 
788 		/*
789 		 * Send ok_ack for T_CONN_REQ
790 		 */
791 		mp = mi_tpi_ok_ack_alloc(mp);
792 		if (mp == NULL) {
793 			/* Unable to reuse the T_CONN_REQ for the ack. */
794 			udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
795 			return;
796 		}
797 
798 		putnext(connp->conn_rq, mp);
799 		putnext(connp->conn_rq, mp1);
800 	}
801 }
802 
803 /* ARGSUSED */
804 static int
udp_tpi_close(queue_t * q,int flags,cred_t * credp __unused)805 udp_tpi_close(queue_t *q, int flags, cred_t *credp __unused)
806 {
807 	conn_t	*connp;
808 
809 	if (flags & SO_FALLBACK) {
810 		/*
811 		 * stream is being closed while in fallback
812 		 * simply free the resources that were allocated
813 		 */
814 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
815 		qprocsoff(q);
816 		goto done;
817 	}
818 
819 	connp = Q_TO_CONN(q);
820 	udp_do_close(connp);
821 done:
822 	q->q_ptr = WR(q)->q_ptr = NULL;
823 	return (0);
824 }
825 
826 static void
udp_close_free(conn_t * connp)827 udp_close_free(conn_t *connp)
828 {
829 	udp_t *udp = connp->conn_udp;
830 
831 	/* If there are any options associated with the stream, free them. */
832 	if (udp->udp_recv_ipp.ipp_fields != 0)
833 		ip_pkt_free(&udp->udp_recv_ipp);
834 
835 	/*
836 	 * Clear any fields which the kmem_cache constructor clears.
837 	 * Only udp_connp needs to be preserved.
838 	 * TBD: We should make this more efficient to avoid clearing
839 	 * everything.
840 	 */
841 	ASSERT(udp->udp_connp == connp);
842 	bzero(udp, sizeof (udp_t));
843 	udp->udp_connp = connp;
844 }
845 
846 static int
udp_do_disconnect(conn_t * connp)847 udp_do_disconnect(conn_t *connp)
848 {
849 	udp_t	*udp;
850 	udp_fanout_t *udpf;
851 	udp_stack_t *us;
852 	int	error;
853 
854 	udp = connp->conn_udp;
855 	us = udp->udp_us;
856 	mutex_enter(&connp->conn_lock);
857 	if (udp->udp_state != TS_DATA_XFER) {
858 		mutex_exit(&connp->conn_lock);
859 		return (-TOUTSTATE);
860 	}
861 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
862 	    us->us_bind_fanout_size)];
863 	mutex_enter(&udpf->uf_lock);
864 	if (connp->conn_mcbc_bind)
865 		connp->conn_saddr_v6 = ipv6_all_zeros;
866 	else
867 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
868 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
869 	connp->conn_faddr_v6 = ipv6_all_zeros;
870 	connp->conn_fport = 0;
871 	udp->udp_state = TS_IDLE;
872 	mutex_exit(&udpf->uf_lock);
873 
874 	/* Remove any remnants of mapped address binding */
875 	if (connp->conn_family == AF_INET6)
876 		connp->conn_ipversion = IPV6_VERSION;
877 
878 	connp->conn_v6lastdst = ipv6_all_zeros;
879 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
880 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
881 	mutex_exit(&connp->conn_lock);
882 	if (error != 0)
883 		return (error);
884 
885 	/*
886 	 * Tell IP to remove the full binding and revert
887 	 * to the local address binding.
888 	 */
889 	return (ip_laddr_fanout_insert(connp));
890 }
891 
892 static void
udp_tpi_disconnect(queue_t * q,mblk_t * mp)893 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
894 {
895 	conn_t	*connp = Q_TO_CONN(q);
896 	int	error;
897 
898 	/*
899 	 * Allocate the largest primitive we need to send back
900 	 * T_error_ack is > than T_ok_ack
901 	 */
902 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
903 	if (mp == NULL) {
904 		/* Unable to reuse the T_DISCON_REQ for the ack. */
905 		udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
906 		return;
907 	}
908 
909 	error = udp_do_disconnect(connp);
910 
911 	if (error != 0) {
912 		if (error < 0) {
913 			udp_err_ack(q, mp, -error, 0);
914 		} else {
915 			udp_err_ack(q, mp, TSYSERR, error);
916 		}
917 	} else {
918 		mp = mi_tpi_ok_ack_alloc(mp);
919 		ASSERT(mp != NULL);
920 		qreply(q, mp);
921 	}
922 }
923 
924 int
udp_disconnect(conn_t * connp)925 udp_disconnect(conn_t *connp)
926 {
927 	int error;
928 
929 	connp->conn_dgram_errind = B_FALSE;
930 	error = udp_do_disconnect(connp);
931 	if (error < 0)
932 		error = proto_tlitosyserr(-error);
933 
934 	return (error);
935 }
936 
937 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
938 static void
udp_err_ack(queue_t * q,mblk_t * mp,t_scalar_t t_error,int sys_error)939 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
940 {
941 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
942 		qreply(q, mp);
943 }
944 
945 /* Shorthand to generate and send TPI error acks to our client */
946 static void
udp_err_ack_prim(queue_t * q,mblk_t * mp,t_scalar_t primitive,t_scalar_t t_error,int sys_error)947 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
948     t_scalar_t t_error, int sys_error)
949 {
950 	struct T_error_ack	*teackp;
951 
952 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
953 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
954 		teackp = (struct T_error_ack *)mp->b_rptr;
955 		teackp->ERROR_prim = primitive;
956 		teackp->TLI_error = t_error;
957 		teackp->UNIX_error = sys_error;
958 		qreply(q, mp);
959 	}
960 }
961 
/*
 * At minimum we need 4 bytes of UDP header.
 * NOTE(review): the ICMP handlers that follow this definition do not
 * reference the macro themselves -- confirm where the length check is made.
 */
#define	ICMP_MIN_UDP_HDR	4
964 
965 /*
966  * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
967  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
968  * Assumes that IP has pulled up everything up to and including the ICMP header.
969  */
970 /* ARGSUSED2 */
971 static void
udp_icmp_input(void * arg1,mblk_t * mp,void * arg2,ip_recv_attr_t * ira)972 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
973 {
974 	conn_t		*connp = (conn_t *)arg1;
975 	icmph_t		*icmph;
976 	ipha_t		*ipha;
977 	int		iph_hdr_length;
978 	udpha_t		*udpha;
979 	sin_t		sin;
980 	sin6_t		sin6;
981 	mblk_t		*mp1;
982 	int		error = 0;
983 	udp_t		*udp = connp->conn_udp;
984 
985 	ipha = (ipha_t *)mp->b_rptr;
986 
987 	ASSERT(OK_32PTR(mp->b_rptr));
988 
989 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
990 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
991 		udp_icmp_error_ipv6(connp, mp, ira);
992 		return;
993 	}
994 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
995 
996 	/* Skip past the outer IP and ICMP headers */
997 	ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
998 	iph_hdr_length = ira->ira_ip_hdr_length;
999 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
1000 	ipha = (ipha_t *)&icmph[1];	/* Inner IP header */
1001 
1002 	/* Skip past the inner IP and find the ULP header */
1003 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1004 	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
1005 
1006 	switch (icmph->icmph_type) {
1007 	case ICMP_DEST_UNREACHABLE:
1008 		switch (icmph->icmph_code) {
1009 		case ICMP_FRAGMENTATION_NEEDED: {
1010 			ipha_t		*ipha;
1011 			ip_xmit_attr_t	*ixa;
1012 			/*
1013 			 * IP has already adjusted the path MTU.
1014 			 * But we need to adjust DF for IPv4.
1015 			 */
1016 			if (connp->conn_ipversion != IPV4_VERSION)
1017 				break;
1018 
1019 			ixa = conn_get_ixa(connp, B_FALSE);
1020 			if (ixa == NULL || ixa->ixa_ire == NULL) {
1021 				/*
1022 				 * Some other thread holds conn_ixa. We will
1023 				 * redo this on the next ICMP too big.
1024 				 */
1025 				if (ixa != NULL)
1026 					ixa_refrele(ixa);
1027 				break;
1028 			}
1029 			(void) ip_get_pmtu(ixa);
1030 
1031 			mutex_enter(&connp->conn_lock);
1032 			ipha = (ipha_t *)connp->conn_ht_iphc;
1033 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
1034 				ipha->ipha_fragment_offset_and_flags |=
1035 				    IPH_DF_HTONS;
1036 			} else {
1037 				ipha->ipha_fragment_offset_and_flags &=
1038 				    ~IPH_DF_HTONS;
1039 			}
1040 			mutex_exit(&connp->conn_lock);
1041 			ixa_refrele(ixa);
1042 			break;
1043 		}
1044 		case ICMP_PORT_UNREACHABLE:
1045 		case ICMP_PROTOCOL_UNREACHABLE:
1046 			error = ECONNREFUSED;
1047 			break;
1048 		default:
1049 			/* Transient errors */
1050 			break;
1051 		}
1052 		break;
1053 	default:
1054 		/* Transient errors */
1055 		break;
1056 	}
1057 	if (error == 0) {
1058 		freemsg(mp);
1059 		return;
1060 	}
1061 
1062 	/*
1063 	 * Deliver T_UDERROR_IND when the application has asked for it.
1064 	 * The socket layer enables this automatically when connected.
1065 	 */
1066 	if (!connp->conn_dgram_errind) {
1067 		freemsg(mp);
1068 		return;
1069 	}
1070 
1071 	switch (connp->conn_family) {
1072 	case AF_INET:
1073 		sin = sin_null;
1074 		sin.sin_family = AF_INET;
1075 		sin.sin_addr.s_addr = ipha->ipha_dst;
1076 		sin.sin_port = udpha->uha_dst_port;
1077 		if (IPCL_IS_NONSTR(connp)) {
1078 			mutex_enter(&connp->conn_lock);
1079 			if (udp->udp_state == TS_DATA_XFER) {
1080 				if (sin.sin_port == connp->conn_fport &&
1081 				    sin.sin_addr.s_addr ==
1082 				    connp->conn_faddr_v4) {
1083 					mutex_exit(&connp->conn_lock);
1084 					(*connp->conn_upcalls->su_set_error)
1085 					    (connp->conn_upper_handle, error);
1086 					goto done;
1087 				}
1088 			} else {
1089 				udp->udp_delayed_error = error;
1090 				*((sin_t *)&udp->udp_delayed_addr) = sin;
1091 			}
1092 			mutex_exit(&connp->conn_lock);
1093 		} else {
1094 			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
1095 			    NULL, 0, error);
1096 			if (mp1 != NULL)
1097 				putnext(connp->conn_rq, mp1);
1098 		}
1099 		break;
1100 	case AF_INET6:
1101 		sin6 = sin6_null;
1102 		sin6.sin6_family = AF_INET6;
1103 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
1104 		sin6.sin6_port = udpha->uha_dst_port;
1105 		if (IPCL_IS_NONSTR(connp)) {
1106 			mutex_enter(&connp->conn_lock);
1107 			if (udp->udp_state == TS_DATA_XFER) {
1108 				if (sin6.sin6_port == connp->conn_fport &&
1109 				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1110 				    &connp->conn_faddr_v6)) {
1111 					mutex_exit(&connp->conn_lock);
1112 					(*connp->conn_upcalls->su_set_error)
1113 					    (connp->conn_upper_handle, error);
1114 					goto done;
1115 				}
1116 			} else {
1117 				udp->udp_delayed_error = error;
1118 				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1119 			}
1120 			mutex_exit(&connp->conn_lock);
1121 		} else {
1122 			mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1123 			    NULL, 0, error);
1124 			if (mp1 != NULL)
1125 				putnext(connp->conn_rq, mp1);
1126 		}
1127 		break;
1128 	}
1129 done:
1130 	freemsg(mp);
1131 }
1132 
1133 /*
1134  * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1135  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1136  * Assumes that IP has pulled up all the extension headers as well as the
1137  * ICMPv6 header.
1138  */
1139 static void
udp_icmp_error_ipv6(conn_t * connp,mblk_t * mp,ip_recv_attr_t * ira)1140 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
1141 {
1142 	icmp6_t		*icmp6;
1143 	ip6_t		*ip6h, *outer_ip6h;
1144 	uint16_t	iph_hdr_length;
1145 	uint8_t		*nexthdrp;
1146 	udpha_t		*udpha;
1147 	sin6_t		sin6;
1148 	mblk_t		*mp1;
1149 	int		error = 0;
1150 	udp_t		*udp = connp->conn_udp;
1151 	udp_stack_t	*us = udp->udp_us;
1152 
1153 	outer_ip6h = (ip6_t *)mp->b_rptr;
1154 #ifdef DEBUG
1155 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1156 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1157 	else
1158 		iph_hdr_length = IPV6_HDR_LEN;
1159 	ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
1160 #endif
1161 	/* Skip past the outer IP and ICMP headers */
1162 	iph_hdr_length = ira->ira_ip_hdr_length;
1163 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1164 
1165 	/* Skip past the inner IP and find the ULP header */
1166 	ip6h = (ip6_t *)&icmp6[1];	/* Inner IP header */
1167 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1168 		freemsg(mp);
1169 		return;
1170 	}
1171 	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
1172 
1173 	switch (icmp6->icmp6_type) {
1174 	case ICMP6_DST_UNREACH:
1175 		switch (icmp6->icmp6_code) {
1176 		case ICMP6_DST_UNREACH_NOPORT:
1177 			error = ECONNREFUSED;
1178 			break;
1179 		case ICMP6_DST_UNREACH_ADMIN:
1180 		case ICMP6_DST_UNREACH_NOROUTE:
1181 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1182 		case ICMP6_DST_UNREACH_ADDR:
1183 			/* Transient errors */
1184 			break;
1185 		default:
1186 			break;
1187 		}
1188 		break;
1189 	case ICMP6_PACKET_TOO_BIG: {
1190 		struct T_unitdata_ind	*tudi;
1191 		struct T_opthdr		*toh;
1192 		size_t			udi_size;
1193 		mblk_t			*newmp;
1194 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
1195 		    sizeof (struct ip6_mtuinfo);
1196 		sin6_t			*sin6;
1197 		struct ip6_mtuinfo	*mtuinfo;
1198 
1199 		/*
1200 		 * If the application has requested to receive path mtu
1201 		 * information, send up an empty message containing an
1202 		 * IPV6_PATHMTU ancillary data item.
1203 		 */
1204 		if (!connp->conn_ipv6_recvpathmtu)
1205 			break;
1206 
1207 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1208 		    opt_length;
1209 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1210 			UDPS_BUMP_MIB(us, udpInErrors);
1211 			break;
1212 		}
1213 
1214 		/*
1215 		 * newmp->b_cont is left to NULL on purpose.  This is an
1216 		 * empty message containing only ancillary data.
1217 		 */
1218 		newmp->b_datap->db_type = M_PROTO;
1219 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1220 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
1221 		tudi->PRIM_type = T_UNITDATA_IND;
1222 		tudi->SRC_length = sizeof (sin6_t);
1223 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1224 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1225 		tudi->OPT_length = opt_length;
1226 
1227 		sin6 = (sin6_t *)&tudi[1];
1228 		bzero(sin6, sizeof (sin6_t));
1229 		sin6->sin6_family = AF_INET6;
1230 		sin6->sin6_addr = connp->conn_faddr_v6;
1231 
1232 		toh = (struct T_opthdr *)&sin6[1];
1233 		toh->level = IPPROTO_IPV6;
1234 		toh->name = IPV6_PATHMTU;
1235 		toh->len = opt_length;
1236 		toh->status = 0;
1237 
1238 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1239 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1240 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1241 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1242 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1243 		/*
1244 		 * We've consumed everything we need from the original
1245 		 * message.  Free it, then send our empty message.
1246 		 */
1247 		freemsg(mp);
1248 		udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
1249 		return;
1250 	}
1251 	case ICMP6_TIME_EXCEEDED:
1252 		/* Transient errors */
1253 		break;
1254 	case ICMP6_PARAM_PROB:
1255 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1256 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1257 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1258 		    (uchar_t *)nexthdrp) {
1259 			error = ECONNREFUSED;
1260 			break;
1261 		}
1262 		break;
1263 	}
1264 	if (error == 0) {
1265 		freemsg(mp);
1266 		return;
1267 	}
1268 
1269 	/*
1270 	 * Deliver T_UDERROR_IND when the application has asked for it.
1271 	 * The socket layer enables this automatically when connected.
1272 	 */
1273 	if (!connp->conn_dgram_errind) {
1274 		freemsg(mp);
1275 		return;
1276 	}
1277 
1278 	sin6 = sin6_null;
1279 	sin6.sin6_family = AF_INET6;
1280 	sin6.sin6_addr = ip6h->ip6_dst;
1281 	sin6.sin6_port = udpha->uha_dst_port;
1282 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1283 
1284 	if (IPCL_IS_NONSTR(connp)) {
1285 		mutex_enter(&connp->conn_lock);
1286 		if (udp->udp_state == TS_DATA_XFER) {
1287 			if (sin6.sin6_port == connp->conn_fport &&
1288 			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1289 			    &connp->conn_faddr_v6)) {
1290 				mutex_exit(&connp->conn_lock);
1291 				(*connp->conn_upcalls->su_set_error)
1292 				    (connp->conn_upper_handle, error);
1293 				goto done;
1294 			}
1295 		} else {
1296 			udp->udp_delayed_error = error;
1297 			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
1298 		}
1299 		mutex_exit(&connp->conn_lock);
1300 	} else {
1301 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1302 		    NULL, 0, error);
1303 		if (mp1 != NULL)
1304 			putnext(connp->conn_rq, mp1);
1305 	}
1306 done:
1307 	freemsg(mp);
1308 }
1309 
1310 /*
1311  * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
1312  * The local address is filled in if endpoint is bound. The remote address
1313  * is filled in if remote address has been precified ("connected endpoint")
1314  * (The concept of connected CLTS sockets is alien to published TPI
1315  *  but we support it anyway).
1316  */
1317 static void
udp_addr_req(queue_t * q,mblk_t * mp)1318 udp_addr_req(queue_t *q, mblk_t *mp)
1319 {
1320 	struct sockaddr *sa;
1321 	mblk_t	*ackmp;
1322 	struct T_addr_ack *taa;
1323 	udp_t	*udp = Q_TO_UDP(q);
1324 	conn_t	*connp = udp->udp_connp;
1325 	uint_t	addrlen;
1326 
1327 	/* Make it large enough for worst case */
1328 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1329 	    2 * sizeof (sin6_t), 1);
1330 	if (ackmp == NULL) {
1331 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1332 		return;
1333 	}
1334 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1335 
1336 	bzero(taa, sizeof (struct T_addr_ack));
1337 	ackmp->b_wptr = (uchar_t *)&taa[1];
1338 
1339 	taa->PRIM_type = T_ADDR_ACK;
1340 	ackmp->b_datap->db_type = M_PCPROTO;
1341 
1342 	if (connp->conn_family == AF_INET)
1343 		addrlen = sizeof (sin_t);
1344 	else
1345 		addrlen = sizeof (sin6_t);
1346 
1347 	mutex_enter(&connp->conn_lock);
1348 	/*
1349 	 * Note: Following code assumes 32 bit alignment of basic
1350 	 * data structures like sin_t and struct T_addr_ack.
1351 	 */
1352 	if (udp->udp_state != TS_UNBND) {
1353 		/*
1354 		 * Fill in local address first
1355 		 */
1356 		taa->LOCADDR_offset = sizeof (*taa);
1357 		taa->LOCADDR_length = addrlen;
1358 		sa = (struct sockaddr *)&taa[1];
1359 		(void) conn_getsockname(connp, sa, &addrlen);
1360 		ackmp->b_wptr += addrlen;
1361 	}
1362 	if (udp->udp_state == TS_DATA_XFER) {
1363 		/*
1364 		 * connected, fill remote address too
1365 		 */
1366 		taa->REMADDR_length = addrlen;
1367 		/* assumed 32-bit alignment */
1368 		taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
1369 		sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
1370 		(void) conn_getpeername(connp, sa, &addrlen);
1371 		ackmp->b_wptr += addrlen;
1372 	}
1373 	mutex_exit(&connp->conn_lock);
1374 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1375 	qreply(q, ackmp);
1376 }
1377 
1378 static void
udp_copy_info(struct T_info_ack * tap,udp_t * udp)1379 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
1380 {
1381 	conn_t		*connp = udp->udp_connp;
1382 
1383 	if (connp->conn_family == AF_INET) {
1384 		*tap = udp_g_t_info_ack_ipv4;
1385 	} else {
1386 		*tap = udp_g_t_info_ack_ipv6;
1387 	}
1388 	tap->CURRENT_state = udp->udp_state;
1389 	tap->OPT_size = udp_max_optsize;
1390 }
1391 
1392 static void
udp_do_capability_ack(udp_t * udp,struct T_capability_ack * tcap,t_uscalar_t cap_bits1)1393 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
1394     t_uscalar_t cap_bits1)
1395 {
1396 	tcap->CAP_bits1 = 0;
1397 
1398 	if (cap_bits1 & TC1_INFO) {
1399 		udp_copy_info(&tcap->INFO_ack, udp);
1400 		tcap->CAP_bits1 |= TC1_INFO;
1401 	}
1402 }
1403 
1404 /*
1405  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1406  * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1407  * udp_g_t_info_ack.  The current state of the stream is copied from
1408  * udp_state.
1409  */
1410 static void
udp_capability_req(queue_t * q,mblk_t * mp)1411 udp_capability_req(queue_t *q, mblk_t *mp)
1412 {
1413 	t_uscalar_t		cap_bits1;
1414 	struct T_capability_ack	*tcap;
1415 	udp_t	*udp = Q_TO_UDP(q);
1416 
1417 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1418 
1419 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1420 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1421 	if (!mp)
1422 		return;
1423 
1424 	tcap = (struct T_capability_ack *)mp->b_rptr;
1425 	udp_do_capability_ack(udp, tcap, cap_bits1);
1426 
1427 	qreply(q, mp);
1428 }
1429 
1430 /*
1431  * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
1432  * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1433  * The current state of the stream is copied from udp_state.
1434  */
1435 static void
udp_info_req(queue_t * q,mblk_t * mp)1436 udp_info_req(queue_t *q, mblk_t *mp)
1437 {
1438 	udp_t *udp = Q_TO_UDP(q);
1439 
1440 	/* Create a T_INFO_ACK message. */
1441 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1442 	    T_INFO_ACK);
1443 	if (!mp)
1444 		return;
1445 	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
1446 	qreply(q, mp);
1447 }
1448 
1449 /* For /dev/udp aka AF_INET open */
1450 static int
udp_openv4(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * credp)1451 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1452 {
1453 	return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
1454 }
1455 
1456 /* For /dev/udp6 aka AF_INET6 open */
1457 static int
udp_openv6(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * credp)1458 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1459 {
1460 	return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
1461 }
1462 
1463 /*
1464  * This is the open routine for udp.  It allocates a udp_t structure for
1465  * the stream and, on the first open of the module, creates an ND table.
1466  */
1467 static int
udp_open(queue_t * q,dev_t * devp,int flag,int sflag,cred_t * credp,boolean_t isv6)1468 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1469     boolean_t isv6)
1470 {
1471 	udp_t		*udp;
1472 	conn_t		*connp;
1473 	dev_t		conn_dev;
1474 	vmem_t		*minor_arena;
1475 	int		err;
1476 
1477 	/* If the stream is already open, return immediately. */
1478 	if (q->q_ptr != NULL)
1479 		return (0);
1480 
1481 	if (sflag == MODOPEN)
1482 		return (EINVAL);
1483 
1484 	if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
1485 	    ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
1486 		minor_arena = ip_minor_arena_la;
1487 	} else {
1488 		/*
1489 		 * Either minor numbers in the large arena were exhausted
1490 		 * or a non socket application is doing the open.
1491 		 * Try to allocate from the small arena.
1492 		 */
1493 		if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
1494 			return (EBUSY);
1495 
1496 		minor_arena = ip_minor_arena_sa;
1497 	}
1498 
1499 	if (flag & SO_FALLBACK) {
1500 		/*
1501 		 * Non streams socket needs a stream to fallback to
1502 		 */
1503 		RD(q)->q_ptr = (void *)conn_dev;
1504 		WR(q)->q_qinfo = &udp_fallback_sock_winit;
1505 		WR(q)->q_ptr = (void *)minor_arena;
1506 		qprocson(q);
1507 		return (0);
1508 	}
1509 
1510 	connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
1511 	if (connp == NULL) {
1512 		inet_minor_free(minor_arena, conn_dev);
1513 		return (err);
1514 	}
1515 	udp = connp->conn_udp;
1516 
1517 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1518 	connp->conn_dev = conn_dev;
1519 	connp->conn_minor_arena = minor_arena;
1520 
1521 	/*
1522 	 * Initialize the udp_t structure for this stream.
1523 	 */
1524 	q->q_ptr = connp;
1525 	WR(q)->q_ptr = connp;
1526 	connp->conn_rq = q;
1527 	connp->conn_wq = WR(q);
1528 
1529 	/*
1530 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
1531 	 * need to lock anything.
1532 	 */
1533 	ASSERT(connp->conn_proto == IPPROTO_UDP);
1534 	ASSERT(connp->conn_udp == udp);
1535 	ASSERT(udp->udp_connp == connp);
1536 
1537 	if (flag & SO_SOCKSTR) {
1538 		udp->udp_issocket = B_TRUE;
1539 	}
1540 
1541 	WR(q)->q_hiwat = connp->conn_sndbuf;
1542 	WR(q)->q_lowat = connp->conn_sndlowat;
1543 
1544 	qprocson(q);
1545 
1546 	/* Set the Stream head write offset and high watermark. */
1547 	(void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
1548 	(void) proto_set_rx_hiwat(q, connp,
1549 	    udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
1550 
1551 	mutex_enter(&connp->conn_lock);
1552 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1553 	mutex_exit(&connp->conn_lock);
1554 	return (0);
1555 }
1556 
/*
 * Which UDP options are OK to set through T_UNITDATA_REQ...
 *
 * Called with the option's level and name; returning B_FALSE rejects the
 * option.  Neither argument is examined at present: every option is
 * allowed.
 */
/* ARGSUSED */
static boolean_t
udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
{
	return (B_TRUE);
}
1566 
1567 /*
1568  * This routine gets default values of certain options whose default
1569  * values are maintained by protcol specific code
1570  */
1571 int
udp_opt_default(queue_t * q,t_scalar_t level,t_scalar_t name,uchar_t * ptr)1572 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1573 {
1574 	udp_t		*udp = Q_TO_UDP(q);
1575 	udp_stack_t *us = udp->udp_us;
1576 	int *i1 = (int *)ptr;
1577 
1578 	switch (level) {
1579 	case IPPROTO_IP:
1580 		switch (name) {
1581 		case IP_MULTICAST_TTL:
1582 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1583 			return (sizeof (uchar_t));
1584 		case IP_MULTICAST_LOOP:
1585 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1586 			return (sizeof (uchar_t));
1587 		}
1588 		break;
1589 	case IPPROTO_IPV6:
1590 		switch (name) {
1591 		case IPV6_MULTICAST_HOPS:
1592 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1593 			return (sizeof (int));
1594 		case IPV6_MULTICAST_LOOP:
1595 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1596 			return (sizeof (int));
1597 		case IPV6_UNICAST_HOPS:
1598 			*i1 = us->us_ipv6_hoplimit;
1599 			return (sizeof (int));
1600 		}
1601 		break;
1602 	}
1603 	return (-1);
1604 }
1605 
1606 /*
1607  * This routine retrieves the current status of socket options.
1608  * It returns the size of the option retrieved, or -1.
1609  */
1610 int
udp_opt_get(conn_t * connp,t_scalar_t level,t_scalar_t name,uchar_t * ptr)1611 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
1612     uchar_t *ptr)
1613 {
1614 	int		*i1 = (int *)ptr;
1615 	udp_t		*udp = connp->conn_udp;
1616 	int		len;
1617 	conn_opt_arg_t	coas;
1618 	int		retval;
1619 
1620 	coas.coa_connp = connp;
1621 	coas.coa_ixa = connp->conn_ixa;
1622 	coas.coa_ipp = &connp->conn_xmit_ipp;
1623 	coas.coa_ancillary = B_FALSE;
1624 	coas.coa_changed = 0;
1625 
1626 	/*
1627 	 * We assume that the optcom framework has checked for the set
1628 	 * of levels and names that are supported, hence we don't worry
1629 	 * about rejecting based on that.
1630 	 * First check for UDP specific handling, then pass to common routine.
1631 	 */
1632 	switch (level) {
1633 	case IPPROTO_IP:
1634 		/*
1635 		 * Only allow IPv4 option processing on IPv4 sockets.
1636 		 */
1637 		if (connp->conn_family != AF_INET)
1638 			return (-1);
1639 
1640 		switch (name) {
1641 		case IP_OPTIONS:
1642 		case T_IP_OPTIONS:
1643 			mutex_enter(&connp->conn_lock);
1644 			if (!(udp->udp_recv_ipp.ipp_fields &
1645 			    IPPF_IPV4_OPTIONS)) {
1646 				mutex_exit(&connp->conn_lock);
1647 				return (0);
1648 			}
1649 
1650 			len = udp->udp_recv_ipp.ipp_ipv4_options_len;
1651 			ASSERT(len != 0);
1652 			bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
1653 			mutex_exit(&connp->conn_lock);
1654 			return (len);
1655 		}
1656 		break;
1657 	case IPPROTO_UDP:
1658 		switch (name) {
1659 		case UDP_NAT_T_ENDPOINT:
1660 			mutex_enter(&connp->conn_lock);
1661 			*i1 = udp->udp_nat_t_endpoint;
1662 			mutex_exit(&connp->conn_lock);
1663 			return (sizeof (int));
1664 		case UDP_RCVHDR:
1665 			mutex_enter(&connp->conn_lock);
1666 			*i1 = udp->udp_rcvhdr ? 1 : 0;
1667 			mutex_exit(&connp->conn_lock);
1668 			return (sizeof (int));
1669 		case UDP_SRCPORT_HASH:
1670 			mutex_enter(&connp->conn_lock);
1671 			*i1 = udp->udp_vxlanhash;
1672 			mutex_exit(&connp->conn_lock);
1673 			return (sizeof (int));
1674 		}
1675 	}
1676 	mutex_enter(&connp->conn_lock);
1677 	retval = conn_opt_get(&coas, level, name, ptr);
1678 	mutex_exit(&connp->conn_lock);
1679 	return (retval);
1680 }
1681 
1682 /*
1683  * This routine retrieves the current status of socket options.
1684  * It returns the size of the option retrieved, or -1.
1685  */
1686 int
udp_tpi_opt_get(queue_t * q,t_scalar_t level,t_scalar_t name,uchar_t * ptr)1687 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
1688 {
1689 	conn_t		*connp = Q_TO_CONN(q);
1690 	int		err;
1691 
1692 	err = udp_opt_get(connp, level, name, ptr);
1693 	return (err);
1694 }
1695 
1696 /*
1697  * This routine sets socket options.
1698  */
1699 int
udp_do_opt_set(conn_opt_arg_t * coa,int level,int name,uint_t inlen,uchar_t * invalp,cred_t * cr,boolean_t checkonly)1700 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
1701     uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
1702 {
1703 	conn_t		*connp = coa->coa_connp;
1704 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1705 	udp_t		*udp = connp->conn_udp;
1706 	udp_stack_t	*us = udp->udp_us;
1707 	int		*i1 = (int *)invalp;
1708 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1709 	int		error;
1710 
1711 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1712 	/*
1713 	 * First do UDP specific sanity checks and handle UDP specific
1714 	 * options. Note that some IPPROTO_UDP options are handled
1715 	 * by conn_opt_set.
1716 	 */
1717 	switch (level) {
1718 	case SOL_SOCKET:
1719 		switch (name) {
1720 		case SO_SNDBUF:
1721 			if (*i1 > us->us_max_buf) {
1722 				return (ENOBUFS);
1723 			}
1724 			break;
1725 		case SO_RCVBUF:
1726 			if (*i1 > us->us_max_buf) {
1727 				return (ENOBUFS);
1728 			}
1729 			break;
1730 
1731 		case SCM_UCRED: {
1732 			struct ucred_s *ucr;
1733 			cred_t *newcr;
1734 			ts_label_t *tsl;
1735 
1736 			/*
1737 			 * Only sockets that have proper privileges and are
1738 			 * bound to MLPs will have any other value here, so
1739 			 * this implicitly tests for privilege to set label.
1740 			 */
1741 			if (connp->conn_mlp_type == mlptSingle)
1742 				break;
1743 
1744 			ucr = (struct ucred_s *)invalp;
1745 			if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
1746 			    ucr->uc_labeloff < sizeof (*ucr) ||
1747 			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
1748 				return (EINVAL);
1749 			if (!checkonly) {
1750 				/*
1751 				 * Set ixa_tsl to the new label.
1752 				 * We assume that crgetzoneid doesn't change
1753 				 * as part of the SCM_UCRED.
1754 				 */
1755 				ASSERT(cr != NULL);
1756 				if ((tsl = crgetlabel(cr)) == NULL)
1757 					return (EINVAL);
1758 				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
1759 				    tsl->tsl_doi, KM_NOSLEEP);
1760 				if (newcr == NULL)
1761 					return (ENOSR);
1762 				ASSERT(newcr->cr_label != NULL);
1763 				/*
1764 				 * Move the hold on the cr_label to ixa_tsl by
1765 				 * setting cr_label to NULL. Then release newcr.
1766 				 */
1767 				ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
1768 				ixa->ixa_flags |= IXAF_UCRED_TSL;
1769 				newcr->cr_label = NULL;
1770 				crfree(newcr);
1771 				coa->coa_changed |= COA_HEADER_CHANGED;
1772 				coa->coa_changed |= COA_WROFF_CHANGED;
1773 			}
1774 			/* Fully handled this option. */
1775 			return (0);
1776 		}
1777 		}
1778 		break;
1779 	case IPPROTO_UDP:
1780 		switch (name) {
1781 		case UDP_NAT_T_ENDPOINT:
1782 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1783 				return (error);
1784 			}
1785 
1786 			/*
1787 			 * Use conn_family instead so we can avoid ambiguitites
1788 			 * with AF_INET6 sockets that may switch from IPv4
1789 			 * to IPv6.
1790 			 */
1791 			if (connp->conn_family != AF_INET) {
1792 				return (EAFNOSUPPORT);
1793 			}
1794 
1795 			if (!checkonly) {
1796 				mutex_enter(&connp->conn_lock);
1797 				udp->udp_nat_t_endpoint = onoff;
1798 				mutex_exit(&connp->conn_lock);
1799 				coa->coa_changed |= COA_HEADER_CHANGED;
1800 				coa->coa_changed |= COA_WROFF_CHANGED;
1801 			}
1802 			/* Fully handled this option. */
1803 			return (0);
1804 		case UDP_RCVHDR:
1805 			mutex_enter(&connp->conn_lock);
1806 			udp->udp_rcvhdr = onoff;
1807 			mutex_exit(&connp->conn_lock);
1808 			return (0);
1809 		case UDP_SRCPORT_HASH:
1810 			/*
1811 			 * This should have already been verified, but double
1812 			 * check.
1813 			 */
1814 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1815 				return (error);
1816 			}
1817 
1818 			/* First see if the val is something we understand */
1819 			if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN)
1820 				return (EINVAL);
1821 
1822 			if (!checkonly) {
1823 				mutex_enter(&connp->conn_lock);
1824 				udp->udp_vxlanhash = *i1;
1825 				mutex_exit(&connp->conn_lock);
1826 			}
1827 			/* Fully handled this option. */
1828 			return (0);
1829 		}
1830 		break;
1831 	}
1832 	error = conn_opt_set(coa, level, name, inlen, invalp,
1833 	    checkonly, cr);
1834 	return (error);
1835 }
1836 
1837 /*
1838  * This routine sets socket options.
1839  */
1840 int
udp_opt_set(conn_t * connp,uint_t optset_context,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)1841 udp_opt_set(conn_t *connp, uint_t optset_context, int level,
1842     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
1843     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
1844 {
1845 	udp_t		*udp = connp->conn_udp;
1846 	int		err;
1847 	conn_opt_arg_t	coas, *coa;
1848 	boolean_t	checkonly;
1849 	udp_stack_t	*us = udp->udp_us;
1850 
1851 	switch (optset_context) {
1852 	case SETFN_OPTCOM_CHECKONLY:
1853 		checkonly = B_TRUE;
1854 		/*
1855 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
1856 		 * inlen != 0 implies value supplied and
1857 		 *	we have to "pretend" to set it.
1858 		 * inlen == 0 implies that there is no
1859 		 *	value part in T_CHECK request and just validation
1860 		 * done elsewhere should be enough, we just return here.
1861 		 */
1862 		if (inlen == 0) {
1863 			*outlenp = 0;
1864 			return (0);
1865 		}
1866 		break;
1867 	case SETFN_OPTCOM_NEGOTIATE:
1868 		checkonly = B_FALSE;
1869 		break;
1870 	case SETFN_UD_NEGOTIATE:
1871 	case SETFN_CONN_NEGOTIATE:
1872 		checkonly = B_FALSE;
1873 		/*
1874 		 * Negotiating local and "association-related" options
1875 		 * through T_UNITDATA_REQ.
1876 		 *
1877 		 * Following routine can filter out ones we do not
1878 		 * want to be "set" this way.
1879 		 */
1880 		if (!udp_opt_allow_udr_set(level, name)) {
1881 			*outlenp = 0;
1882 			return (EINVAL);
1883 		}
1884 		break;
1885 	default:
1886 		/*
1887 		 * We should never get here
1888 		 */
1889 		*outlenp = 0;
1890 		return (EINVAL);
1891 	}
1892 
1893 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
1894 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
1895 
1896 	if (thisdg_attrs != NULL) {
1897 		/* Options from T_UNITDATA_REQ */
1898 		coa = (conn_opt_arg_t *)thisdg_attrs;
1899 		ASSERT(coa->coa_connp == connp);
1900 		ASSERT(coa->coa_ixa != NULL);
1901 		ASSERT(coa->coa_ipp != NULL);
1902 		ASSERT(coa->coa_ancillary);
1903 	} else {
1904 		coa = &coas;
1905 		coas.coa_connp = connp;
1906 		/* Get a reference on conn_ixa to prevent concurrent mods */
1907 		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
1908 		if (coas.coa_ixa == NULL) {
1909 			*outlenp = 0;
1910 			return (ENOMEM);
1911 		}
1912 		coas.coa_ipp = &connp->conn_xmit_ipp;
1913 		coas.coa_ancillary = B_FALSE;
1914 		coas.coa_changed = 0;
1915 	}
1916 
1917 	err = udp_do_opt_set(coa, level, name, inlen, invalp,
1918 	    cr, checkonly);
1919 	if (err != 0) {
1920 errout:
1921 		if (!coa->coa_ancillary)
1922 			ixa_refrele(coa->coa_ixa);
1923 		*outlenp = 0;
1924 		return (err);
1925 	}
1926 	/* Handle DHCPINIT here outside of lock */
1927 	if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
1928 		uint_t	ifindex;
1929 		ill_t	*ill;
1930 
1931 		ifindex = *(uint_t *)invalp;
1932 		if (ifindex == 0) {
1933 			ill = NULL;
1934 		} else {
1935 			ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
1936 			    coa->coa_ixa->ixa_ipst);
1937 			if (ill == NULL) {
1938 				err = ENXIO;
1939 				goto errout;
1940 			}
1941 
1942 			mutex_enter(&ill->ill_lock);
1943 			if (ill->ill_state_flags & ILL_CONDEMNED) {
1944 				mutex_exit(&ill->ill_lock);
1945 				ill_refrele(ill);
1946 				err = ENXIO;
1947 				goto errout;
1948 			}
1949 			if (IS_VNI(ill)) {
1950 				mutex_exit(&ill->ill_lock);
1951 				ill_refrele(ill);
1952 				err = EINVAL;
1953 				goto errout;
1954 			}
1955 		}
1956 		mutex_enter(&connp->conn_lock);
1957 
1958 		if (connp->conn_dhcpinit_ill != NULL) {
1959 			/*
1960 			 * We've locked the conn so conn_cleanup_ill()
1961 			 * cannot clear conn_dhcpinit_ill -- so it's
1962 			 * safe to access the ill.
1963 			 */
1964 			ill_t *oill = connp->conn_dhcpinit_ill;
1965 
1966 			ASSERT(oill->ill_dhcpinit != 0);
1967 			atomic_dec_32(&oill->ill_dhcpinit);
1968 			ill_set_inputfn(connp->conn_dhcpinit_ill);
1969 			connp->conn_dhcpinit_ill = NULL;
1970 		}
1971 
1972 		if (ill != NULL) {
1973 			connp->conn_dhcpinit_ill = ill;
1974 			atomic_inc_32(&ill->ill_dhcpinit);
1975 			ill_set_inputfn(ill);
1976 			mutex_exit(&connp->conn_lock);
1977 			mutex_exit(&ill->ill_lock);
1978 			ill_refrele(ill);
1979 		} else {
1980 			mutex_exit(&connp->conn_lock);
1981 		}
1982 	}
1983 
1984 	/*
1985 	 * Common case of OK return with outval same as inval.
1986 	 */
1987 	if (invalp != outvalp) {
1988 		/* don't trust bcopy for identical src/dst */
1989 		(void) bcopy(invalp, outvalp, inlen);
1990 	}
1991 	*outlenp = inlen;
1992 
1993 	/*
1994 	 * If this was not ancillary data, then we rebuild the headers,
1995 	 * update the IRE/NCE, and IPsec as needed.
1996 	 * Since the label depends on the destination we go through
1997 	 * ip_set_destination first.
1998 	 */
1999 	if (coa->coa_ancillary) {
2000 		return (0);
2001 	}
2002 
2003 	if (coa->coa_changed & COA_ROUTE_CHANGED) {
2004 		in6_addr_t saddr, faddr, nexthop;
2005 		in_port_t fport;
2006 
2007 		/*
2008 		 * We clear lastdst to make sure we pick up the change
2009 		 * next time sending.
2010 		 * If we are connected we re-cache the information.
2011 		 * We ignore errors to preserve BSD behavior.
2012 		 * Note that we don't redo IPsec policy lookup here
2013 		 * since the final destination (or source) didn't change.
2014 		 */
2015 		mutex_enter(&connp->conn_lock);
2016 		connp->conn_v6lastdst = ipv6_all_zeros;
2017 
2018 		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
2019 		    &connp->conn_faddr_v6, &nexthop);
2020 		saddr = connp->conn_saddr_v6;
2021 		faddr = connp->conn_faddr_v6;
2022 		fport = connp->conn_fport;
2023 		mutex_exit(&connp->conn_lock);
2024 
2025 		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
2026 		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
2027 			(void) ip_attr_connect(connp, coa->coa_ixa,
2028 			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
2029 			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
2030 		}
2031 	}
2032 
2033 	ixa_refrele(coa->coa_ixa);
2034 
2035 	if (coa->coa_changed & COA_HEADER_CHANGED) {
2036 		/*
2037 		 * Rebuild the header template if we are connected.
2038 		 * Otherwise clear conn_v6lastdst so we rebuild the header
2039 		 * in the data path.
2040 		 */
2041 		mutex_enter(&connp->conn_lock);
2042 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
2043 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
2044 			err = udp_build_hdr_template(connp,
2045 			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
2046 			    connp->conn_fport, connp->conn_flowinfo);
2047 			if (err != 0) {
2048 				mutex_exit(&connp->conn_lock);
2049 				return (err);
2050 			}
2051 		} else {
2052 			connp->conn_v6lastdst = ipv6_all_zeros;
2053 		}
2054 		mutex_exit(&connp->conn_lock);
2055 	}
2056 	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
2057 		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
2058 		    connp->conn_rcvbuf);
2059 	}
2060 	if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
2061 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
2062 	}
2063 	if (coa->coa_changed & COA_WROFF_CHANGED) {
2064 		/* Increase wroff if needed */
2065 		uint_t wroff;
2066 
2067 		mutex_enter(&connp->conn_lock);
2068 		wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
2069 		if (udp->udp_nat_t_endpoint)
2070 			wroff += sizeof (uint32_t);
2071 		if (wroff > connp->conn_wroff) {
2072 			connp->conn_wroff = wroff;
2073 			mutex_exit(&connp->conn_lock);
2074 			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
2075 		} else {
2076 			mutex_exit(&connp->conn_lock);
2077 		}
2078 	}
2079 	return (err);
2080 }
2081 
2082 /* This routine sets socket options. */
2083 int
udp_tpi_opt_set(queue_t * q,uint_t optset_context,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)2084 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
2085     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2086     void *thisdg_attrs, cred_t *cr)
2087 {
2088 	conn_t	*connp = Q_TO_CONN(q);
2089 	int error;
2090 
2091 	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
2092 	    outlenp, outvalp, thisdg_attrs, cr);
2093 	return (error);
2094 }
2095 
2096 /*
2097  * Setup IP and UDP headers.
2098  * Returns NULL on allocation failure, in which case data_mp is freed.
2099  */
2100 mblk_t *
udp_prepend_hdr(conn_t * connp,ip_xmit_attr_t * ixa,const ip_pkt_t * ipp,const in6_addr_t * v6src,const in6_addr_t * v6dst,in_port_t dstport,uint32_t flowinfo,mblk_t * data_mp,int * errorp)2101 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2102     const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
2103     uint32_t flowinfo, mblk_t *data_mp, int *errorp)
2104 {
2105 	mblk_t		*mp;
2106 	udpha_t		*udpha;
2107 	udp_stack_t	*us = connp->conn_netstack->netstack_udp;
2108 	uint_t		data_len;
2109 	uint32_t	cksum;
2110 	udp_t		*udp = connp->conn_udp;
2111 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
2112 	boolean_t	hash_srcport = udp->udp_vxlanhash;
2113 	uint_t		ulp_hdr_len;
2114 	uint16_t	srcport;
2115 
2116 	data_len = msgdsize(data_mp);
2117 	ulp_hdr_len = UDPH_SIZE;
2118 	if (insert_spi)
2119 		ulp_hdr_len += sizeof (uint32_t);
2120 
2121 	/*
2122 	 * If we have source port hashing going on, determine the hash before
2123 	 * we modify the mblk_t.
2124 	 */
2125 	if (hash_srcport == B_TRUE) {
2126 		srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
2127 		    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
2128 		    ntohs(connp->conn_lport));
2129 	}
2130 
2131 	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
2132 	    ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
2133 	if (mp == NULL) {
2134 		ASSERT(*errorp != 0);
2135 		return (NULL);
2136 	}
2137 
2138 	data_len += ulp_hdr_len;
2139 	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
2140 
2141 	udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
2142 	if (hash_srcport == B_TRUE) {
2143 		udpha->uha_src_port = htons(srcport);
2144 	} else {
2145 		udpha->uha_src_port = connp->conn_lport;
2146 	}
2147 	udpha->uha_dst_port = dstport;
2148 	udpha->uha_checksum = 0;
2149 	udpha->uha_length = htons(data_len);
2150 
2151 	/*
2152 	 * If there was a routing option/header then conn_prepend_hdr
2153 	 * has massaged it and placed the pseudo-header checksum difference
2154 	 * in the cksum argument.
2155 	 *
2156 	 * Setup header length and prepare for ULP checksum done in IP.
2157 	 *
2158 	 * We make it easy for IP to include our pseudo header
2159 	 * by putting our length in uha_checksum.
2160 	 * The IP source, destination, and length have already been set by
2161 	 * conn_prepend_hdr.
2162 	 */
2163 	cksum += data_len;
2164 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
2165 	ASSERT(cksum < 0x10000);
2166 
2167 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2168 		ipha_t	*ipha = (ipha_t *)mp->b_rptr;
2169 
2170 		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
2171 
2172 		/* IP does the checksum if uha_checksum is non-zero */
2173 		if (us->us_do_checksum) {
2174 			if (cksum == 0)
2175 				udpha->uha_checksum = 0xffff;
2176 			else
2177 				udpha->uha_checksum = htons(cksum);
2178 		} else {
2179 			udpha->uha_checksum = 0;
2180 		}
2181 	} else {
2182 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2183 
2184 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
2185 		if (cksum == 0)
2186 			udpha->uha_checksum = 0xffff;
2187 		else
2188 			udpha->uha_checksum = htons(cksum);
2189 	}
2190 
2191 	/* Insert all-0s SPI now. */
2192 	if (insert_spi)
2193 		*((uint32_t *)(udpha + 1)) = 0;
2194 
2195 	return (mp);
2196 }
2197 
2198 static int
udp_build_hdr_template(conn_t * connp,const in6_addr_t * v6src,const in6_addr_t * v6dst,in_port_t dstport,uint32_t flowinfo)2199 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
2200     const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
2201 {
2202 	udpha_t		*udpha;
2203 	int		error;
2204 
2205 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2206 	/*
2207 	 * We clear lastdst to make sure we don't use the lastdst path
2208 	 * next time sending since we might not have set v6dst yet.
2209 	 */
2210 	connp->conn_v6lastdst = ipv6_all_zeros;
2211 
2212 	error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
2213 	    flowinfo);
2214 	if (error != 0)
2215 		return (error);
2216 
2217 	/*
2218 	 * Any routing header/option has been massaged. The checksum difference
2219 	 * is stored in conn_sum.
2220 	 */
2221 	udpha = (udpha_t *)connp->conn_ht_ulp;
2222 	udpha->uha_src_port = connp->conn_lport;
2223 	udpha->uha_dst_port = dstport;
2224 	udpha->uha_checksum = 0;
2225 	udpha->uha_length = htons(UDPH_SIZE);	/* Filled in later */
2226 	return (0);
2227 }
2228 
2229 static mblk_t *
udp_queue_fallback(udp_t * udp,mblk_t * mp)2230 udp_queue_fallback(udp_t *udp, mblk_t *mp)
2231 {
2232 	ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
2233 	if (IPCL_IS_NONSTR(udp->udp_connp)) {
2234 		/*
2235 		 * fallback has started but messages have not been moved yet
2236 		 */
2237 		if (udp->udp_fallback_queue_head == NULL) {
2238 			ASSERT(udp->udp_fallback_queue_tail == NULL);
2239 			udp->udp_fallback_queue_head = mp;
2240 			udp->udp_fallback_queue_tail = mp;
2241 		} else {
2242 			ASSERT(udp->udp_fallback_queue_tail != NULL);
2243 			udp->udp_fallback_queue_tail->b_next = mp;
2244 			udp->udp_fallback_queue_tail = mp;
2245 		}
2246 		return (NULL);
2247 	} else {
2248 		/*
2249 		 * Fallback completed, let the caller putnext() the mblk.
2250 		 */
2251 		return (mp);
2252 	}
2253 }
2254 
2255 /*
2256  * Deliver data to ULP. In case we have a socket, and it's falling back to
2257  * TPI, then we'll queue the mp for later processing.
2258  */
2259 static void
udp_ulp_recv(conn_t * connp,mblk_t * mp,uint_t len,ip_recv_attr_t * ira)2260 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
2261 {
2262 	if (IPCL_IS_NONSTR(connp)) {
2263 		udp_t *udp = connp->conn_udp;
2264 		int error;
2265 
2266 		ASSERT(len == msgdsize(mp));
2267 		if ((*connp->conn_upcalls->su_recv)
2268 		    (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
2269 			mutex_enter(&udp->udp_recv_lock);
2270 			if (error == ENOSPC) {
2271 				/*
2272 				 * let's confirm while holding the lock
2273 				 */
2274 				if ((*connp->conn_upcalls->su_recv)
2275 				    (connp->conn_upper_handle, NULL, 0, 0,
2276 				    &error, NULL) < 0) {
2277 					ASSERT(error == ENOSPC);
2278 					if (error == ENOSPC) {
2279 						connp->conn_flow_cntrld =
2280 						    B_TRUE;
2281 					}
2282 				}
2283 				mutex_exit(&udp->udp_recv_lock);
2284 			} else {
2285 				ASSERT(error == EOPNOTSUPP);
2286 				mp = udp_queue_fallback(udp, mp);
2287 				mutex_exit(&udp->udp_recv_lock);
2288 				if (mp != NULL)
2289 					putnext(connp->conn_rq, mp);
2290 			}
2291 		}
2292 		ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
2293 	} else {
2294 		if (is_system_labeled()) {
2295 			ASSERT(ira->ira_cred != NULL);
2296 			/*
2297 			 * Provide for protocols above UDP such as RPC
2298 			 * NOPID leaves db_cpid unchanged.
2299 			 */
2300 			mblk_setcred(mp, ira->ira_cred, NOPID);
2301 		}
2302 
2303 		putnext(connp->conn_rq, mp);
2304 	}
2305 }
2306 
/*
 * This is the inbound data path.
 * IP has already pulled up the IP plus UDP headers and verified alignment
 * etc.
 *
 * arg1 is the conn_t the packet was fanned out to, mp is the M_DATA
 * packet starting at the IP header, arg2 is unused and ira carries the
 * receive attributes (packet length, IP header length, flags).
 *
 * The packet is dropped (freed, udpInErrors bumped) when the UDP length
 * does not agree with the IP-derived length, or when an allocation needed
 * along the way fails.  Otherwise a T_UNITDATA_IND mblk holding the
 * source address and any requested ancillary data is allocated, the
 * packet is linked behind it, and the result is handed to udp_ulp_recv().
 */
/* ARGSUSED2 */
static void
udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t			*connp = (conn_t *)arg1;
	struct T_unitdata_ind	*tudi;
	uchar_t			*rptr;		/* Pointer to IP header */
	int			hdr_length;	/* Length of IP+UDP headers */
	int			udi_size;	/* Size of T_unitdata_ind */
	int			pkt_len;
	udp_t			*udp;
	udpha_t			*udpha;
	ip_pkt_t		ipps;
	ip6_t			*ip6h;
	mblk_t			*mp1;
	uint32_t		udp_ipv4_options_len;
	crb_t			recv_ancillary;
	udp_stack_t		*us;

	ASSERT(connp->conn_flags & IPCL_UDPCONN);

	udp = connp->conn_udp;
	us = udp->udp_us;
	rptr = mp->b_rptr;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(OK_32PTR(rptr));
	ASSERT(ira->ira_pktlen == msgdsize(mp));
	pkt_len = ira->ira_pktlen;

	/*
	 * Get a snapshot of these and allow other threads to change
	 * them after that. We need the same recv_ancillary when determining
	 * the size as when adding the ancillary data items.
	 */
	mutex_enter(&connp->conn_lock);
	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
	recv_ancillary = connp->conn_recv_ancillary;
	mutex_exit(&connp->conn_lock);

	/* At this point hdr_length covers the IP header only. */
	hdr_length = ira->ira_ip_hdr_length;

	/*
	 * IP inspected the UDP header thus all of it must be in the mblk.
	 * UDP length check is performed for IPv6 packets and IPv4 packets
	 * to check if the size of the packet as specified
	 * by the UDP header is the same as the length derived from the IP
	 * header.
	 */
	udpha = (udpha_t *)(rptr + hdr_length);
	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
		goto tossit;

	/* From here on hdr_length covers the IP and UDP headers. */
	hdr_length += UDPH_SIZE;
	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */

	/* Initialize regardless of IP version */
	ipps.ipp_fields = 0;

	/*
	 * Taken when this packet carries IPv4 options, or when options were
	 * recorded from an earlier packet (snapshot length above non-zero).
	 */
	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
	    udp_ipv4_options_len > 0) &&
	    connp->conn_family == AF_INET) {
		int	err;

		/*
		 * Record/update udp_recv_ipp with the lock
		 * held. Not needed for AF_INET6 sockets
		 * since they don't support a getsockopt of IP_OPTIONS.
		 */
		mutex_enter(&connp->conn_lock);
		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
		    B_TRUE);
		if (err != 0) {
			/* Allocation failed. Drop packet */
			mutex_exit(&connp->conn_lock);
			freemsg(mp);
			UDPS_BUMP_MIB(us, udpInErrors);
			return;
		}
		mutex_exit(&connp->conn_lock);
	}

	if (recv_ancillary.crb_all != 0) {
		/*
		 * Record packet information in the ip_pkt_t
		 */
		if (ira->ira_flags & IRAF_IS_IPV4) {
			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));

			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
		} else {
			uint8_t nexthdrp;

			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
			/*
			 * IPv6 packets can only be received by applications
			 * that are prepared to receive IPv6 addresses.
			 * The IP fanout must ensure this.
			 */
			ASSERT(connp->conn_family == AF_INET6);

			ip6h = (ip6_t *)rptr;

			/* We don't care about the length, but need the ipp */
			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
			    &nexthdrp);
			ASSERT(hdr_length == ira->ira_ip_hdr_length);
			/* Restore */
			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
			ASSERT(nexthdrp == IPPROTO_UDP);
		}
	}

	/*
	 * This is the inbound data path.  Packets are passed upstream as
	 * T_UNITDATA_IND messages.
	 */
	if (connp->conn_family == AF_INET) {
		sin_t *sin;

		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);

		/*
		 * Normally only send up the source address.
		 * If any ancillary data items are wanted we add those.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		/* Allocate a message block for the T_UNITDATA_IND structure. */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			UDPS_BUMP_MIB(us, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin_t);
		/* udi_size is reduced to the size of the options area only. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
		tudi->OPT_length = udi_size;
		sin = (sin_t *)&tudi[1];
		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
		sin->sin_port =	udpha->uha_src_port;
		sin->sin_family = connp->conn_family;
		/* Clear sin_zero, four bytes at a time. */
		*(uint32_t *)&sin->sin_zero[0] = 0;
		*(uint32_t *)&sin->sin_zero[4] = 0;

		/*
		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA,
		 * IP_RECVTTL or IP_RECVTOS has been set.
		 */
		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin[1], udi_size);
		}
	} else {
		sin6_t *sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);

		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			UDPS_BUMP_MIB(us, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin6_t);
		/* udi_size is reduced to the size of the options area only. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
		tudi->OPT_length = udi_size;
		sin6 = (sin6_t *)&tudi[1];
		if (ira->ira_flags & IRAF_IS_IPV4) {
			in6_addr_t v6dst;

			/* IPv4 packet on an IPv6 socket: use mapped addresses. */
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
			    &sin6->sin6_addr);
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
			    &v6dst);
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
			    IPCL_ZONEID(connp), us->us_netstack);
		} else {
			ip6h = (ip6_t *)rptr;

			sin6->sin6_addr = ip6h->ip6_src;
			/* No sin6_flowinfo per API */
			sin6->sin6_flowinfo = 0;
			/* For link-scope pass up scope id */
			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
				sin6->sin6_scope_id = ira->ira_ruifindex;
			else
				sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(
			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
			    us->us_netstack);
		}
		sin6->sin6_port = udpha->uha_src_port;
		sin6->sin6_family = connp->conn_family;

		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin6[1], udi_size);
		}
	}

	/*
	 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
	 * loopback traffic).
	 */
	DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
	    void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);

	/* Walk past the headers unless IP_RECVHDR was set. */
	if (!udp->udp_rcvhdr) {
		mp->b_rptr = rptr + hdr_length;
		pkt_len -= hdr_length;
	}

	UDPS_BUMP_MIB(us, udpHCInDatagrams);
	/* mp1 heads the chain: T_UNITDATA_IND first, packet data behind it. */
	udp_ulp_recv(connp, mp1, pkt_len, ira);
	return;

tossit:
	/* Length mismatch between the UDP and IP headers: drop and count. */
	freemsg(mp);
	UDPS_BUMP_MIB(us, udpInErrors);
}
2571 
2572 /*
2573  * This routine creates a T_UDERROR_IND message and passes it upstream.
2574  * The address and options are copied from the T_UNITDATA_REQ message
2575  * passed in mp.  This message is freed.
2576  */
2577 static void
udp_ud_err(queue_t * q,mblk_t * mp,t_scalar_t err)2578 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
2579 {
2580 	struct T_unitdata_req *tudr;
2581 	mblk_t	*mp1;
2582 	uchar_t *destaddr;
2583 	t_scalar_t destlen;
2584 	uchar_t	*optaddr;
2585 	t_scalar_t optlen;
2586 
2587 	if ((mp->b_wptr < mp->b_rptr) ||
2588 	    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
2589 		goto done;
2590 	}
2591 	tudr = (struct T_unitdata_req *)mp->b_rptr;
2592 	destaddr = mp->b_rptr + tudr->DEST_offset;
2593 	if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
2594 	    destaddr + tudr->DEST_length < mp->b_rptr ||
2595 	    destaddr + tudr->DEST_length > mp->b_wptr) {
2596 		goto done;
2597 	}
2598 	optaddr = mp->b_rptr + tudr->OPT_offset;
2599 	if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
2600 	    optaddr + tudr->OPT_length < mp->b_rptr ||
2601 	    optaddr + tudr->OPT_length > mp->b_wptr) {
2602 		goto done;
2603 	}
2604 	destlen = tudr->DEST_length;
2605 	optlen = tudr->OPT_length;
2606 
2607 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
2608 	    (char *)optaddr, optlen, err);
2609 	if (mp1 != NULL)
2610 		qreply(q, mp1);
2611 
2612 done:
2613 	freemsg(mp);
2614 }
2615 
2616 /*
2617  * This routine removes a port number association from a stream.  It
2618  * is called by udp_wput to handle T_UNBIND_REQ messages.
2619  */
2620 static void
udp_tpi_unbind(queue_t * q,mblk_t * mp)2621 udp_tpi_unbind(queue_t *q, mblk_t *mp)
2622 {
2623 	conn_t	*connp = Q_TO_CONN(q);
2624 	int	error;
2625 
2626 	error = udp_do_unbind(connp);
2627 	if (error) {
2628 		if (error < 0)
2629 			udp_err_ack(q, mp, -error, 0);
2630 		else
2631 			udp_err_ack(q, mp, TSYSERR, error);
2632 		return;
2633 	}
2634 
2635 	mp = mi_tpi_ok_ack_alloc(mp);
2636 	ASSERT(mp != NULL);
2637 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
2638 	qreply(q, mp);
2639 }
2640 
2641 /*
2642  * Don't let port fall into the privileged range.
2643  * Since the extra privileged ports can be arbitrary we also
2644  * ensure that we exclude those from consideration.
2645  * us->us_epriv_ports is not sorted thus we loop over it until
2646  * there are no changes.
2647  */
2648 static in_port_t
udp_update_next_port(udp_t * udp,in_port_t port,boolean_t random)2649 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
2650 {
2651 	int i, bump;
2652 	in_port_t nextport;
2653 	boolean_t restart = B_FALSE;
2654 	udp_stack_t *us = udp->udp_us;
2655 
2656 	if (random && udp_random_anon_port != 0) {
2657 		(void) random_get_pseudo_bytes((uint8_t *)&port,
2658 		    sizeof (in_port_t));
2659 		/*
2660 		 * Unless changed by a sys admin, the smallest anon port
2661 		 * is 32768 and the largest anon port is 65535.  It is
2662 		 * very likely (50%) for the random port to be smaller
2663 		 * than the smallest anon port.  When that happens,
2664 		 * add port % (anon port range) to the smallest anon
2665 		 * port to get the random port.  It should fall into the
2666 		 * valid anon port range.
2667 		 */
2668 		if ((port < us->us_smallest_anon_port) ||
2669 		    (port > us->us_largest_anon_port)) {
2670 			if (us->us_smallest_anon_port ==
2671 			    us->us_largest_anon_port) {
2672 				bump = 0;
2673 			} else {
2674 				bump = port % (us->us_largest_anon_port -
2675 				    us->us_smallest_anon_port);
2676 			}
2677 
2678 			port = us->us_smallest_anon_port + bump;
2679 		}
2680 	}
2681 
2682 retry:
2683 	if (port < us->us_smallest_anon_port)
2684 		port = us->us_smallest_anon_port;
2685 
2686 	if (port > us->us_largest_anon_port) {
2687 		port = us->us_smallest_anon_port;
2688 		if (restart)
2689 			return (0);
2690 		restart = B_TRUE;
2691 	}
2692 
2693 	if (port < us->us_smallest_nonpriv_port)
2694 		port = us->us_smallest_nonpriv_port;
2695 
2696 	for (i = 0; i < us->us_num_epriv_ports; i++) {
2697 		if (port == us->us_epriv_ports[i]) {
2698 			port++;
2699 			/*
2700 			 * Make sure that the port is in the
2701 			 * valid range.
2702 			 */
2703 			goto retry;
2704 		}
2705 	}
2706 
2707 	if (is_system_labeled() &&
2708 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
2709 	    port, IPPROTO_UDP, B_TRUE)) != 0) {
2710 		port = nextport;
2711 		goto retry;
2712 	}
2713 
2714 	return (port);
2715 }
2716 
2717 /*
2718  * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
2719  * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
2720  * the TPI options, otherwise we take them from msg_control.
2721  * If both sin and sin6 is set it is a connected socket and we use conn_faddr.
2722  * Always consumes mp; never consumes tudr_mp.
2723  */
2724 static int
udp_output_ancillary(conn_t * connp,sin_t * sin,sin6_t * sin6,mblk_t * mp,mblk_t * tudr_mp,struct nmsghdr * msg,cred_t * cr,pid_t pid)2725 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
2726     mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
2727 {
2728 	udp_t		*udp = connp->conn_udp;
2729 	udp_stack_t	*us = udp->udp_us;
2730 	int		error;
2731 	ip_xmit_attr_t	*ixa;
2732 	ip_pkt_t	*ipp;
2733 	in6_addr_t	v6src;
2734 	in6_addr_t	v6dst;
2735 	in6_addr_t	v6nexthop;
2736 	in_port_t	dstport;
2737 	uint32_t	flowinfo;
2738 	uint_t		srcid;
2739 	int		is_absreq_failure = 0;
2740 	conn_opt_arg_t	coas, *coa;
2741 
2742 	ASSERT(tudr_mp != NULL || msg != NULL);
2743 
2744 	/*
2745 	 * Get ixa before checking state to handle a disconnect race.
2746 	 *
2747 	 * We need an exclusive copy of conn_ixa since the ancillary data
2748 	 * options might modify it. That copy has no pointers hence we
2749 	 * need to set them up once we've parsed the ancillary data.
2750 	 */
2751 	ixa = conn_get_ixa_exclusive(connp);
2752 	if (ixa == NULL) {
2753 		UDPS_BUMP_MIB(us, udpOutErrors);
2754 		freemsg(mp);
2755 		return (ENOMEM);
2756 	}
2757 	ASSERT(cr != NULL);
2758 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2759 	ixa->ixa_cred = cr;
2760 	ixa->ixa_cpid = pid;
2761 	if (is_system_labeled()) {
2762 		/* We need to restart with a label based on the cred */
2763 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
2764 	}
2765 
2766 	/* In case previous destination was multicast or multirt */
2767 	ip_attr_newdst(ixa);
2768 
2769 	/* Get a copy of conn_xmit_ipp since the options might change it */
2770 	ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
2771 	if (ipp == NULL) {
2772 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
2773 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
2774 		ixa->ixa_cpid = connp->conn_cpid;
2775 		ixa_refrele(ixa);
2776 		UDPS_BUMP_MIB(us, udpOutErrors);
2777 		freemsg(mp);
2778 		return (ENOMEM);
2779 	}
2780 	mutex_enter(&connp->conn_lock);
2781 	error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
2782 	mutex_exit(&connp->conn_lock);
2783 	if (error != 0) {
2784 		UDPS_BUMP_MIB(us, udpOutErrors);
2785 		freemsg(mp);
2786 		goto done;
2787 	}
2788 
2789 	/*
2790 	 * Parse the options and update ixa and ipp as a result.
2791 	 * Note that ixa_tsl can be updated if SCM_UCRED.
2792 	 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
2793 	 */
2794 
2795 	coa = &coas;
2796 	coa->coa_connp = connp;
2797 	coa->coa_ixa = ixa;
2798 	coa->coa_ipp = ipp;
2799 	coa->coa_ancillary = B_TRUE;
2800 	coa->coa_changed = 0;
2801 
2802 	if (msg != NULL) {
2803 		error = process_auxiliary_options(connp, msg->msg_control,
2804 		    msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
2805 	} else {
2806 		struct T_unitdata_req *tudr;
2807 
2808 		tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
2809 		ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
2810 		error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
2811 		    &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
2812 		    coa, &is_absreq_failure);
2813 	}
2814 	if (error != 0) {
2815 		/*
2816 		 * Note: No special action needed in this
2817 		 * module for "is_absreq_failure"
2818 		 */
2819 		freemsg(mp);
2820 		UDPS_BUMP_MIB(us, udpOutErrors);
2821 		goto done;
2822 	}
2823 	ASSERT(is_absreq_failure == 0);
2824 
2825 	mutex_enter(&connp->conn_lock);
2826 	/*
2827 	 * If laddr is unspecified then we look at sin6_src_id.
2828 	 * We will give precedence to a source address set with IPV6_PKTINFO
2829 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2830 	 * want ip_attr_connect to select a source (since it can fail) when
2831 	 * IPV6_PKTINFO is specified.
2832 	 * If this doesn't result in a source address then we get a source
2833 	 * from ip_attr_connect() below.
2834 	 */
2835 	v6src = connp->conn_saddr_v6;
2836 	if (sin != NULL) {
2837 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
2838 		dstport = sin->sin_port;
2839 		flowinfo = 0;
2840 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2841 		ixa->ixa_flags |= IXAF_IS_IPV4;
2842 	} else if (sin6 != NULL) {
2843 		boolean_t v4mapped;
2844 
2845 		v6dst = sin6->sin6_addr;
2846 		dstport = sin6->sin6_port;
2847 		flowinfo = sin6->sin6_flowinfo;
2848 		srcid = sin6->__sin6_src_id;
2849 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
2850 			ixa->ixa_scopeid = sin6->sin6_scope_id;
2851 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
2852 		} else {
2853 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
2854 		}
2855 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
2856 		if (v4mapped)
2857 			ixa->ixa_flags |= IXAF_IS_IPV4;
2858 		else
2859 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
2860 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
2861 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
2862 			    v4mapped, connp->conn_netstack)) {
2863 				/* Mismatch - v4mapped/v6 specified by srcid. */
2864 				mutex_exit(&connp->conn_lock);
2865 				error = EADDRNOTAVAIL;
2866 				goto failed;	/* Does freemsg() and mib. */
2867 			}
2868 		}
2869 	} else {
2870 		/* Connected case */
2871 		v6dst = connp->conn_faddr_v6;
2872 		dstport = connp->conn_fport;
2873 		flowinfo = connp->conn_flowinfo;
2874 	}
2875 	mutex_exit(&connp->conn_lock);
2876 
2877 	/* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
2878 	if (ipp->ipp_fields & IPPF_ADDR) {
2879 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
2880 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2881 				v6src = ipp->ipp_addr;
2882 		} else {
2883 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
2884 				v6src = ipp->ipp_addr;
2885 		}
2886 	}
2887 
2888 	ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
2889 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
2890 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
2891 
2892 	switch (error) {
2893 	case 0:
2894 		break;
2895 	case EADDRNOTAVAIL:
2896 		/*
2897 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2898 		 * Don't have the application see that errno
2899 		 */
2900 		error = ENETUNREACH;
2901 		goto failed;
2902 	case ENETDOWN:
2903 		/*
2904 		 * Have !ipif_addr_ready address; drop packet silently
2905 		 * until we can get applications to not send until we
2906 		 * are ready.
2907 		 */
2908 		error = 0;
2909 		goto failed;
2910 	case EHOSTUNREACH:
2911 	case ENETUNREACH:
2912 		if (ixa->ixa_ire != NULL) {
2913 			/*
2914 			 * Let conn_ip_output/ire_send_noroute return
2915 			 * the error and send any local ICMP error.
2916 			 */
2917 			error = 0;
2918 			break;
2919 		}
2920 		/* FALLTHRU */
2921 	default:
2922 	failed:
2923 		freemsg(mp);
2924 		UDPS_BUMP_MIB(us, udpOutErrors);
2925 		goto done;
2926 	}
2927 
2928 	/*
2929 	 * We might be going to a different destination than last time,
2930 	 * thus check that TX allows the communication and compute any
2931 	 * needed label.
2932 	 *
2933 	 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
2934 	 * don't have to worry about concurrent threads.
2935 	 */
2936 	if (is_system_labeled()) {
2937 		/* Using UDP MLP requires SCM_UCRED from user */
2938 		if (connp->conn_mlp_type != mlptSingle &&
2939 		    !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
2940 			UDPS_BUMP_MIB(us, udpOutErrors);
2941 			error = ECONNREFUSED;
2942 			freemsg(mp);
2943 			goto done;
2944 		}
2945 		/*
2946 		 * Check whether Trusted Solaris policy allows communication
2947 		 * with this host, and pretend that the destination is
2948 		 * unreachable if not.
2949 		 * Compute any needed label and place it in ipp_label_v4/v6.
2950 		 *
2951 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
2952 		 * ipp_label_v4/v6 to form the packet.
2953 		 *
2954 		 * Tsol note: We have ipp structure local to this thread so
2955 		 * no locking is needed.
2956 		 */
2957 		error = conn_update_label(connp, ixa, &v6dst, ipp);
2958 		if (error != 0) {
2959 			freemsg(mp);
2960 			UDPS_BUMP_MIB(us, udpOutErrors);
2961 			goto done;
2962 		}
2963 	}
2964 	mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
2965 	    flowinfo, mp, &error);
2966 	if (mp == NULL) {
2967 		ASSERT(error != 0);
2968 		UDPS_BUMP_MIB(us, udpOutErrors);
2969 		goto done;
2970 	}
2971 	if (ixa->ixa_pktlen > IP_MAXPACKET) {
2972 		error = EMSGSIZE;
2973 		UDPS_BUMP_MIB(us, udpOutErrors);
2974 		freemsg(mp);
2975 		goto done;
2976 	}
2977 	/* We're done.  Pass the packet to ip. */
2978 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
2979 
2980 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
2981 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
2982 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
2983 
2984 	error = conn_ip_output(mp, ixa);
2985 	/* No udpOutErrors if an error since IP increases its error counter */
2986 	switch (error) {
2987 	case 0:
2988 		break;
2989 	case EWOULDBLOCK:
2990 		(void) ixa_check_drain_insert(connp, ixa);
2991 		error = 0;
2992 		break;
2993 	case EADDRNOTAVAIL:
2994 		/*
2995 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2996 		 * Don't have the application see that errno
2997 		 */
2998 		error = ENETUNREACH;
2999 		/* FALLTHRU */
3000 	default:
3001 		mutex_enter(&connp->conn_lock);
3002 		/*
3003 		 * Clear the source and v6lastdst so we call ip_attr_connect
3004 		 * for the next packet and try to pick a better source.
3005 		 */
3006 		if (connp->conn_mcbc_bind)
3007 			connp->conn_saddr_v6 = ipv6_all_zeros;
3008 		else
3009 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3010 		connp->conn_v6lastdst = ipv6_all_zeros;
3011 		mutex_exit(&connp->conn_lock);
3012 		break;
3013 	}
3014 done:
3015 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3016 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3017 	ixa->ixa_cpid = connp->conn_cpid;
3018 	ixa_refrele(ixa);
3019 	ip_pkt_free(ipp);
3020 	kmem_free(ipp, sizeof (*ipp));
3021 	return (error);
3022 }
3023 
3024 /*
3025  * Handle sending an M_DATA for a connected socket.
3026  * Handles both IPv4 and IPv6.
3027  */
3028 static int
udp_output_connected(conn_t * connp,mblk_t * mp,cred_t * cr,pid_t pid)3029 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
3030 {
3031 	udp_t		*udp = connp->conn_udp;
3032 	udp_stack_t	*us = udp->udp_us;
3033 	int		error;
3034 	ip_xmit_attr_t	*ixa;
3035 
3036 	/*
3037 	 * If no other thread is using conn_ixa this just gets a reference to
3038 	 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
3039 	 */
3040 	ixa = conn_get_ixa(connp, B_FALSE);
3041 	if (ixa == NULL) {
3042 		UDPS_BUMP_MIB(us, udpOutErrors);
3043 		freemsg(mp);
3044 		return (ENOMEM);
3045 	}
3046 
3047 	ASSERT(cr != NULL);
3048 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3049 	ixa->ixa_cred = cr;
3050 	ixa->ixa_cpid = pid;
3051 
3052 	mutex_enter(&connp->conn_lock);
3053 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
3054 	    connp->conn_fport, connp->conn_flowinfo, &error);
3055 
3056 	if (mp == NULL) {
3057 		ASSERT(error != 0);
3058 		mutex_exit(&connp->conn_lock);
3059 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3060 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
3061 		ixa->ixa_cpid = connp->conn_cpid;
3062 		ixa_refrele(ixa);
3063 		UDPS_BUMP_MIB(us, udpOutErrors);
3064 		freemsg(mp);
3065 		return (error);
3066 	}
3067 
3068 	/*
3069 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3070 	 * safe copy, then we need to fill in any pointers in it.
3071 	 */
3072 	if (ixa->ixa_ire == NULL) {
3073 		in6_addr_t	faddr, saddr;
3074 		in6_addr_t	nexthop;
3075 		in_port_t	fport;
3076 
3077 		saddr = connp->conn_saddr_v6;
3078 		faddr = connp->conn_faddr_v6;
3079 		fport = connp->conn_fport;
3080 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
3081 		mutex_exit(&connp->conn_lock);
3082 
3083 		error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
3084 		    fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
3085 		    IPDF_IPSEC);
3086 		switch (error) {
3087 		case 0:
3088 			break;
3089 		case EADDRNOTAVAIL:
3090 			/*
3091 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3092 			 * Don't have the application see that errno
3093 			 */
3094 			error = ENETUNREACH;
3095 			goto failed;
3096 		case ENETDOWN:
3097 			/*
3098 			 * Have !ipif_addr_ready address; drop packet silently
3099 			 * until we can get applications to not send until we
3100 			 * are ready.
3101 			 */
3102 			error = 0;
3103 			goto failed;
3104 		case EHOSTUNREACH:
3105 		case ENETUNREACH:
3106 			if (ixa->ixa_ire != NULL) {
3107 				/*
3108 				 * Let conn_ip_output/ire_send_noroute return
3109 				 * the error and send any local ICMP error.
3110 				 */
3111 				error = 0;
3112 				break;
3113 			}
3114 			/* FALLTHRU */
3115 		default:
3116 		failed:
3117 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3118 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
3119 			ixa->ixa_cpid = connp->conn_cpid;
3120 			ixa_refrele(ixa);
3121 			freemsg(mp);
3122 			UDPS_BUMP_MIB(us, udpOutErrors);
3123 			return (error);
3124 		}
3125 	} else {
3126 		/* Done with conn_t */
3127 		mutex_exit(&connp->conn_lock);
3128 	}
3129 	ASSERT(ixa->ixa_ire != NULL);
3130 
3131 	/* We're done.  Pass the packet to ip. */
3132 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3133 
3134 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3135 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3136 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3137 
3138 	error = conn_ip_output(mp, ixa);
3139 	/* No udpOutErrors if an error since IP increases its error counter */
3140 	switch (error) {
3141 	case 0:
3142 		break;
3143 	case EWOULDBLOCK:
3144 		(void) ixa_check_drain_insert(connp, ixa);
3145 		error = 0;
3146 		break;
3147 	case EADDRNOTAVAIL:
3148 		/*
3149 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3150 		 * Don't have the application see that errno
3151 		 */
3152 		error = ENETUNREACH;
3153 		break;
3154 	}
3155 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3156 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3157 	ixa->ixa_cpid = connp->conn_cpid;
3158 	ixa_refrele(ixa);
3159 	return (error);
3160 }
3161 
3162 /*
3163  * Handle sending an M_DATA to the last destination.
3164  * Handles both IPv4 and IPv6.
3165  *
3166  * NOTE: The caller must hold conn_lock and we drop it here.
3167  */
3168 static int
udp_output_lastdst(conn_t * connp,mblk_t * mp,cred_t * cr,pid_t pid,ip_xmit_attr_t * ixa)3169 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
3170     ip_xmit_attr_t *ixa)
3171 {
3172 	udp_t		*udp = connp->conn_udp;
3173 	udp_stack_t	*us = udp->udp_us;
3174 	int		error;
3175 
3176 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3177 	ASSERT(ixa != NULL);
3178 
3179 	ASSERT(cr != NULL);
3180 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3181 	ixa->ixa_cred = cr;
3182 	ixa->ixa_cpid = pid;
3183 
3184 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
3185 	    connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
3186 
3187 	if (mp == NULL) {
3188 		ASSERT(error != 0);
3189 		mutex_exit(&connp->conn_lock);
3190 		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3191 		ixa->ixa_cred = connp->conn_cred;	/* Restore */
3192 		ixa->ixa_cpid = connp->conn_cpid;
3193 		ixa_refrele(ixa);
3194 		UDPS_BUMP_MIB(us, udpOutErrors);
3195 		freemsg(mp);
3196 		return (error);
3197 	}
3198 
3199 	/*
3200 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3201 	 * safe copy, then we need to fill in any pointers in it.
3202 	 */
3203 	if (ixa->ixa_ire == NULL) {
3204 		in6_addr_t	lastdst, lastsrc;
3205 		in6_addr_t	nexthop;
3206 		in_port_t	lastport;
3207 
3208 		lastsrc = connp->conn_v6lastsrc;
3209 		lastdst = connp->conn_v6lastdst;
3210 		lastport = connp->conn_lastdstport;
3211 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
3212 		mutex_exit(&connp->conn_lock);
3213 
3214 		error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
3215 		    &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
3216 		    IPDF_VERIFY_DST | IPDF_IPSEC);
3217 		switch (error) {
3218 		case 0:
3219 			break;
3220 		case EADDRNOTAVAIL:
3221 			/*
3222 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3223 			 * Don't have the application see that errno
3224 			 */
3225 			error = ENETUNREACH;
3226 			goto failed;
3227 		case ENETDOWN:
3228 			/*
3229 			 * Have !ipif_addr_ready address; drop packet silently
3230 			 * until we can get applications to not send until we
3231 			 * are ready.
3232 			 */
3233 			error = 0;
3234 			goto failed;
3235 		case EHOSTUNREACH:
3236 		case ENETUNREACH:
3237 			if (ixa->ixa_ire != NULL) {
3238 				/*
3239 				 * Let conn_ip_output/ire_send_noroute return
3240 				 * the error and send any local ICMP error.
3241 				 */
3242 				error = 0;
3243 				break;
3244 			}
3245 			/* FALLTHRU */
3246 		default:
3247 		failed:
3248 			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3249 			ixa->ixa_cred = connp->conn_cred;	/* Restore */
3250 			ixa->ixa_cpid = connp->conn_cpid;
3251 			ixa_refrele(ixa);
3252 			freemsg(mp);
3253 			UDPS_BUMP_MIB(us, udpOutErrors);
3254 			return (error);
3255 		}
3256 	} else {
3257 		/* Done with conn_t */
3258 		mutex_exit(&connp->conn_lock);
3259 	}
3260 
3261 	/* We're done.  Pass the packet to ip. */
3262 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
3263 
3264 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
3265 	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
3266 	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);
3267 
3268 	error = conn_ip_output(mp, ixa);
3269 	/* No udpOutErrors if an error since IP increases its error counter */
3270 	switch (error) {
3271 	case 0:
3272 		break;
3273 	case EWOULDBLOCK:
3274 		(void) ixa_check_drain_insert(connp, ixa);
3275 		error = 0;
3276 		break;
3277 	case EADDRNOTAVAIL:
3278 		/*
3279 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3280 		 * Don't have the application see that errno
3281 		 */
3282 		error = ENETUNREACH;
3283 		/* FALLTHRU */
3284 	default:
3285 		mutex_enter(&connp->conn_lock);
3286 		/*
3287 		 * Clear the source and v6lastdst so we call ip_attr_connect
3288 		 * for the next packet and try to pick a better source.
3289 		 */
3290 		if (connp->conn_mcbc_bind)
3291 			connp->conn_saddr_v6 = ipv6_all_zeros;
3292 		else
3293 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
3294 		connp->conn_v6lastdst = ipv6_all_zeros;
3295 		mutex_exit(&connp->conn_lock);
3296 		break;
3297 	}
3298 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3299 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
3300 	ixa->ixa_cpid = connp->conn_cpid;
3301 	ixa_refrele(ixa);
3302 	return (error);
3303 }
3304 
3305 
3306 /*
3307  * Prepend the header template and then fill in the source and
3308  * flowinfo. The caller needs to handle the destination address since
3309  * it's setting is different if rthdr or source route.
3310  *
3311  * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET.
3312  * When it returns NULL it sets errorp.
3313  */
3314 static mblk_t *
udp_prepend_header_template(conn_t * connp,ip_xmit_attr_t * ixa,mblk_t * mp,const in6_addr_t * v6src,in_port_t dstport,uint32_t flowinfo,int * errorp)3315 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
3316     const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
3317 {
3318 	udp_t		*udp = connp->conn_udp;
3319 	udp_stack_t	*us = udp->udp_us;
3320 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
3321 	boolean_t	hash_srcport = udp->udp_vxlanhash;
3322 	uint_t		pktlen;
3323 	uint_t		alloclen;
3324 	uint_t		copylen;
3325 	uint8_t		*iph;
3326 	uint_t		ip_hdr_length;
3327 	udpha_t		*udpha;
3328 	uint32_t	cksum;
3329 	ip_pkt_t	*ipp;
3330 	uint16_t	srcport;
3331 
3332 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3333 
3334 	/*
3335 	 * If we have source port hashing going on, determine the hash before
3336 	 * we modify the mblk_t.
3337 	 */
3338 	if (hash_srcport == B_TRUE) {
3339 		srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
3340 		    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
3341 		    ntohs(connp->conn_lport));
3342 	}
3343 
3344 	/*
3345 	 * Copy the header template and leave space for an SPI
3346 	 */
3347 	copylen = connp->conn_ht_iphc_len;
3348 	alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
3349 	pktlen = alloclen + msgdsize(mp);
3350 	if (pktlen > IP_MAXPACKET) {
3351 		freemsg(mp);
3352 		*errorp = EMSGSIZE;
3353 		return (NULL);
3354 	}
3355 	ixa->ixa_pktlen = pktlen;
3356 
3357 	/* check/fix buffer config, setup pointers into it */
3358 	iph = mp->b_rptr - alloclen;
3359 	if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
3360 		mblk_t *mp1;
3361 
3362 		mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
3363 		if (mp1 == NULL) {
3364 			freemsg(mp);
3365 			*errorp = ENOMEM;
3366 			return (NULL);
3367 		}
3368 		mp1->b_wptr = DB_LIM(mp1);
3369 		mp1->b_cont = mp;
3370 		mp = mp1;
3371 		iph = (mp->b_wptr - alloclen);
3372 	}
3373 	mp->b_rptr = iph;
3374 	bcopy(connp->conn_ht_iphc, iph, copylen);
3375 	ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
3376 
3377 	ixa->ixa_ip_hdr_length = ip_hdr_length;
3378 	udpha = (udpha_t *)(iph + ip_hdr_length);
3379 
3380 	/*
3381 	 * Setup header length and prepare for ULP checksum done in IP.
3382 	 * udp_build_hdr_template has already massaged any routing header
3383 	 * and placed the result in conn_sum.
3384 	 *
3385 	 * We make it easy for IP to include our pseudo header
3386 	 * by putting our length in uha_checksum.
3387 	 */
3388 	cksum = pktlen - ip_hdr_length;
3389 	udpha->uha_length = htons(cksum);
3390 
3391 	cksum += connp->conn_sum;
3392 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
3393 	ASSERT(cksum < 0x10000);
3394 
3395 	ipp = &connp->conn_xmit_ipp;
3396 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
3397 		ipha_t	*ipha = (ipha_t *)iph;
3398 
3399 		ipha->ipha_length = htons((uint16_t)pktlen);
3400 
3401 		/* IP does the checksum if uha_checksum is non-zero */
3402 		if (us->us_do_checksum)
3403 			udpha->uha_checksum = htons(cksum);
3404 
3405 		/* if IP_PKTINFO specified an addres it wins over bind() */
3406 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3407 		    IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3408 			ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
3409 			ipha->ipha_src = ipp->ipp_addr_v4;
3410 		} else {
3411 			IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
3412 		}
3413 	} else {
3414 		ip6_t *ip6h = (ip6_t *)iph;
3415 
3416 		ip6h->ip6_plen =  htons((uint16_t)(pktlen - IPV6_HDR_LEN));
3417 		udpha->uha_checksum = htons(cksum);
3418 
3419 		/* if IP_PKTINFO specified an addres it wins over bind() */
3420 		if ((ipp->ipp_fields & IPPF_ADDR) &&
3421 		    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
3422 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
3423 			ip6h->ip6_src = ipp->ipp_addr;
3424 		} else {
3425 			ip6h->ip6_src = *v6src;
3426 		}
3427 		ip6h->ip6_vcf =
3428 		    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
3429 		    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
3430 		if (ipp->ipp_fields & IPPF_TCLASS) {
3431 			/* Overrides the class part of flowinfo */
3432 			ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
3433 			    ipp->ipp_tclass);
3434 		}
3435 	}
3436 
3437 	/* Insert all-0s SPI now. */
3438 	if (insert_spi)
3439 		*((uint32_t *)(udpha + 1)) = 0;
3440 
3441 	udpha->uha_dst_port = dstport;
3442 	if (hash_srcport == B_TRUE)
3443 		udpha->uha_src_port = htons(srcport);
3444 
3445 	return (mp);
3446 }
3447 
3448 /*
3449  * Send a T_UDERR_IND in response to an M_DATA
3450  */
3451 static void
udp_ud_err_connected(conn_t * connp,t_scalar_t error)3452 udp_ud_err_connected(conn_t *connp, t_scalar_t error)
3453 {
3454 	struct sockaddr_storage ss;
3455 	sin_t		*sin;
3456 	sin6_t		*sin6;
3457 	struct sockaddr	*addr;
3458 	socklen_t	addrlen;
3459 	mblk_t		*mp1;
3460 
3461 	mutex_enter(&connp->conn_lock);
3462 	/* Initialize addr and addrlen as if they're passed in */
3463 	if (connp->conn_family == AF_INET) {
3464 		sin = (sin_t *)&ss;
3465 		*sin = sin_null;
3466 		sin->sin_family = AF_INET;
3467 		sin->sin_port = connp->conn_fport;
3468 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
3469 		addr = (struct sockaddr *)sin;
3470 		addrlen = sizeof (*sin);
3471 	} else {
3472 		sin6 = (sin6_t *)&ss;
3473 		*sin6 = sin6_null;
3474 		sin6->sin6_family = AF_INET6;
3475 		sin6->sin6_port = connp->conn_fport;
3476 		sin6->sin6_flowinfo = connp->conn_flowinfo;
3477 		sin6->sin6_addr = connp->conn_faddr_v6;
3478 		if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
3479 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
3480 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
3481 		} else {
3482 			sin6->sin6_scope_id = 0;
3483 		}
3484 		sin6->__sin6_src_id = 0;
3485 		addr = (struct sockaddr *)sin6;
3486 		addrlen = sizeof (*sin6);
3487 	}
3488 	mutex_exit(&connp->conn_lock);
3489 
3490 	mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
3491 	if (mp1 != NULL)
3492 		putnext(connp->conn_rq, mp1);
3493 }
3494 
3495 /*
3496  * This routine handles all messages passed downstream.  It either
3497  * consumes the message or passes it downstream; it never queues a
3498  * a message.
3499  *
3500  * Also entry point for sockfs when udp is in "direct sockfs" mode.  This mode
3501  * is valid when we are directly beneath the stream head, and thus sockfs
3502  * is able to bypass STREAMS and directly call us, passing along the sockaddr
3503  * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3504  * connected endpoints.
3505  */
3506 int
udp_wput(queue_t * q,mblk_t * mp)3507 udp_wput(queue_t *q, mblk_t *mp)
3508 {
3509 	sin6_t		*sin6;
3510 	sin_t		*sin = NULL;
3511 	uint_t		srcid;
3512 	conn_t		*connp = Q_TO_CONN(q);
3513 	udp_t		*udp = connp->conn_udp;
3514 	int		error = 0;
3515 	struct sockaddr	*addr = NULL;
3516 	socklen_t	addrlen;
3517 	udp_stack_t	*us = udp->udp_us;
3518 	struct T_unitdata_req *tudr;
3519 	mblk_t		*data_mp;
3520 	ushort_t	ipversion;
3521 	cred_t		*cr;
3522 	pid_t		pid;
3523 
3524 	/*
3525 	 * We directly handle several cases here: T_UNITDATA_REQ message
3526 	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3527 	 * socket.
3528 	 */
3529 	switch (DB_TYPE(mp)) {
3530 	case M_DATA:
3531 		if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
3532 			/* Not connected; address is required */
3533 			UDPS_BUMP_MIB(us, udpOutErrors);
3534 			UDP_DBGSTAT(us, udp_data_notconn);
3535 			UDP_STAT(us, udp_out_err_notconn);
3536 			freemsg(mp);
3537 			return (0);
3538 		}
3539 		/*
3540 		 * All Solaris components should pass a db_credp
3541 		 * for this message, hence we ASSERT.
3542 		 * On production kernels we return an error to be robust against
3543 		 * random streams modules sitting on top of us.
3544 		 */
3545 		cr = msg_getcred(mp, &pid);
3546 		ASSERT(cr != NULL);
3547 		if (cr == NULL) {
3548 			UDPS_BUMP_MIB(us, udpOutErrors);
3549 			freemsg(mp);
3550 			return (0);
3551 		}
3552 		ASSERT(udp->udp_issocket);
3553 		UDP_DBGSTAT(us, udp_data_conn);
3554 		error = udp_output_connected(connp, mp, cr, pid);
3555 		if (error != 0) {
3556 			UDP_STAT(us, udp_out_err_output);
3557 			if (connp->conn_rq != NULL)
3558 				udp_ud_err_connected(connp, (t_scalar_t)error);
3559 #ifdef DEBUG
3560 			printf("udp_output_connected returned %d\n", error);
3561 #endif
3562 		}
3563 		return (0);
3564 
3565 	case M_PROTO:
3566 	case M_PCPROTO:
3567 		tudr = (struct T_unitdata_req *)mp->b_rptr;
3568 		if (MBLKL(mp) < sizeof (*tudr) ||
3569 		    ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
3570 			udp_wput_other(q, mp);
3571 			return (0);
3572 		}
3573 		break;
3574 
3575 	default:
3576 		udp_wput_other(q, mp);
3577 		return (0);
3578 	}
3579 
3580 	/* Handle valid T_UNITDATA_REQ here */
3581 	data_mp = mp->b_cont;
3582 	if (data_mp == NULL) {
3583 		error = EPROTO;
3584 		goto ud_error2;
3585 	}
3586 	mp->b_cont = NULL;
3587 
3588 	if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
3589 		error = EADDRNOTAVAIL;
3590 		goto ud_error2;
3591 	}
3592 
3593 	/*
3594 	 * All Solaris components should pass a db_credp
3595 	 * for this TPI message, hence we should ASSERT.
3596 	 * However, RPC (svc_clts_ksend) does this odd thing where it
3597 	 * passes the options from a T_UNITDATA_IND unchanged in a
3598 	 * T_UNITDATA_REQ. While that is the right thing to do for
3599 	 * some options, SCM_UCRED being the key one, this also makes it
3600 	 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3601 	 */
3602 	cr = msg_getcred(mp, &pid);
3603 	if (cr == NULL) {
3604 		cr = connp->conn_cred;
3605 		pid = connp->conn_cpid;
3606 	}
3607 
3608 	/*
3609 	 * If a port has not been bound to the stream, fail.
3610 	 * This is not a problem when sockfs is directly
3611 	 * above us, because it will ensure that the socket
3612 	 * is first bound before allowing data to be sent.
3613 	 */
3614 	if (udp->udp_state == TS_UNBND) {
3615 		error = EPROTO;
3616 		goto ud_error2;
3617 	}
3618 	addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
3619 	addrlen = tudr->DEST_length;
3620 
3621 	switch (connp->conn_family) {
3622 	case AF_INET6:
3623 		sin6 = (sin6_t *)addr;
3624 		if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
3625 		    (sin6->sin6_family != AF_INET6)) {
3626 			error = EADDRNOTAVAIL;
3627 			goto ud_error2;
3628 		}
3629 
3630 		srcid = sin6->__sin6_src_id;
3631 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
3632 			/*
3633 			 * Destination is a non-IPv4-compatible IPv6 address.
3634 			 * Send out an IPv6 format packet.
3635 			 */
3636 
3637 			/*
3638 			 * If the local address is a mapped address return
3639 			 * an error.
3640 			 * It would be possible to send an IPv6 packet but the
3641 			 * response would never make it back to the application
3642 			 * since it is bound to a mapped address.
3643 			 */
3644 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
3645 				error = EADDRNOTAVAIL;
3646 				goto ud_error2;
3647 			}
3648 
3649 			UDP_DBGSTAT(us, udp_out_ipv6);
3650 
3651 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
3652 				sin6->sin6_addr = ipv6_loopback;
3653 			ipversion = IPV6_VERSION;
3654 		} else {
3655 			if (connp->conn_ipv6_v6only) {
3656 				error = EADDRNOTAVAIL;
3657 				goto ud_error2;
3658 			}
3659 
3660 			/*
3661 			 * If the local address is not zero or a mapped address
3662 			 * return an error.  It would be possible to send an
3663 			 * IPv4 packet but the response would never make it
3664 			 * back to the application since it is bound to a
3665 			 * non-mapped address.
3666 			 */
3667 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
3668 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
3669 				error = EADDRNOTAVAIL;
3670 				goto ud_error2;
3671 			}
3672 			UDP_DBGSTAT(us, udp_out_mapped);
3673 
3674 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
3675 				V4_PART_OF_V6(sin6->sin6_addr) =
3676 				    htonl(INADDR_LOOPBACK);
3677 			}
3678 			ipversion = IPV4_VERSION;
3679 		}
3680 
3681 		if (tudr->OPT_length != 0) {
3682 			/*
3683 			 * If we are connected then the destination needs to be
3684 			 * the same as the connected one.
3685 			 */
3686 			if (udp->udp_state == TS_DATA_XFER &&
3687 			    !conn_same_as_last_v6(connp, sin6)) {
3688 				error = EISCONN;
3689 				goto ud_error2;
3690 			}
3691 			UDP_STAT(us, udp_out_opt);
3692 			error = udp_output_ancillary(connp, NULL, sin6,
3693 			    data_mp, mp, NULL, cr, pid);
3694 		} else {
3695 			ip_xmit_attr_t *ixa;
3696 
3697 			/*
3698 			 * We have to allocate an ip_xmit_attr_t before we grab
3699 			 * conn_lock and we need to hold conn_lock once we've
3700 			 * checked conn_same_as_last_v6 to handle concurrent
3701 			 * send* calls on a socket.
3702 			 */
3703 			ixa = conn_get_ixa(connp, B_FALSE);
3704 			if (ixa == NULL) {
3705 				error = ENOMEM;
3706 				goto ud_error2;
3707 			}
3708 			mutex_enter(&connp->conn_lock);
3709 
3710 			if (conn_same_as_last_v6(connp, sin6) &&
3711 			    connp->conn_lastsrcid == srcid &&
3712 			    ipsec_outbound_policy_current(ixa)) {
3713 				UDP_DBGSTAT(us, udp_out_lastdst);
3714 				/* udp_output_lastdst drops conn_lock */
3715 				error = udp_output_lastdst(connp, data_mp, cr,
3716 				    pid, ixa);
3717 			} else {
3718 				UDP_DBGSTAT(us, udp_out_diffdst);
3719 				/* udp_output_newdst drops conn_lock */
3720 				error = udp_output_newdst(connp, data_mp, NULL,
3721 				    sin6, ipversion, cr, pid, ixa);
3722 			}
3723 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3724 		}
3725 		if (error == 0) {
3726 			freeb(mp);
3727 			return (0);
3728 		}
3729 		break;
3730 
3731 	case AF_INET:
3732 		sin = (sin_t *)addr;
3733 		if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
3734 		    (sin->sin_family != AF_INET)) {
3735 			error = EADDRNOTAVAIL;
3736 			goto ud_error2;
3737 		}
3738 		UDP_DBGSTAT(us, udp_out_ipv4);
3739 		if (sin->sin_addr.s_addr == INADDR_ANY)
3740 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
3741 		ipversion = IPV4_VERSION;
3742 
3743 		srcid = 0;
3744 		if (tudr->OPT_length != 0) {
3745 			/*
3746 			 * If we are connected then the destination needs to be
3747 			 * the same as the connected one.
3748 			 */
3749 			if (udp->udp_state == TS_DATA_XFER &&
3750 			    !conn_same_as_last_v4(connp, sin)) {
3751 				error = EISCONN;
3752 				goto ud_error2;
3753 			}
3754 			UDP_STAT(us, udp_out_opt);
3755 			error = udp_output_ancillary(connp, sin, NULL,
3756 			    data_mp, mp, NULL, cr, pid);
3757 		} else {
3758 			ip_xmit_attr_t *ixa;
3759 
3760 			/*
3761 			 * We have to allocate an ip_xmit_attr_t before we grab
3762 			 * conn_lock and we need to hold conn_lock once we've
3763 			 * checked conn_same_as_last_v4 to handle concurrent
3764 			 * send* calls on a socket.
3765 			 */
3766 			ixa = conn_get_ixa(connp, B_FALSE);
3767 			if (ixa == NULL) {
3768 				error = ENOMEM;
3769 				goto ud_error2;
3770 			}
3771 			mutex_enter(&connp->conn_lock);
3772 
3773 			if (conn_same_as_last_v4(connp, sin) &&
3774 			    ipsec_outbound_policy_current(ixa)) {
3775 				UDP_DBGSTAT(us, udp_out_lastdst);
3776 				/* udp_output_lastdst drops conn_lock */
3777 				error = udp_output_lastdst(connp, data_mp, cr,
3778 				    pid, ixa);
3779 			} else {
3780 				UDP_DBGSTAT(us, udp_out_diffdst);
3781 				/* udp_output_newdst drops conn_lock */
3782 				error = udp_output_newdst(connp, data_mp, sin,
3783 				    NULL, ipversion, cr, pid, ixa);
3784 			}
3785 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
3786 		}
3787 		if (error == 0) {
3788 			freeb(mp);
3789 			return (0);
3790 		}
3791 		break;
3792 	}
3793 	UDP_STAT(us, udp_out_err_output);
3794 	ASSERT(mp != NULL);
3795 	/* mp is freed by the following routine */
3796 	udp_ud_err(q, mp, (t_scalar_t)error);
3797 	return (0);
3798 
3799 ud_error2:
3800 	UDPS_BUMP_MIB(us, udpOutErrors);
3801 	freemsg(data_mp);
3802 	UDP_STAT(us, udp_out_err_output);
3803 	ASSERT(mp != NULL);
3804 	/* mp is freed by the following routine */
3805 	udp_ud_err(q, mp, (t_scalar_t)error);
3806 	return (0);
3807 }
3808 
3809 /*
3810  * Handle the case of the IP address, port, flow label being different
3811  * for both IPv4 and IPv6.
3812  *
3813  * NOTE: The caller must hold conn_lock and we drop it here.
3814  */
3815 static int
udp_output_newdst(conn_t * connp,mblk_t * data_mp,sin_t * sin,sin6_t * sin6,ushort_t ipversion,cred_t * cr,pid_t pid,ip_xmit_attr_t * ixa)3816 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
3817     ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
3818 {
3819 	uint_t		srcid;
3820 	uint32_t	flowinfo;
3821 	udp_t		*udp = connp->conn_udp;
3822 	int		error = 0;
3823 	ip_xmit_attr_t	*oldixa;
3824 	udp_stack_t	*us = udp->udp_us;
3825 	in6_addr_t	v6src;
3826 	in6_addr_t	v6dst;
3827 	in6_addr_t	v6nexthop;
3828 	in_port_t	dstport;
3829 
3830 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3831 	ASSERT(ixa != NULL);
3832 	/*
3833 	 * We hold conn_lock across all the use and modifications of
3834 	 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3835 	 * stay consistent.
3836 	 */
3837 
3838 	ASSERT(cr != NULL);
3839 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
3840 	ixa->ixa_cred = cr;
3841 	ixa->ixa_cpid = pid;
3842 	if (is_system_labeled()) {
3843 		/* We need to restart with a label based on the cred */
3844 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
3845 	}
3846 
3847 	/*
3848 	 * If we are connected then the destination needs to be the
3849 	 * same as the connected one, which is not the case here since we
3850 	 * checked for that above.
3851 	 */
3852 	if (udp->udp_state == TS_DATA_XFER) {
3853 		mutex_exit(&connp->conn_lock);
3854 		error = EISCONN;
3855 		goto ud_error;
3856 	}
3857 
3858 	/* In case previous destination was multicast or multirt */
3859 	ip_attr_newdst(ixa);
3860 
3861 	/*
3862 	 * If laddr is unspecified then we look at sin6_src_id.
3863 	 * We will give precedence to a source address set with IPV6_PKTINFO
3864 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3865 	 * want ip_attr_connect to select a source (since it can fail) when
3866 	 * IPV6_PKTINFO is specified.
3867 	 * If this doesn't result in a source address then we get a source
3868 	 * from ip_attr_connect() below.
3869 	 */
3870 	v6src = connp->conn_saddr_v6;
3871 	if (sin != NULL) {
3872 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
3873 		dstport = sin->sin_port;
3874 		flowinfo = 0;
3875 		/* Don't bother with ip_srcid_find_id(), but indicate anyway. */
3876 		srcid = 0;
3877 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3878 		ixa->ixa_flags |= IXAF_IS_IPV4;
3879 	} else {
3880 		boolean_t v4mapped;
3881 
3882 		v6dst = sin6->sin6_addr;
3883 		dstport = sin6->sin6_port;
3884 		flowinfo = sin6->sin6_flowinfo;
3885 		srcid = sin6->__sin6_src_id;
3886 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
3887 			ixa->ixa_scopeid = sin6->sin6_scope_id;
3888 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
3889 		} else {
3890 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
3891 		}
3892 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
3893 		if (v4mapped)
3894 			ixa->ixa_flags |= IXAF_IS_IPV4;
3895 		else
3896 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
3897 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
3898 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
3899 			    v4mapped, connp->conn_netstack)) {
3900 				/* Mismatched v4mapped/v6 specified by srcid. */
3901 				mutex_exit(&connp->conn_lock);
3902 				error = EADDRNOTAVAIL;
3903 				goto ud_error;
3904 			}
3905 		}
3906 	}
3907 	/* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3908 	if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) {
3909 		ip_pkt_t *ipp = &connp->conn_xmit_ipp;
3910 
3911 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
3912 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3913 				v6src = ipp->ipp_addr;
3914 		} else {
3915 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
3916 				v6src = ipp->ipp_addr;
3917 		}
3918 	}
3919 
3920 	ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
3921 	mutex_exit(&connp->conn_lock);
3922 
3923 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
3924 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
3925 	switch (error) {
3926 	case 0:
3927 		break;
3928 	case EADDRNOTAVAIL:
3929 		/*
3930 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3931 		 * Don't have the application see that errno
3932 		 */
3933 		error = ENETUNREACH;
3934 		goto failed;
3935 	case ENETDOWN:
3936 		/*
3937 		 * Have !ipif_addr_ready address; drop packet silently
3938 		 * until we can get applications to not send until we
3939 		 * are ready.
3940 		 */
3941 		error = 0;
3942 		goto failed;
3943 	case EHOSTUNREACH:
3944 	case ENETUNREACH:
3945 		if (ixa->ixa_ire != NULL) {
3946 			/*
3947 			 * Let conn_ip_output/ire_send_noroute return
3948 			 * the error and send any local ICMP error.
3949 			 */
3950 			error = 0;
3951 			break;
3952 		}
3953 		/* FALLTHRU */
3954 	failed:
3955 	default:
3956 		goto ud_error;
3957 	}
3958 
3959 
3960 	/*
3961 	 * Cluster note: we let the cluster hook know that we are sending to a
3962 	 * new address and/or port.
3963 	 */
3964 	if (cl_inet_connect2 != NULL) {
3965 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
3966 		if (error != 0) {
3967 			error = EHOSTUNREACH;
3968 			goto ud_error;
3969 		}
3970 	}
3971 
3972 	mutex_enter(&connp->conn_lock);
3973 	/*
3974 	 * While we dropped the lock some other thread might have connected
3975 	 * this socket. If so we bail out with EISCONN to ensure that the
3976 	 * connecting thread is the one that updates conn_ixa, conn_ht_*
3977 	 * and conn_*last*.
3978 	 */
3979 	if (udp->udp_state == TS_DATA_XFER) {
3980 		mutex_exit(&connp->conn_lock);
3981 		error = EISCONN;
3982 		goto ud_error;
3983 	}
3984 
3985 	/*
3986 	 * We need to rebuild the headers if
3987 	 *  - we are labeling packets (could be different for different
3988 	 *    destinations)
3989 	 *  - we have a source route (or routing header) since we need to
3990 	 *    massage that to get the pseudo-header checksum
3991 	 *  - the IP version is different than the last time
3992 	 *  - a socket option with COA_HEADER_CHANGED has been set which
3993 	 *    set conn_v6lastdst to zero.
3994 	 *
3995 	 * Otherwise the prepend function will just update the src, dst,
3996 	 * dstport, and flow label.
3997 	 */
3998 	if (is_system_labeled()) {
3999 		/* TX MLP requires SCM_UCRED and don't have that here */
4000 		if (connp->conn_mlp_type != mlptSingle) {
4001 			mutex_exit(&connp->conn_lock);
4002 			error = ECONNREFUSED;
4003 			goto ud_error;
4004 		}
4005 		/*
4006 		 * Check whether Trusted Solaris policy allows communication
4007 		 * with this host, and pretend that the destination is
4008 		 * unreachable if not.
4009 		 * Compute any needed label and place it in ipp_label_v4/v6.
4010 		 *
4011 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
4012 		 * ipp_label_v4/v6 to form the packet.
4013 		 *
4014 		 * Tsol note: Since we hold conn_lock we know no other
4015 		 * thread manipulates conn_xmit_ipp.
4016 		 */
4017 		error = conn_update_label(connp, ixa, &v6dst,
4018 		    &connp->conn_xmit_ipp);
4019 		if (error != 0) {
4020 			mutex_exit(&connp->conn_lock);
4021 			goto ud_error;
4022 		}
4023 		/* Rebuild the header template */
4024 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4025 		    flowinfo);
4026 		if (error != 0) {
4027 			mutex_exit(&connp->conn_lock);
4028 			goto ud_error;
4029 		}
4030 	} else if ((connp->conn_xmit_ipp.ipp_fields &
4031 	    (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
4032 	    ipversion != connp->conn_lastipversion ||
4033 	    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
4034 		/* Rebuild the header template */
4035 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
4036 		    flowinfo);
4037 		if (error != 0) {
4038 			mutex_exit(&connp->conn_lock);
4039 			goto ud_error;
4040 		}
4041 	} else {
4042 		/* Simply update the destination address if no source route */
4043 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
4044 			ipha_t	*ipha = (ipha_t *)connp->conn_ht_iphc;
4045 
4046 			IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
4047 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
4048 				ipha->ipha_fragment_offset_and_flags |=
4049 				    IPH_DF_HTONS;
4050 			} else {
4051 				ipha->ipha_fragment_offset_and_flags &=
4052 				    ~IPH_DF_HTONS;
4053 			}
4054 		} else {
4055 			ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
4056 			ip6h->ip6_dst = v6dst;
4057 		}
4058 	}
4059 
4060 	/*
4061 	 * Remember the dst/dstport etc which corresponds to the built header
4062 	 * template and conn_ixa.
4063 	 */
4064 	oldixa = conn_replace_ixa(connp, ixa);
4065 	connp->conn_v6lastdst = v6dst;
4066 	connp->conn_lastipversion = ipversion;
4067 	connp->conn_lastdstport = dstport;
4068 	connp->conn_lastflowinfo = flowinfo;
4069 	connp->conn_lastscopeid = ixa->ixa_scopeid;
4070 	connp->conn_lastsrcid = srcid;
4071 	/* Also remember a source to use together with lastdst */
4072 	connp->conn_v6lastsrc = v6src;
4073 
4074 	data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
4075 	    dstport, flowinfo, &error);
4076 
4077 	/* Done with conn_t */
4078 	mutex_exit(&connp->conn_lock);
4079 	ixa_refrele(oldixa);
4080 
4081 	if (data_mp == NULL) {
4082 		ASSERT(error != 0);
4083 		goto ud_error;
4084 	}
4085 
4086 	/* We're done.  Pass the packet to ip. */
4087 	UDPS_BUMP_MIB(us, udpHCOutDatagrams);
4088 
4089 	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
4090 	    void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *,
4091 	    &data_mp->b_rptr[ixa->ixa_ip_hdr_length]);
4092 
4093 	error = conn_ip_output(data_mp, ixa);
4094 	/* No udpOutErrors if an error since IP increases its error counter */
4095 	switch (error) {
4096 	case 0:
4097 		break;
4098 	case EWOULDBLOCK:
4099 		(void) ixa_check_drain_insert(connp, ixa);
4100 		error = 0;
4101 		break;
4102 	case EADDRNOTAVAIL:
4103 		/*
4104 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4105 		 * Don't have the application see that errno
4106 		 */
4107 		error = ENETUNREACH;
4108 		/* FALLTHRU */
4109 	default:
4110 		mutex_enter(&connp->conn_lock);
4111 		/*
4112 		 * Clear the source and v6lastdst so we call ip_attr_connect
4113 		 * for the next packet and try to pick a better source.
4114 		 */
4115 		if (connp->conn_mcbc_bind)
4116 			connp->conn_saddr_v6 = ipv6_all_zeros;
4117 		else
4118 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
4119 		connp->conn_v6lastdst = ipv6_all_zeros;
4120 		mutex_exit(&connp->conn_lock);
4121 		break;
4122 	}
4123 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4124 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
4125 	ixa->ixa_cpid = connp->conn_cpid;
4126 	ixa_refrele(ixa);
4127 	return (error);
4128 
4129 ud_error:
4130 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
4131 	ixa->ixa_cred = connp->conn_cred;	/* Restore */
4132 	ixa->ixa_cpid = connp->conn_cpid;
4133 	ixa_refrele(ixa);
4134 
4135 	freemsg(data_mp);
4136 	UDPS_BUMP_MIB(us, udpOutErrors);
4137 	UDP_STAT(us, udp_out_err_output);
4138 	return (error);
4139 }
4140 
4141 /* ARGSUSED */
4142 static int
udp_wput_fallback(queue_t * wq,mblk_t * mp)4143 udp_wput_fallback(queue_t *wq, mblk_t *mp)
4144 {
4145 #ifdef DEBUG
4146 	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
4147 #endif
4148 	freemsg(mp);
4149 	return (0);
4150 }
4151 
4152 
4153 /*
4154  * Handle special out-of-band ioctl requests (see PSARC/2008/265).
4155  */
4156 static void
udp_wput_cmdblk(queue_t * q,mblk_t * mp)4157 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
4158 {
4159 	void	*data;
4160 	mblk_t	*datamp = mp->b_cont;
4161 	conn_t	*connp = Q_TO_CONN(q);
4162 	udp_t	*udp = connp->conn_udp;
4163 	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
4164 
4165 	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
4166 		cmdp->cb_error = EPROTO;
4167 		qreply(q, mp);
4168 		return;
4169 	}
4170 	data = datamp->b_rptr;
4171 
4172 	mutex_enter(&connp->conn_lock);
4173 	switch (cmdp->cb_cmd) {
4174 	case TI_GETPEERNAME:
4175 		if (udp->udp_state != TS_DATA_XFER)
4176 			cmdp->cb_error = ENOTCONN;
4177 		else
4178 			cmdp->cb_error = conn_getpeername(connp, data,
4179 			    &cmdp->cb_len);
4180 		break;
4181 	case TI_GETMYNAME:
4182 		cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
4183 		break;
4184 	default:
4185 		cmdp->cb_error = EINVAL;
4186 		break;
4187 	}
4188 	mutex_exit(&connp->conn_lock);
4189 
4190 	qreply(q, mp);
4191 }
4192 
4193 static void
udp_use_pure_tpi(udp_t * udp)4194 udp_use_pure_tpi(udp_t *udp)
4195 {
4196 	conn_t	*connp = udp->udp_connp;
4197 
4198 	mutex_enter(&connp->conn_lock);
4199 	udp->udp_issocket = B_FALSE;
4200 	mutex_exit(&connp->conn_lock);
4201 	UDP_STAT(udp->udp_us, udp_sock_fallback);
4202 }
4203 
4204 static void
udp_wput_other(queue_t * q,mblk_t * mp)4205 udp_wput_other(queue_t *q, mblk_t *mp)
4206 {
4207 	uchar_t	*rptr = mp->b_rptr;
4208 	struct iocblk *iocp;
4209 	conn_t	*connp = Q_TO_CONN(q);
4210 	udp_t	*udp = connp->conn_udp;
4211 	cred_t	*cr;
4212 
4213 	switch (mp->b_datap->db_type) {
4214 	case M_CMD:
4215 		udp_wput_cmdblk(q, mp);
4216 		return;
4217 
4218 	case M_PROTO:
4219 	case M_PCPROTO:
4220 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4221 			/*
4222 			 * If the message does not contain a PRIM_type,
4223 			 * throw it away.
4224 			 */
4225 			freemsg(mp);
4226 			return;
4227 		}
4228 		switch (((t_primp_t)rptr)->type) {
4229 		case T_ADDR_REQ:
4230 			udp_addr_req(q, mp);
4231 			return;
4232 		case O_T_BIND_REQ:
4233 		case T_BIND_REQ:
4234 			udp_tpi_bind(q, mp);
4235 			return;
4236 		case T_CONN_REQ:
4237 			udp_tpi_connect(q, mp);
4238 			return;
4239 		case T_CAPABILITY_REQ:
4240 			udp_capability_req(q, mp);
4241 			return;
4242 		case T_INFO_REQ:
4243 			udp_info_req(q, mp);
4244 			return;
4245 		case T_UNITDATA_REQ:
4246 			/*
4247 			 * If a T_UNITDATA_REQ gets here, the address must
4248 			 * be bad.  Valid T_UNITDATA_REQs are handled
4249 			 * in udp_wput.
4250 			 */
4251 			udp_ud_err(q, mp, EADDRNOTAVAIL);
4252 			return;
4253 		case T_UNBIND_REQ:
4254 			udp_tpi_unbind(q, mp);
4255 			return;
4256 		case T_SVR4_OPTMGMT_REQ:
4257 			/*
4258 			 * All Solaris components should pass a db_credp
4259 			 * for this TPI message, hence we ASSERT.
4260 			 * But in case there is some other M_PROTO that looks
4261 			 * like a TPI message sent by some other kernel
4262 			 * component, we check and return an error.
4263 			 */
4264 			cr = msg_getcred(mp, NULL);
4265 			ASSERT(cr != NULL);
4266 			if (cr == NULL) {
4267 				udp_err_ack(q, mp, TSYSERR, EINVAL);
4268 				return;
4269 			}
4270 			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
4271 			    cr)) {
4272 				svr4_optcom_req(q, mp, cr, &udp_opt_obj);
4273 			}
4274 			return;
4275 
4276 		case T_OPTMGMT_REQ:
4277 			/*
4278 			 * All Solaris components should pass a db_credp
4279 			 * for this TPI message, hence we ASSERT.
4280 			 * But in case there is some other M_PROTO that looks
4281 			 * like a TPI message sent by some other kernel
4282 			 * component, we check and return an error.
4283 			 */
4284 			cr = msg_getcred(mp, NULL);
4285 			ASSERT(cr != NULL);
4286 			if (cr == NULL) {
4287 				udp_err_ack(q, mp, TSYSERR, EINVAL);
4288 				return;
4289 			}
4290 			tpi_optcom_req(q, mp, cr, &udp_opt_obj);
4291 			return;
4292 
4293 		case T_DISCON_REQ:
4294 			udp_tpi_disconnect(q, mp);
4295 			return;
4296 
4297 		/* The following TPI message is not supported by udp. */
4298 		case O_T_CONN_RES:
4299 		case T_CONN_RES:
4300 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
4301 			return;
4302 
4303 		/* The following 3 TPI requests are illegal for udp. */
4304 		case T_DATA_REQ:
4305 		case T_EXDATA_REQ:
4306 		case T_ORDREL_REQ:
4307 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
4308 			return;
4309 		default:
4310 			break;
4311 		}
4312 		break;
4313 	case M_FLUSH:
4314 		if (*rptr & FLUSHW)
4315 			flushq(q, FLUSHDATA);
4316 		break;
4317 	case M_IOCTL:
4318 		iocp = (struct iocblk *)mp->b_rptr;
4319 		switch (iocp->ioc_cmd) {
4320 		case TI_GETPEERNAME:
4321 			if (udp->udp_state != TS_DATA_XFER) {
4322 				/*
4323 				 * If a default destination address has not
4324 				 * been associated with the stream, then we
4325 				 * don't know the peer's name.
4326 				 */
4327 				iocp->ioc_error = ENOTCONN;
4328 				iocp->ioc_count = 0;
4329 				mp->b_datap->db_type = M_IOCACK;
4330 				qreply(q, mp);
4331 				return;
4332 			}
4333 			/* FALLTHRU */
4334 		case TI_GETMYNAME:
4335 			/*
4336 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4337 			 * need to copyin the user's strbuf structure.
4338 			 * Processing will continue in the M_IOCDATA case
4339 			 * below.
4340 			 */
4341 			mi_copyin(q, mp, NULL,
4342 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
4343 			return;
4344 		case _SIOCSOCKFALLBACK:
4345 			/*
4346 			 * Either sockmod is about to be popped and the
4347 			 * socket would now be treated as a plain stream,
4348 			 * or a module is about to be pushed so we have
4349 			 * to follow pure TPI semantics.
4350 			 */
4351 			if (!udp->udp_issocket) {
4352 				DB_TYPE(mp) = M_IOCNAK;
4353 				iocp->ioc_error = EINVAL;
4354 			} else {
4355 				udp_use_pure_tpi(udp);
4356 
4357 				DB_TYPE(mp) = M_IOCACK;
4358 				iocp->ioc_error = 0;
4359 			}
4360 			iocp->ioc_count = 0;
4361 			iocp->ioc_rval = 0;
4362 			qreply(q, mp);
4363 			return;
4364 		default:
4365 			break;
4366 		}
4367 		break;
4368 	case M_IOCDATA:
4369 		udp_wput_iocdata(q, mp);
4370 		return;
4371 	default:
4372 		/* Unrecognized messages are passed through without change. */
4373 		break;
4374 	}
4375 	ip_wput_nondata(q, mp);
4376 }
4377 
4378 /*
4379  * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
4380  * messages.
4381  */
4382 static void
udp_wput_iocdata(queue_t * q,mblk_t * mp)4383 udp_wput_iocdata(queue_t *q, mblk_t *mp)
4384 {
4385 	mblk_t		*mp1;
4386 	struct	iocblk *iocp = (struct iocblk *)mp->b_rptr;
4387 	STRUCT_HANDLE(strbuf, sb);
4388 	uint_t		addrlen;
4389 	conn_t		*connp = Q_TO_CONN(q);
4390 	udp_t		*udp = connp->conn_udp;
4391 
4392 	/* Make sure it is one of ours. */
4393 	switch (iocp->ioc_cmd) {
4394 	case TI_GETMYNAME:
4395 	case TI_GETPEERNAME:
4396 		break;
4397 	default:
4398 		ip_wput_nondata(q, mp);
4399 		return;
4400 	}
4401 
4402 	switch (mi_copy_state(q, mp, &mp1)) {
4403 	case -1:
4404 		return;
4405 	case MI_COPY_CASE(MI_COPY_IN, 1):
4406 		break;
4407 	case MI_COPY_CASE(MI_COPY_OUT, 1):
4408 		/*
4409 		 * The address has been copied out, so now
4410 		 * copyout the strbuf.
4411 		 */
4412 		mi_copyout(q, mp);
4413 		return;
4414 	case MI_COPY_CASE(MI_COPY_OUT, 2):
4415 		/*
4416 		 * The address and strbuf have been copied out.
4417 		 * We're done, so just acknowledge the original
4418 		 * M_IOCTL.
4419 		 */
4420 		mi_copy_done(q, mp, 0);
4421 		return;
4422 	default:
4423 		/*
4424 		 * Something strange has happened, so acknowledge
4425 		 * the original M_IOCTL with an EPROTO error.
4426 		 */
4427 		mi_copy_done(q, mp, EPROTO);
4428 		return;
4429 	}
4430 
4431 	/*
4432 	 * Now we have the strbuf structure for TI_GETMYNAME
4433 	 * and TI_GETPEERNAME.  Next we copyout the requested
4434 	 * address and then we'll copyout the strbuf.
4435 	 */
4436 	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
4437 
4438 	if (connp->conn_family == AF_INET)
4439 		addrlen = sizeof (sin_t);
4440 	else
4441 		addrlen = sizeof (sin6_t);
4442 
4443 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
4444 		mi_copy_done(q, mp, EINVAL);
4445 		return;
4446 	}
4447 
4448 	switch (iocp->ioc_cmd) {
4449 	case TI_GETMYNAME:
4450 		break;
4451 	case TI_GETPEERNAME:
4452 		if (udp->udp_state != TS_DATA_XFER) {
4453 			mi_copy_done(q, mp, ENOTCONN);
4454 			return;
4455 		}
4456 		break;
4457 	}
4458 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
4459 	if (!mp1)
4460 		return;
4461 
4462 	STRUCT_FSET(sb, len, addrlen);
4463 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
4464 	case TI_GETMYNAME:
4465 		(void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
4466 		    &addrlen);
4467 		break;
4468 	case TI_GETPEERNAME:
4469 		(void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
4470 		    &addrlen);
4471 		break;
4472 	}
4473 	mp1->b_wptr += addrlen;
4474 	/* Copy out the address */
4475 	mi_copyout(q, mp);
4476 }
4477 
4478 void
udp_ddi_g_init(void)4479 udp_ddi_g_init(void)
4480 {
4481 	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
4482 	    udp_opt_obj.odb_opt_arr_cnt);
4483 
4484 	/*
4485 	 * We want to be informed each time a stack is created or
4486 	 * destroyed in the kernel, so we can maintain the
4487 	 * set of udp_stack_t's.
4488 	 */
4489 	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
4490 }
4491 
4492 void
udp_ddi_g_destroy(void)4493 udp_ddi_g_destroy(void)
4494 {
4495 	netstack_unregister(NS_UDP);
4496 }
4497 
4498 #define	INET_NAME	"ip"
4499 
4500 /*
4501  * Initialize the UDP stack instance.
4502  */
4503 static void *
udp_stack_init(netstackid_t stackid,netstack_t * ns)4504 udp_stack_init(netstackid_t stackid, netstack_t *ns)
4505 {
4506 	udp_stack_t	*us;
4507 	int		i;
4508 	int		error = 0;
4509 	major_t		major;
4510 	size_t		arrsz;
4511 
4512 	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
4513 	us->us_netstack = ns;
4514 
4515 	mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
4516 	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
4517 	us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
4518 	us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
4519 
4520 	/*
4521 	 * The smallest anonymous port in the priviledged port range which UDP
4522 	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
4523 	 */
4524 	us->us_min_anonpriv_port = 512;
4525 
4526 	us->us_bind_fanout_size = udp_bind_fanout_size;
4527 
4528 	/* Roundup variable that might have been modified in /etc/system */
4529 	if (!ISP2(us->us_bind_fanout_size)) {
4530 		/* Not a power of two. Round up to nearest power of two */
4531 		for (i = 0; i < 31; i++) {
4532 			if (us->us_bind_fanout_size < (1 << i))
4533 				break;
4534 		}
4535 		us->us_bind_fanout_size = 1 << i;
4536 	}
4537 	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
4538 	    sizeof (udp_fanout_t), KM_SLEEP);
4539 	for (i = 0; i < us->us_bind_fanout_size; i++) {
4540 		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
4541 		    NULL);
4542 	}
4543 
4544 	arrsz = udp_propinfo_count * sizeof (mod_prop_info_t);
4545 	us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
4546 	    KM_SLEEP);
4547 	bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz);
4548 
4549 	/* Allocate the per netstack stats */
4550 	mutex_enter(&cpu_lock);
4551 	us->us_sc_cnt = MAX(ncpus, boot_ncpus);
4552 	mutex_exit(&cpu_lock);
4553 	us->us_sc = kmem_zalloc(max_ncpus  * sizeof (udp_stats_cpu_t *),
4554 	    KM_SLEEP);
4555 	for (i = 0; i < us->us_sc_cnt; i++) {
4556 		us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4557 		    KM_SLEEP);
4558 	}
4559 
4560 	us->us_kstat = udp_kstat2_init(stackid);
4561 	us->us_mibkp = udp_kstat_init(stackid);
4562 
4563 	major = mod_name_to_major(INET_NAME);
4564 	error = ldi_ident_from_major(major, &us->us_ldi_ident);
4565 	ASSERT(error == 0);
4566 	return (us);
4567 }
4568 
4569 /*
4570  * Free the UDP stack instance.
4571  */
4572 static void
udp_stack_fini(netstackid_t stackid,void * arg)4573 udp_stack_fini(netstackid_t stackid, void *arg)
4574 {
4575 	udp_stack_t *us = (udp_stack_t *)arg;
4576 	int i;
4577 
4578 	for (i = 0; i < us->us_bind_fanout_size; i++) {
4579 		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
4580 	}
4581 
4582 	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
4583 	    sizeof (udp_fanout_t));
4584 
4585 	us->us_bind_fanout = NULL;
4586 
4587 	for (i = 0; i < us->us_sc_cnt; i++)
4588 		kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t));
4589 	kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *));
4590 
4591 	kmem_free(us->us_propinfo_tbl,
4592 	    udp_propinfo_count * sizeof (mod_prop_info_t));
4593 	us->us_propinfo_tbl = NULL;
4594 
4595 	udp_kstat_fini(stackid, us->us_mibkp);
4596 	us->us_mibkp = NULL;
4597 
4598 	udp_kstat2_fini(stackid, us->us_kstat);
4599 	us->us_kstat = NULL;
4600 
4601 	mutex_destroy(&us->us_epriv_port_lock);
4602 	ldi_ident_release(us->us_ldi_ident);
4603 	kmem_free(us, sizeof (*us));
4604 }
4605 
4606 static size_t
udp_set_rcv_hiwat(udp_t * udp,size_t size)4607 udp_set_rcv_hiwat(udp_t *udp, size_t size)
4608 {
4609 	udp_stack_t *us = udp->udp_us;
4610 
4611 	/* We add a bit of extra buffering */
4612 	size += size >> 1;
4613 	if (size > us->us_max_buf)
4614 		size = us->us_max_buf;
4615 
4616 	udp->udp_rcv_hiwat = size;
4617 	return (size);
4618 }
4619 
4620 /*
4621  * For the lower queue so that UDP can be a dummy mux.
4622  * Nobody should be sending
4623  * packets up this stream
4624  */
4625 static int
udp_lrput(queue_t * q,mblk_t * mp)4626 udp_lrput(queue_t *q, mblk_t *mp)
4627 {
4628 	switch (mp->b_datap->db_type) {
4629 	case M_FLUSH:
4630 		/* Turn around */
4631 		if (*mp->b_rptr & FLUSHW) {
4632 			*mp->b_rptr &= ~FLUSHR;
4633 			qreply(q, mp);
4634 			return (0);
4635 		}
4636 		break;
4637 	}
4638 	freemsg(mp);
4639 	return (0);
4640 }
4641 
4642 /*
4643  * For the lower queue so that UDP can be a dummy mux.
4644  * Nobody should be sending packets down this stream.
4645  */
4646 /* ARGSUSED */
4647 int
udp_lwput(queue_t * q,mblk_t * mp)4648 udp_lwput(queue_t *q, mblk_t *mp)
4649 {
4650 	freemsg(mp);
4651 	return (0);
4652 }
4653 
4654 /*
4655  * When a CPU is added, we need to allocate the per CPU stats struct.
4656  */
4657 void
udp_stack_cpu_add(udp_stack_t * us,processorid_t cpu_seqid)4658 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid)
4659 {
4660 	int i;
4661 
4662 	if (cpu_seqid < us->us_sc_cnt)
4663 		return;
4664 	for (i = us->us_sc_cnt; i <= cpu_seqid; i++) {
4665 		ASSERT(us->us_sc[i] == NULL);
4666 		us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
4667 		    KM_SLEEP);
4668 	}
4669 	membar_producer();
4670 	us->us_sc_cnt = cpu_seqid + 1;
4671 }
4672 
/*
 * The routines below are for the UDP socket module.
 */
4676 
4677 static conn_t *
udp_do_open(cred_t * credp,boolean_t isv6,int flags,int * errorp)4678 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
4679 {
4680 	udp_t		*udp;
4681 	conn_t		*connp;
4682 	zoneid_t	zoneid;
4683 	netstack_t	*ns;
4684 	udp_stack_t	*us;
4685 	int		len;
4686 
4687 	ASSERT(errorp != NULL);
4688 
4689 	if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
4690 		return (NULL);
4691 
4692 	ns = netstack_find_by_cred(credp);
4693 	ASSERT(ns != NULL);
4694 	us = ns->netstack_udp;
4695 	ASSERT(us != NULL);
4696 
4697 	/*
4698 	 * For exclusive stacks we set the zoneid to zero
4699 	 * to make UDP operate as if in the global zone.
4700 	 */
4701 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
4702 		zoneid = GLOBAL_ZONEID;
4703 	else
4704 		zoneid = crgetzoneid(credp);
4705 
4706 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
4707 
4708 	connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
4709 	if (connp == NULL) {
4710 		netstack_rele(ns);
4711 		*errorp = ENOMEM;
4712 		return (NULL);
4713 	}
4714 	udp = connp->conn_udp;
4715 
4716 	/*
4717 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
4718 	 * done by netstack_find_by_cred()
4719 	 */
4720 	netstack_rele(ns);
4721 
4722 	/*
4723 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4724 	 * need to lock anything.
4725 	 */
4726 	ASSERT(connp->conn_proto == IPPROTO_UDP);
4727 	ASSERT(connp->conn_udp == udp);
4728 	ASSERT(udp->udp_connp == connp);
4729 
4730 	/* Set the initial state of the stream and the privilege status. */
4731 	udp->udp_state = TS_UNBND;
4732 	connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
4733 	if (isv6) {
4734 		connp->conn_family = AF_INET6;
4735 		connp->conn_ipversion = IPV6_VERSION;
4736 		connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
4737 		connp->conn_default_ttl = us->us_ipv6_hoplimit;
4738 		len = sizeof (ip6_t) + UDPH_SIZE;
4739 	} else {
4740 		connp->conn_family = AF_INET;
4741 		connp->conn_ipversion = IPV4_VERSION;
4742 		connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
4743 		connp->conn_default_ttl = us->us_ipv4_ttl;
4744 		len = sizeof (ipha_t) + UDPH_SIZE;
4745 	}
4746 
4747 	ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
4748 	connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
4749 
4750 	connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
4751 	connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
4752 	/* conn_allzones can not be set this early, hence no IPCL_ZONEID */
4753 	connp->conn_ixa->ixa_zoneid = zoneid;
4754 
4755 	connp->conn_zoneid = zoneid;
4756 
4757 	/*
4758 	 * If the caller has the process-wide flag set, then default to MAC
4759 	 * exempt mode.  This allows read-down to unlabeled hosts.
4760 	 */
4761 	if (getpflags(NET_MAC_AWARE, credp) != 0)
4762 		connp->conn_mac_mode = CONN_MAC_AWARE;
4763 
4764 	connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
4765 
4766 	udp->udp_us = us;
4767 
4768 	connp->conn_rcvbuf = us->us_recv_hiwat;
4769 	connp->conn_sndbuf = us->us_xmit_hiwat;
4770 	connp->conn_sndlowat = us->us_xmit_lowat;
4771 	connp->conn_rcvlowat = udp_mod_info.mi_lowat;
4772 
4773 	connp->conn_wroff = len + us->us_wroff_extra;
4774 	connp->conn_so_type = SOCK_DGRAM;
4775 
4776 	connp->conn_recv = udp_input;
4777 	connp->conn_recvicmp = udp_icmp_input;
4778 	crhold(credp);
4779 	connp->conn_cred = credp;
4780 	connp->conn_cpid = curproc->p_pid;
4781 	connp->conn_open_time = ddi_get_lbolt64();
4782 	/* Cache things in ixa without an extra refhold */
4783 	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
4784 	connp->conn_ixa->ixa_cred = connp->conn_cred;
4785 	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
4786 	if (is_system_labeled())
4787 		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
4788 
4789 	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
4790 
4791 	if (us->us_pmtu_discovery)
4792 		connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
4793 
4794 	return (connp);
4795 }
4796 
4797 sock_lower_handle_t
udp_create(int family,int type,int proto,sock_downcalls_t ** sock_downcalls,uint_t * smodep,int * errorp,int flags,cred_t * credp)4798 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
4799     uint_t *smodep, int *errorp, int flags, cred_t *credp)
4800 {
4801 	udp_t		*udp = NULL;
4802 	udp_stack_t	*us;
4803 	conn_t		*connp;
4804 	boolean_t	isv6;
4805 
4806 	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
4807 	    (proto != 0 && proto != IPPROTO_UDP)) {
4808 		*errorp = EPROTONOSUPPORT;
4809 		return (NULL);
4810 	}
4811 
4812 	if (family == AF_INET6)
4813 		isv6 = B_TRUE;
4814 	else
4815 		isv6 = B_FALSE;
4816 
4817 	connp = udp_do_open(credp, isv6, flags, errorp);
4818 	if (connp == NULL)
4819 		return (NULL);
4820 
4821 	udp = connp->conn_udp;
4822 	ASSERT(udp != NULL);
4823 	us = udp->udp_us;
4824 	ASSERT(us != NULL);
4825 
4826 	udp->udp_issocket = B_TRUE;
4827 	connp->conn_flags |= IPCL_NONSTR;
4828 
4829 	/*
4830 	 * Set flow control
4831 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4832 	 * need to lock anything.
4833 	 */
4834 	(void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
4835 	udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
4836 
4837 	connp->conn_flow_cntrld = B_FALSE;
4838 
4839 	mutex_enter(&connp->conn_lock);
4840 	connp->conn_state_flags &= ~CONN_INCIPIENT;
4841 	mutex_exit(&connp->conn_lock);
4842 
4843 	*errorp = 0;
4844 	*smodep = SM_ATOMIC;
4845 	*sock_downcalls = &sock_udp_downcalls;
4846 	return ((sock_lower_handle_t)connp);
4847 }
4848 
4849 /* ARGSUSED3 */
4850 void
udp_activate(sock_lower_handle_t proto_handle,sock_upper_handle_t sock_handle,sock_upcalls_t * sock_upcalls,int flags,cred_t * cr)4851 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
4852     sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
4853 {
4854 	conn_t		*connp = (conn_t *)proto_handle;
4855 	struct sock_proto_props sopp;
4856 
4857 	/* All Solaris components should pass a cred for this operation. */
4858 	ASSERT(cr != NULL);
4859 
4860 	connp->conn_upcalls = sock_upcalls;
4861 	connp->conn_upper_handle = sock_handle;
4862 
4863 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
4864 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
4865 	sopp.sopp_wroff = connp->conn_wroff;
4866 	sopp.sopp_maxblk = INFPSZ;
4867 	sopp.sopp_rxhiwat = connp->conn_rcvbuf;
4868 	sopp.sopp_rxlowat = connp->conn_rcvlowat;
4869 	sopp.sopp_maxaddrlen = sizeof (sin6_t);
4870 	sopp.sopp_maxpsz =
4871 	    (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
4872 	    UDP_MAXPACKET_IPV6;
4873 	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
4874 	    udp_mod_info.mi_minpsz;
4875 
4876 	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
4877 	    &sopp);
4878 }
4879 
4880 static void
udp_do_close(conn_t * connp)4881 udp_do_close(conn_t *connp)
4882 {
4883 	udp_t	*udp;
4884 
4885 	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
4886 	udp = connp->conn_udp;
4887 
4888 	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
4889 		/*
4890 		 * Running in cluster mode - register unbind information
4891 		 */
4892 		if (connp->conn_ipversion == IPV4_VERSION) {
4893 			(*cl_inet_unbind)(
4894 			    connp->conn_netstack->netstack_stackid,
4895 			    IPPROTO_UDP, AF_INET,
4896 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
4897 			    (in_port_t)connp->conn_lport, NULL);
4898 		} else {
4899 			(*cl_inet_unbind)(
4900 			    connp->conn_netstack->netstack_stackid,
4901 			    IPPROTO_UDP, AF_INET6,
4902 			    (uint8_t *)&(connp->conn_laddr_v6),
4903 			    (in_port_t)connp->conn_lport, NULL);
4904 		}
4905 	}
4906 
4907 	udp_bind_hash_remove(udp, B_FALSE);
4908 
4909 	ip_quiesce_conn(connp);
4910 
4911 	if (!IPCL_IS_NONSTR(connp)) {
4912 		ASSERT(connp->conn_wq != NULL);
4913 		ASSERT(connp->conn_rq != NULL);
4914 		qprocsoff(connp->conn_rq);
4915 	}
4916 
4917 	udp_close_free(connp);
4918 
4919 	/*
4920 	 * Now we are truly single threaded on this stream, and can
4921 	 * delete the things hanging off the connp, and finally the connp.
4922 	 * We removed this connp from the fanout list, it cannot be
4923 	 * accessed thru the fanouts, and we already waited for the
4924 	 * conn_ref to drop to 0. We are already in close, so
4925 	 * there cannot be any other thread from the top. qprocsoff
4926 	 * has completed, and service has completed or won't run in
4927 	 * future.
4928 	 */
4929 	ASSERT(connp->conn_ref == 1);
4930 
4931 	if (!IPCL_IS_NONSTR(connp)) {
4932 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
4933 	} else {
4934 		ip_free_helper_stream(connp);
4935 	}
4936 
4937 	connp->conn_ref--;
4938 	ipcl_conn_destroy(connp);
4939 }
4940 
4941 /* ARGSUSED1 */
4942 int
udp_close(sock_lower_handle_t proto_handle,int flags,cred_t * cr)4943 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
4944 {
4945 	conn_t	*connp = (conn_t *)proto_handle;
4946 
4947 	/* All Solaris components should pass a cred for this operation. */
4948 	ASSERT(cr != NULL);
4949 
4950 	udp_do_close(connp);
4951 	return (0);
4952 }
4953 
4954 static int
udp_do_bind(conn_t * connp,struct sockaddr * sa,socklen_t len,cred_t * cr,boolean_t bind_to_req_port_only)4955 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
4956     boolean_t bind_to_req_port_only)
4957 {
4958 	sin_t		*sin;
4959 	sin6_t		*sin6;
4960 	udp_t		*udp = connp->conn_udp;
4961 	int		error = 0;
4962 	ip_laddr_t	laddr_type = IPVL_UNICAST_UP;	/* INADDR_ANY */
4963 	in_port_t	port;		/* Host byte order */
4964 	in_port_t	requested_port;	/* Host byte order */
4965 	int		count;
4966 	ipaddr_t	v4src;		/* Set if AF_INET */
4967 	in6_addr_t	v6src;
4968 	int		loopmax;
4969 	udp_fanout_t	*udpf;
4970 	in_port_t	lport;		/* Network byte order */
4971 	uint_t		scopeid = 0;
4972 	zoneid_t	zoneid = IPCL_ZONEID(connp);
4973 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
4974 	boolean_t	is_inaddr_any;
4975 	mlp_type_t	addrtype, mlptype;
4976 	udp_stack_t	*us = udp->udp_us;
4977 
4978 	sin = NULL;
4979 	sin6 = NULL;
4980 	switch (len) {
4981 	case sizeof (sin_t):	/* Complete IPv4 address */
4982 		sin = (sin_t *)sa;
4983 
4984 		if (sin == NULL || !OK_32PTR((char *)sin))
4985 			return (EINVAL);
4986 
4987 		if (connp->conn_family != AF_INET ||
4988 		    sin->sin_family != AF_INET) {
4989 			return (EAFNOSUPPORT);
4990 		}
4991 		v4src = sin->sin_addr.s_addr;
4992 		IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
4993 		if (v4src != INADDR_ANY) {
4994 			laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
4995 			    B_TRUE);
4996 		}
4997 		port = ntohs(sin->sin_port);
4998 		break;
4999 
5000 	case sizeof (sin6_t):	/* complete IPv6 address */
5001 		sin6 = (sin6_t *)sa;
5002 
5003 		if (sin6 == NULL || !OK_32PTR((char *)sin6))
5004 			return (EINVAL);
5005 
5006 		if (connp->conn_family != AF_INET6 ||
5007 		    sin6->sin6_family != AF_INET6) {
5008 			return (EAFNOSUPPORT);
5009 		}
5010 		v6src = sin6->sin6_addr;
5011 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5012 			if (connp->conn_ipv6_v6only)
5013 				return (EADDRNOTAVAIL);
5014 
5015 			IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
5016 			if (v4src != INADDR_ANY) {
5017 				laddr_type = ip_laddr_verify_v4(v4src,
5018 				    zoneid, ipst, B_FALSE);
5019 			}
5020 		} else {
5021 			if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5022 				if (IN6_IS_ADDR_LINKSCOPE(&v6src))
5023 					scopeid = sin6->sin6_scope_id;
5024 				laddr_type = ip_laddr_verify_v6(&v6src,
5025 				    zoneid, ipst, B_TRUE, scopeid);
5026 			}
5027 		}
5028 		port = ntohs(sin6->sin6_port);
5029 		break;
5030 
5031 	default:		/* Invalid request */
5032 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5033 		    "udp_bind: bad ADDR_length length %u", len);
5034 		return (-TBADADDR);
5035 	}
5036 
5037 	/* Is the local address a valid unicast, multicast, or broadcast? */
5038 	if (laddr_type == IPVL_BAD)
5039 		return (EADDRNOTAVAIL);
5040 
5041 	requested_port = port;
5042 
5043 	if (requested_port == 0 || !bind_to_req_port_only)
5044 		bind_to_req_port_only = B_FALSE;
5045 	else		/* T_BIND_REQ and requested_port != 0 */
5046 		bind_to_req_port_only = B_TRUE;
5047 
5048 	if (requested_port == 0) {
5049 		/*
5050 		 * If the application passed in zero for the port number, it
5051 		 * doesn't care which port number we bind to. Get one in the
5052 		 * valid range.
5053 		 */
5054 		if (connp->conn_anon_priv_bind) {
5055 			port = udp_get_next_priv_port(udp);
5056 		} else {
5057 			port = udp_update_next_port(udp,
5058 			    us->us_next_port_to_try, B_TRUE);
5059 		}
5060 	} else {
5061 		/*
5062 		 * If the port is in the well-known privileged range,
5063 		 * make sure the caller was privileged.
5064 		 */
5065 		int i;
5066 		boolean_t priv = B_FALSE;
5067 
5068 		if (port < us->us_smallest_nonpriv_port) {
5069 			priv = B_TRUE;
5070 		} else {
5071 			for (i = 0; i < us->us_num_epriv_ports; i++) {
5072 				if (port == us->us_epriv_ports[i]) {
5073 					priv = B_TRUE;
5074 					break;
5075 				}
5076 			}
5077 		}
5078 
5079 		if (priv) {
5080 			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
5081 				return (-TACCES);
5082 		}
5083 	}
5084 
5085 	if (port == 0)
5086 		return (-TNOADDR);
5087 
5088 	/*
5089 	 * The state must be TS_UNBND. TPI mandates that users must send
5090 	 * TPI primitives only 1 at a time and wait for the response before
5091 	 * sending the next primitive.
5092 	 */
5093 	mutex_enter(&connp->conn_lock);
5094 	if (udp->udp_state != TS_UNBND) {
5095 		mutex_exit(&connp->conn_lock);
5096 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5097 		    "udp_bind: bad state, %u", udp->udp_state);
5098 		return (-TOUTSTATE);
5099 	}
5100 	/*
5101 	 * Copy the source address into our udp structure. This address
5102 	 * may still be zero; if so, IP will fill in the correct address
5103 	 * each time an outbound packet is passed to it. Since the udp is
5104 	 * not yet in the bind hash list, we don't grab the uf_lock to
5105 	 * change conn_ipversion
5106 	 */
5107 	if (connp->conn_family == AF_INET) {
5108 		ASSERT(sin != NULL);
5109 		ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
5110 	} else {
5111 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
5112 			/*
5113 			 * no need to hold the uf_lock to set the conn_ipversion
5114 			 * since we are not yet in the fanout list
5115 			 */
5116 			connp->conn_ipversion = IPV4_VERSION;
5117 			connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
5118 		} else {
5119 			connp->conn_ipversion = IPV6_VERSION;
5120 			connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
5121 		}
5122 	}
5123 
5124 	/*
5125 	 * If conn_reuseaddr is not set, then we have to make sure that
5126 	 * the IP address and port number the application requested
5127 	 * (or we selected for the application) is not being used by
5128 	 * another stream.  If another stream is already using the
5129 	 * requested IP address and port, the behavior depends on
5130 	 * "bind_to_req_port_only". If set the bind fails; otherwise we
5131 	 * search for any unused port to bind to the stream.
5132 	 *
5133 	 * As per the BSD semantics, as modified by the Deering multicast
5134 	 * changes, if conn_reuseaddr is set, then we allow multiple binds
5135 	 * to the same port independent of the local IP address.
5136 	 *
5137 	 * This is slightly different than in SunOS 4.X which did not
5138 	 * support IP multicast. Note that the change implemented by the
5139 	 * Deering multicast code effects all binds - not only binding
5140 	 * to IP multicast addresses.
5141 	 *
5142 	 * Note that when binding to port zero we ignore SO_REUSEADDR in
5143 	 * order to guarantee a unique port.
5144 	 */
5145 
5146 	count = 0;
5147 	if (connp->conn_anon_priv_bind) {
5148 		/*
5149 		 * loopmax = (IPPORT_RESERVED-1) -
5150 		 *    us->us_min_anonpriv_port + 1
5151 		 */
5152 		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
5153 	} else {
5154 		loopmax = us->us_largest_anon_port -
5155 		    us->us_smallest_anon_port + 1;
5156 	}
5157 
5158 	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
5159 
5160 	for (;;) {
5161 		udp_t		*udp1;
5162 		boolean_t	found_exclbind = B_FALSE;
5163 		conn_t		*connp1;
5164 
5165 		/*
5166 		 * Walk through the list of udp streams bound to
5167 		 * requested port with the same IP address.
5168 		 */
5169 		lport = htons(port);
5170 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
5171 		    us->us_bind_fanout_size)];
5172 		mutex_enter(&udpf->uf_lock);
5173 		for (udp1 = udpf->uf_udp; udp1 != NULL;
5174 		    udp1 = udp1->udp_bind_hash) {
5175 			connp1 = udp1->udp_connp;
5176 
5177 			if (lport != connp1->conn_lport)
5178 				continue;
5179 
5180 			/*
5181 			 * On a labeled system, we must treat bindings to ports
5182 			 * on shared IP addresses by sockets with MAC exemption
5183 			 * privilege as being in all zones, as there's
5184 			 * otherwise no way to identify the right receiver.
5185 			 */
5186 			if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
5187 				continue;
5188 
5189 			/*
5190 			 * If UDP_EXCLBIND is set for either the bound or
5191 			 * binding endpoint, the semantics of bind
5192 			 * is changed according to the following chart.
5193 			 *
5194 			 * spec = specified address (v4 or v6)
5195 			 * unspec = unspecified address (v4 or v6)
5196 			 * A = specified addresses are different for endpoints
5197 			 *
5198 			 * bound	bind to		allowed?
5199 			 * -------------------------------------
5200 			 * unspec	unspec		no
5201 			 * unspec	spec		no
5202 			 * spec		unspec		no
5203 			 * spec		spec		yes if A
5204 			 *
5205 			 * For labeled systems, SO_MAC_EXEMPT behaves the same
5206 			 * as UDP_EXCLBIND, except that zoneid is ignored.
5207 			 */
5208 			if (connp1->conn_exclbind || connp->conn_exclbind ||
5209 			    IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
5210 				if (V6_OR_V4_INADDR_ANY(
5211 				    connp1->conn_bound_addr_v6) ||
5212 				    is_inaddr_any ||
5213 				    IN6_ARE_ADDR_EQUAL(
5214 				    &connp1->conn_bound_addr_v6,
5215 				    &v6src)) {
5216 					found_exclbind = B_TRUE;
5217 					break;
5218 				}
5219 				continue;
5220 			}
5221 
5222 			/*
5223 			 * Check ipversion to allow IPv4 and IPv6 sockets to
5224 			 * have disjoint port number spaces.
5225 			 */
5226 			if (connp->conn_ipversion != connp1->conn_ipversion) {
5227 
5228 				/*
5229 				 * On the first time through the loop, if the
5230 				 * the user intentionally specified a
5231 				 * particular port number, then ignore any
5232 				 * bindings of the other protocol that may
5233 				 * conflict. This allows the user to bind IPv6
5234 				 * alone and get both v4 and v6, or bind both
5235 				 * both and get each seperately. On subsequent
5236 				 * times through the loop, we're checking a
5237 				 * port that we chose (not the user) and thus
5238 				 * we do not allow casual duplicate bindings.
5239 				 */
5240 				if (count == 0 && requested_port != 0)
5241 					continue;
5242 			}
5243 
5244 			/*
5245 			 * No difference depending on SO_REUSEADDR.
5246 			 *
5247 			 * If existing port is bound to a
5248 			 * non-wildcard IP address and
5249 			 * the requesting stream is bound to
5250 			 * a distinct different IP addresses
5251 			 * (non-wildcard, also), keep going.
5252 			 */
5253 			if (!is_inaddr_any &&
5254 			    !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
5255 			    !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
5256 			    &v6src)) {
5257 				continue;
5258 			}
5259 			break;
5260 		}
5261 
5262 		if (!found_exclbind &&
5263 		    (connp->conn_reuseaddr && requested_port != 0)) {
5264 			break;
5265 		}
5266 
5267 		if (udp1 == NULL) {
5268 			/*
5269 			 * No other stream has this IP address
5270 			 * and port number. We can use it.
5271 			 */
5272 			break;
5273 		}
5274 		mutex_exit(&udpf->uf_lock);
5275 		if (bind_to_req_port_only) {
5276 			/*
5277 			 * We get here only when requested port
5278 			 * is bound (and only first  of the for()
5279 			 * loop iteration).
5280 			 *
5281 			 * The semantics of this bind request
5282 			 * require it to fail so we return from
5283 			 * the routine (and exit the loop).
5284 			 *
5285 			 */
5286 			mutex_exit(&connp->conn_lock);
5287 			return (-TADDRBUSY);
5288 		}
5289 
5290 		if (connp->conn_anon_priv_bind) {
5291 			port = udp_get_next_priv_port(udp);
5292 		} else {
5293 			if ((count == 0) && (requested_port != 0)) {
5294 				/*
5295 				 * If the application wants us to find
5296 				 * a port, get one to start with. Set
5297 				 * requested_port to 0, so that we will
5298 				 * update us->us_next_port_to_try below.
5299 				 */
5300 				port = udp_update_next_port(udp,
5301 				    us->us_next_port_to_try, B_TRUE);
5302 				requested_port = 0;
5303 			} else {
5304 				port = udp_update_next_port(udp, port + 1,
5305 				    B_FALSE);
5306 			}
5307 		}
5308 
5309 		if (port == 0 || ++count >= loopmax) {
5310 			/*
5311 			 * We've tried every possible port number and
5312 			 * there are none available, so send an error
5313 			 * to the user.
5314 			 */
5315 			mutex_exit(&connp->conn_lock);
5316 			return (-TNOADDR);
5317 		}
5318 	}
5319 
5320 	/*
5321 	 * Copy the source address into our udp structure.  This address
5322 	 * may still be zero; if so, ip_attr_connect will fill in the correct
5323 	 * address when a packet is about to be sent.
5324 	 * If we are binding to a broadcast or multicast address then
5325 	 * we just set the conn_bound_addr since we don't want to use
5326 	 * that as the source address when sending.
5327 	 */
5328 	connp->conn_bound_addr_v6 = v6src;
5329 	connp->conn_laddr_v6 = v6src;
5330 	if (scopeid != 0) {
5331 		connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
5332 		connp->conn_ixa->ixa_scopeid = scopeid;
5333 		connp->conn_incoming_ifindex = scopeid;
5334 	} else {
5335 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5336 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5337 	}
5338 
5339 	switch (laddr_type) {
5340 	case IPVL_UNICAST_UP:
5341 	case IPVL_UNICAST_DOWN:
5342 		connp->conn_saddr_v6 = v6src;
5343 		connp->conn_mcbc_bind = B_FALSE;
5344 		break;
5345 	case IPVL_MCAST:
5346 	case IPVL_BCAST:
5347 		/* ip_set_destination will pick a source address later */
5348 		connp->conn_saddr_v6 = ipv6_all_zeros;
5349 		connp->conn_mcbc_bind = B_TRUE;
5350 		break;
5351 	}
5352 
5353 	/* Any errors after this point should use late_error */
5354 	connp->conn_lport = lport;
5355 
5356 	/*
5357 	 * Now reset the next anonymous port if the application requested
5358 	 * an anonymous port, or we handed out the next anonymous port.
5359 	 */
5360 	if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
5361 		us->us_next_port_to_try = port + 1;
5362 	}
5363 
5364 	/* Initialize the T_BIND_ACK. */
5365 	if (connp->conn_family == AF_INET) {
5366 		sin->sin_port = connp->conn_lport;
5367 	} else {
5368 		sin6->sin6_port = connp->conn_lport;
5369 	}
5370 	udp->udp_state = TS_IDLE;
5371 	udp_bind_hash_insert(udpf, udp);
5372 	mutex_exit(&udpf->uf_lock);
5373 	mutex_exit(&connp->conn_lock);
5374 
5375 	if (cl_inet_bind) {
5376 		/*
5377 		 * Running in cluster mode - register bind information
5378 		 */
5379 		if (connp->conn_ipversion == IPV4_VERSION) {
5380 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5381 			    IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
5382 			    (in_port_t)connp->conn_lport, NULL);
5383 		} else {
5384 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
5385 			    IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
5386 			    (in_port_t)connp->conn_lport, NULL);
5387 		}
5388 	}
5389 
5390 	mutex_enter(&connp->conn_lock);
5391 	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
5392 	if (is_system_labeled() && (!connp->conn_anon_port ||
5393 	    connp->conn_anon_mlp)) {
5394 		uint16_t mlpport;
5395 		zone_t *zone;
5396 
5397 		zone = crgetzone(cr);
5398 		connp->conn_mlp_type =
5399 		    connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
5400 		    mlptSingle;
5401 		addrtype = tsol_mlp_addr_type(
5402 		    connp->conn_allzones ? ALL_ZONES : zone->zone_id,
5403 		    IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
5404 		if (addrtype == mlptSingle) {
5405 			error = -TNOADDR;
5406 			mutex_exit(&connp->conn_lock);
5407 			goto late_error;
5408 		}
5409 		mlpport = connp->conn_anon_port ? PMAPPORT : port;
5410 		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
5411 		    addrtype);
5412 
5413 		/*
5414 		 * It is a coding error to attempt to bind an MLP port
5415 		 * without first setting SOL_SOCKET/SCM_UCRED.
5416 		 */
5417 		if (mlptype != mlptSingle &&
5418 		    connp->conn_mlp_type == mlptSingle) {
5419 			error = EINVAL;
5420 			mutex_exit(&connp->conn_lock);
5421 			goto late_error;
5422 		}
5423 
5424 		/*
5425 		 * It is an access violation to attempt to bind an MLP port
5426 		 * without NET_BINDMLP privilege.
5427 		 */
5428 		if (mlptype != mlptSingle &&
5429 		    secpolicy_net_bindmlp(cr) != 0) {
5430 			if (connp->conn_debug) {
5431 				(void) strlog(UDP_MOD_ID, 0, 1,
5432 				    SL_ERROR|SL_TRACE,
5433 				    "udp_bind: no priv for multilevel port %d",
5434 				    mlpport);
5435 			}
5436 			error = -TACCES;
5437 			mutex_exit(&connp->conn_lock);
5438 			goto late_error;
5439 		}
5440 
5441 		/*
5442 		 * If we're specifically binding a shared IP address and the
5443 		 * port is MLP on shared addresses, then check to see if this
5444 		 * zone actually owns the MLP.  Reject if not.
5445 		 */
5446 		if (mlptype == mlptShared && addrtype == mlptShared) {
5447 			/*
5448 			 * No need to handle exclusive-stack zones since
5449 			 * ALL_ZONES only applies to the shared stack.
5450 			 */
5451 			zoneid_t mlpzone;
5452 
5453 			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
5454 			    htons(mlpport));
5455 			if (connp->conn_zoneid != mlpzone) {
5456 				if (connp->conn_debug) {
5457 					(void) strlog(UDP_MOD_ID, 0, 1,
5458 					    SL_ERROR|SL_TRACE,
5459 					    "udp_bind: attempt to bind port "
5460 					    "%d on shared addr in zone %d "
5461 					    "(should be %d)",
5462 					    mlpport, connp->conn_zoneid,
5463 					    mlpzone);
5464 				}
5465 				error = -TACCES;
5466 				mutex_exit(&connp->conn_lock);
5467 				goto late_error;
5468 			}
5469 		}
5470 		if (connp->conn_anon_port) {
5471 			error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
5472 			    port, B_TRUE);
5473 			if (error != 0) {
5474 				if (connp->conn_debug) {
5475 					(void) strlog(UDP_MOD_ID, 0, 1,
5476 					    SL_ERROR|SL_TRACE,
5477 					    "udp_bind: cannot establish anon "
5478 					    "MLP for port %d", port);
5479 				}
5480 				error = -TACCES;
5481 				mutex_exit(&connp->conn_lock);
5482 				goto late_error;
5483 			}
5484 		}
5485 		connp->conn_mlp_type = mlptype;
5486 	}
5487 
5488 	/*
5489 	 * We create an initial header template here to make a subsequent
5490 	 * sendto have a starting point. Since conn_last_dst is zero the
5491 	 * first sendto will always follow the 'dst changed' code path.
5492 	 * Note that we defer massaging options and the related checksum
5493 	 * adjustment until we have a destination address.
5494 	 */
5495 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5496 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5497 	if (error != 0) {
5498 		mutex_exit(&connp->conn_lock);
5499 		goto late_error;
5500 	}
5501 	/* Just in case */
5502 	connp->conn_faddr_v6 = ipv6_all_zeros;
5503 	connp->conn_fport = 0;
5504 	connp->conn_v6lastdst = ipv6_all_zeros;
5505 	mutex_exit(&connp->conn_lock);
5506 
5507 	error = ip_laddr_fanout_insert(connp);
5508 	if (error != 0)
5509 		goto late_error;
5510 
5511 	/* Bind succeeded */
5512 	return (0);
5513 
5514 late_error:
5515 	/* We had already picked the port number, and then the bind failed */
5516 	mutex_enter(&connp->conn_lock);
5517 	udpf = &us->us_bind_fanout[
5518 	    UDP_BIND_HASH(connp->conn_lport,
5519 	    us->us_bind_fanout_size)];
5520 	mutex_enter(&udpf->uf_lock);
5521 	connp->conn_saddr_v6 = ipv6_all_zeros;
5522 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
5523 	connp->conn_laddr_v6 = ipv6_all_zeros;
5524 	if (scopeid != 0) {
5525 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5526 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5527 	}
5528 	udp->udp_state = TS_UNBND;
5529 	udp_bind_hash_remove(udp, B_TRUE);
5530 	connp->conn_lport = 0;
5531 	mutex_exit(&udpf->uf_lock);
5532 	connp->conn_anon_port = B_FALSE;
5533 	connp->conn_mlp_type = mlptSingle;
5534 
5535 	connp->conn_v6lastdst = ipv6_all_zeros;
5536 
5537 	/* Restore the header that was built above - different source address */
5538 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5539 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5540 	mutex_exit(&connp->conn_lock);
5541 	return (error);
5542 }
5543 
5544 int
udp_bind(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t len,cred_t * cr)5545 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5546     socklen_t len, cred_t *cr)
5547 {
5548 	int		error;
5549 	conn_t		*connp;
5550 
5551 	/* All Solaris components should pass a cred for this operation. */
5552 	ASSERT(cr != NULL);
5553 
5554 	connp = (conn_t *)proto_handle;
5555 
5556 	if (sa == NULL)
5557 		error = udp_do_unbind(connp);
5558 	else
5559 		error = udp_do_bind(connp, sa, len, cr, B_TRUE);
5560 
5561 	if (error < 0) {
5562 		if (error == -TOUTSTATE)
5563 			error = EINVAL;
5564 		else
5565 			error = proto_tlitosyserr(-error);
5566 	}
5567 
5568 	return (error);
5569 }
5570 
5571 static int
udp_implicit_bind(conn_t * connp,cred_t * cr)5572 udp_implicit_bind(conn_t *connp, cred_t *cr)
5573 {
5574 	sin6_t sin6addr;
5575 	sin_t *sin;
5576 	sin6_t *sin6;
5577 	socklen_t len;
5578 	int error;
5579 
5580 	/* All Solaris components should pass a cred for this operation. */
5581 	ASSERT(cr != NULL);
5582 
5583 	if (connp->conn_family == AF_INET) {
5584 		len = sizeof (struct sockaddr_in);
5585 		sin = (sin_t *)&sin6addr;
5586 		*sin = sin_null;
5587 		sin->sin_family = AF_INET;
5588 		sin->sin_addr.s_addr = INADDR_ANY;
5589 	} else {
5590 		ASSERT(connp->conn_family == AF_INET6);
5591 		len = sizeof (sin6_t);
5592 		sin6 = (sin6_t *)&sin6addr;
5593 		*sin6 = sin6_null;
5594 		sin6->sin6_family = AF_INET6;
5595 		V6_SET_ZERO(sin6->sin6_addr);
5596 	}
5597 
5598 	error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
5599 	    cr, B_FALSE);
5600 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
5601 }
5602 
5603 /*
5604  * This routine removes a port number association from a stream. It
5605  * is called by udp_unbind and udp_tpi_unbind.
5606  */
5607 static int
udp_do_unbind(conn_t * connp)5608 udp_do_unbind(conn_t *connp)
5609 {
5610 	udp_t		*udp = connp->conn_udp;
5611 	udp_fanout_t	*udpf;
5612 	udp_stack_t	*us = udp->udp_us;
5613 
5614 	if (cl_inet_unbind != NULL) {
5615 		/*
5616 		 * Running in cluster mode - register unbind information
5617 		 */
5618 		if (connp->conn_ipversion == IPV4_VERSION) {
5619 			(*cl_inet_unbind)(
5620 			    connp->conn_netstack->netstack_stackid,
5621 			    IPPROTO_UDP, AF_INET,
5622 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
5623 			    (in_port_t)connp->conn_lport, NULL);
5624 		} else {
5625 			(*cl_inet_unbind)(
5626 			    connp->conn_netstack->netstack_stackid,
5627 			    IPPROTO_UDP, AF_INET6,
5628 			    (uint8_t *)&(connp->conn_laddr_v6),
5629 			    (in_port_t)connp->conn_lport, NULL);
5630 		}
5631 	}
5632 
5633 	mutex_enter(&connp->conn_lock);
5634 	/* If a bind has not been done, we can't unbind. */
5635 	if (udp->udp_state == TS_UNBND) {
5636 		mutex_exit(&connp->conn_lock);
5637 		return (-TOUTSTATE);
5638 	}
5639 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5640 	    us->us_bind_fanout_size)];
5641 	mutex_enter(&udpf->uf_lock);
5642 	udp_bind_hash_remove(udp, B_TRUE);
5643 	connp->conn_saddr_v6 = ipv6_all_zeros;
5644 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
5645 	connp->conn_laddr_v6 = ipv6_all_zeros;
5646 	connp->conn_mcbc_bind = B_FALSE;
5647 	connp->conn_lport = 0;
5648 	/* In case we were also connected */
5649 	connp->conn_faddr_v6 = ipv6_all_zeros;
5650 	connp->conn_fport = 0;
5651 	mutex_exit(&udpf->uf_lock);
5652 
5653 	connp->conn_v6lastdst = ipv6_all_zeros;
5654 	udp->udp_state = TS_UNBND;
5655 
5656 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5657 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5658 	mutex_exit(&connp->conn_lock);
5659 
5660 	ip_unbind(connp);
5661 
5662 	return (0);
5663 }
5664 
5665 /*
5666  * It associates a default destination address with the stream.
5667  */
5668 static int
udp_do_connect(conn_t * connp,const struct sockaddr * sa,socklen_t len,cred_t * cr,pid_t pid)5669 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
5670     cred_t *cr, pid_t pid)
5671 {
5672 	sin6_t		*sin6;
5673 	sin_t		*sin;
5674 	in6_addr_t	v6dst;
5675 	ipaddr_t	v4dst;
5676 	uint16_t	dstport;
5677 	uint32_t	flowinfo;
5678 	udp_fanout_t	*udpf;
5679 	udp_t		*udp, *udp1;
5680 	ushort_t	ipversion;
5681 	udp_stack_t	*us;
5682 	int		error;
5683 	conn_t		*connp1;
5684 	ip_xmit_attr_t	*ixa;
5685 	ip_xmit_attr_t	*oldixa;
5686 	uint_t		scopeid = 0;
5687 	uint_t		srcid = 0;
5688 	in6_addr_t	v6src = connp->conn_saddr_v6;
5689 	boolean_t	v4mapped;
5690 
5691 	udp = connp->conn_udp;
5692 	us = udp->udp_us;
5693 	sin = NULL;
5694 	sin6 = NULL;
5695 	v4dst = INADDR_ANY;
5696 	flowinfo = 0;
5697 
5698 	/*
5699 	 * Address has been verified by the caller
5700 	 */
5701 	switch (len) {
5702 	default:
5703 		/*
5704 		 * Should never happen
5705 		 */
5706 		return (EINVAL);
5707 
5708 	case sizeof (sin_t):
5709 		sin = (sin_t *)sa;
5710 		v4dst = sin->sin_addr.s_addr;
5711 		dstport = sin->sin_port;
5712 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5713 		ASSERT(connp->conn_ipversion == IPV4_VERSION);
5714 		ipversion = IPV4_VERSION;
5715 		break;
5716 
5717 	case sizeof (sin6_t):
5718 		sin6 = (sin6_t *)sa;
5719 		v6dst = sin6->sin6_addr;
5720 		dstport = sin6->sin6_port;
5721 		srcid = sin6->__sin6_src_id;
5722 		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
5723 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
5724 			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
5725 			    v4mapped, connp->conn_netstack)) {
5726 				/* Mismatch v4mapped/v6 specified by srcid. */
5727 				return (EADDRNOTAVAIL);
5728 			}
5729 		}
5730 		if (v4mapped) {
5731 			if (connp->conn_ipv6_v6only)
5732 				return (EADDRNOTAVAIL);
5733 
5734 			/*
5735 			 * Destination adress is mapped IPv6 address.
5736 			 * Source bound address should be unspecified or
5737 			 * IPv6 mapped address as well.
5738 			 */
5739 			if (!IN6_IS_ADDR_UNSPECIFIED(
5740 			    &connp->conn_bound_addr_v6) &&
5741 			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
5742 				return (EADDRNOTAVAIL);
5743 			}
5744 			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
5745 			ipversion = IPV4_VERSION;
5746 			flowinfo = 0;
5747 		} else {
5748 			ipversion = IPV6_VERSION;
5749 			flowinfo = sin6->sin6_flowinfo;
5750 			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
5751 				scopeid = sin6->sin6_scope_id;
5752 		}
5753 		break;
5754 	}
5755 
5756 	if (dstport == 0)
5757 		return (-TBADADDR);
5758 
5759 	/*
5760 	 * If there is a different thread using conn_ixa then we get a new
5761 	 * copy and cut the old one loose from conn_ixa. Otherwise we use
5762 	 * conn_ixa and prevent any other thread from using/changing it.
5763 	 * Once connect() is done other threads can use conn_ixa since the
5764 	 * refcnt will be back at one.
5765 	 * We defer updating conn_ixa until later to handle any concurrent
5766 	 * conn_ixa_cleanup thread.
5767 	 */
5768 	ixa = conn_get_ixa(connp, B_FALSE);
5769 	if (ixa == NULL)
5770 		return (ENOMEM);
5771 
5772 	mutex_enter(&connp->conn_lock);
5773 	/*
5774 	 * This udp_t must have bound to a port already before doing a connect.
5775 	 * Reject if a connect is in progress (we drop conn_lock during
5776 	 * udp_do_connect).
5777 	 */
5778 	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
5779 		mutex_exit(&connp->conn_lock);
5780 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
5781 		    "udp_connect: bad state, %u", udp->udp_state);
5782 		ixa_refrele(ixa);
5783 		return (-TOUTSTATE);
5784 	}
5785 	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
5786 
5787 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
5788 	    us->us_bind_fanout_size)];
5789 
5790 	mutex_enter(&udpf->uf_lock);
5791 	if (udp->udp_state == TS_DATA_XFER) {
5792 		/* Already connected - clear out state */
5793 		if (connp->conn_mcbc_bind)
5794 			connp->conn_saddr_v6 = ipv6_all_zeros;
5795 		else
5796 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5797 		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5798 		connp->conn_faddr_v6 = ipv6_all_zeros;
5799 		connp->conn_fport = 0;
5800 		udp->udp_state = TS_IDLE;
5801 	}
5802 
5803 	connp->conn_fport = dstport;
5804 	connp->conn_ipversion = ipversion;
5805 	if (ipversion == IPV4_VERSION) {
5806 		/*
5807 		 * Interpret a zero destination to mean loopback.
5808 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
5809 		 * generate the T_CONN_CON.
5810 		 */
5811 		if (v4dst == INADDR_ANY) {
5812 			v4dst = htonl(INADDR_LOOPBACK);
5813 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
5814 			if (connp->conn_family == AF_INET) {
5815 				sin->sin_addr.s_addr = v4dst;
5816 			} else {
5817 				sin6->sin6_addr = v6dst;
5818 			}
5819 		}
5820 		connp->conn_faddr_v6 = v6dst;
5821 		connp->conn_flowinfo = 0;
5822 	} else {
5823 		ASSERT(connp->conn_ipversion == IPV6_VERSION);
5824 		/*
5825 		 * Interpret a zero destination to mean loopback.
5826 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
5827 		 * generate the T_CONN_CON.
5828 		 */
5829 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
5830 			v6dst = ipv6_loopback;
5831 			sin6->sin6_addr = v6dst;
5832 		}
5833 		connp->conn_faddr_v6 = v6dst;
5834 		connp->conn_flowinfo = flowinfo;
5835 	}
5836 	mutex_exit(&udpf->uf_lock);
5837 
5838 	/*
5839 	 * We update our cred/cpid based on the caller of connect
5840 	 */
5841 	if (connp->conn_cred != cr) {
5842 		crhold(cr);
5843 		crfree(connp->conn_cred);
5844 		connp->conn_cred = cr;
5845 	}
5846 	connp->conn_cpid = pid;
5847 	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
5848 	ixa->ixa_cred = cr;
5849 	ixa->ixa_cpid = pid;
5850 	if (is_system_labeled()) {
5851 		/* We need to restart with a label based on the cred */
5852 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
5853 	}
5854 
5855 	if (scopeid != 0) {
5856 		ixa->ixa_flags |= IXAF_SCOPEID_SET;
5857 		ixa->ixa_scopeid = scopeid;
5858 		connp->conn_incoming_ifindex = scopeid;
5859 	} else {
5860 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
5861 		connp->conn_incoming_ifindex = connp->conn_bound_if;
5862 	}
5863 	/*
5864 	 * conn_connect will drop conn_lock and reacquire it.
5865 	 * To prevent a send* from messing with this udp_t while the lock
5866 	 * is dropped we set udp_state and clear conn_v6lastdst.
5867 	 * That will make all send* fail with EISCONN.
5868 	 */
5869 	connp->conn_v6lastdst = ipv6_all_zeros;
5870 	udp->udp_state = TS_WCON_CREQ;
5871 
5872 	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
5873 	mutex_exit(&connp->conn_lock);
5874 	if (error != 0)
5875 		goto connect_failed;
5876 
5877 	/*
5878 	 * The addresses have been verified. Time to insert in
5879 	 * the correct fanout list.
5880 	 */
5881 	error = ipcl_conn_insert(connp);
5882 	if (error != 0)
5883 		goto connect_failed;
5884 
5885 	mutex_enter(&connp->conn_lock);
5886 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5887 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5888 	if (error != 0) {
5889 		mutex_exit(&connp->conn_lock);
5890 		goto connect_failed;
5891 	}
5892 
5893 	udp->udp_state = TS_DATA_XFER;
5894 	/* Record this as the "last" send even though we haven't sent any */
5895 	connp->conn_v6lastdst = connp->conn_faddr_v6;
5896 	connp->conn_lastipversion = connp->conn_ipversion;
5897 	connp->conn_lastdstport = connp->conn_fport;
5898 	connp->conn_lastflowinfo = connp->conn_flowinfo;
5899 	connp->conn_lastscopeid = scopeid;
5900 	connp->conn_lastsrcid = srcid;
5901 	/* Also remember a source to use together with lastdst */
5902 	connp->conn_v6lastsrc = v6src;
5903 
5904 	oldixa = conn_replace_ixa(connp, ixa);
5905 	mutex_exit(&connp->conn_lock);
5906 	ixa_refrele(oldixa);
5907 
5908 	/*
5909 	 * We've picked a source address above. Now we can
5910 	 * verify that the src/port/dst/port is unique for all
5911 	 * connections in TS_DATA_XFER, skipping ourselves.
5912 	 */
5913 	mutex_enter(&udpf->uf_lock);
5914 	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
5915 		if (udp1->udp_state != TS_DATA_XFER)
5916 			continue;
5917 
5918 		if (udp1 == udp)
5919 			continue;
5920 
5921 		connp1 = udp1->udp_connp;
5922 		if (connp->conn_lport != connp1->conn_lport ||
5923 		    connp->conn_ipversion != connp1->conn_ipversion ||
5924 		    dstport != connp1->conn_fport ||
5925 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
5926 		    &connp1->conn_laddr_v6) ||
5927 		    !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
5928 		    !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
5929 		    IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
5930 			continue;
5931 		mutex_exit(&udpf->uf_lock);
5932 		error = -TBADADDR;
5933 		goto connect_failed;
5934 	}
5935 	if (cl_inet_connect2 != NULL) {
5936 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
5937 		if (error != 0) {
5938 			mutex_exit(&udpf->uf_lock);
5939 			error = -TBADADDR;
5940 			goto connect_failed;
5941 		}
5942 	}
5943 	mutex_exit(&udpf->uf_lock);
5944 
5945 	ixa_refrele(ixa);
5946 	return (0);
5947 
5948 connect_failed:
5949 	if (ixa != NULL)
5950 		ixa_refrele(ixa);
5951 	mutex_enter(&connp->conn_lock);
5952 	mutex_enter(&udpf->uf_lock);
5953 	udp->udp_state = TS_IDLE;
5954 	connp->conn_faddr_v6 = ipv6_all_zeros;
5955 	connp->conn_fport = 0;
5956 	/* In case the source address was set above */
5957 	if (connp->conn_mcbc_bind)
5958 		connp->conn_saddr_v6 = ipv6_all_zeros;
5959 	else
5960 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
5961 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
5962 	mutex_exit(&udpf->uf_lock);
5963 
5964 	connp->conn_v6lastdst = ipv6_all_zeros;
5965 	connp->conn_flowinfo = 0;
5966 
5967 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
5968 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
5969 	mutex_exit(&connp->conn_lock);
5970 	return (error);
5971 }
5972 
5973 static int
udp_connect(sock_lower_handle_t proto_handle,const struct sockaddr * sa,socklen_t len,sock_connid_t * id,cred_t * cr)5974 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
5975     socklen_t len, sock_connid_t *id, cred_t *cr)
5976 {
5977 	conn_t	*connp = (conn_t *)proto_handle;
5978 	udp_t	*udp = connp->conn_udp;
5979 	int	error;
5980 	boolean_t did_bind = B_FALSE;
5981 	pid_t	pid = curproc->p_pid;
5982 
5983 	/* All Solaris components should pass a cred for this operation. */
5984 	ASSERT(cr != NULL);
5985 
5986 	if (sa == NULL) {
5987 		/*
5988 		 * Disconnect
5989 		 * Make sure we are connected
5990 		 */
5991 		if (udp->udp_state != TS_DATA_XFER)
5992 			return (EINVAL);
5993 
5994 		error = udp_disconnect(connp);
5995 		return (error);
5996 	}
5997 
5998 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
5999 	if (error != 0)
6000 		goto done;
6001 
6002 	/* do an implicit bind if necessary */
6003 	if (udp->udp_state == TS_UNBND) {
6004 		error = udp_implicit_bind(connp, cr);
6005 		/*
6006 		 * We could be racing with an actual bind, in which case
6007 		 * we would see EPROTO. We cross our fingers and try
6008 		 * to connect.
6009 		 */
6010 		if (!(error == 0 || error == EPROTO))
6011 			goto done;
6012 		did_bind = B_TRUE;
6013 	}
6014 	/*
6015 	 * set SO_DGRAM_ERRIND
6016 	 */
6017 	connp->conn_dgram_errind = B_TRUE;
6018 
6019 	error = udp_do_connect(connp, sa, len, cr, pid);
6020 
6021 	if (error != 0 && did_bind) {
6022 		int unbind_err;
6023 
6024 		unbind_err = udp_do_unbind(connp);
6025 		ASSERT(unbind_err == 0);
6026 	}
6027 
6028 	if (error == 0) {
6029 		*id = 0;
6030 		(*connp->conn_upcalls->su_connected)
6031 		    (connp->conn_upper_handle, 0, NULL, -1);
6032 	} else if (error < 0) {
6033 		error = proto_tlitosyserr(-error);
6034 	}
6035 
6036 done:
6037 	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
6038 		/*
6039 		 * No need to hold locks to set state
6040 		 * after connect failure socket state is undefined
6041 		 * We set the state only to imitate old sockfs behavior
6042 		 */
6043 		udp->udp_state = TS_IDLE;
6044 	}
6045 	return (error);
6046 }
6047 
/*
 * Socket-layer send entry point (installed as sd_send in
 * sock_udp_downcalls).
 *
 * mp must be an M_DATA message.  An endpoint that is still TS_UNBND is
 * implicitly bound first.
 *
 * - No destination (msg->msg_name == NULL): the endpoint must be connected
 *   (TS_DATA_XFER), otherwise EDESTADDRREQ.  The datagram goes out through
 *   udp_output_ancillary() when control data is present, else through
 *   udp_output_connected().
 * - Destination supplied: a connected endpoint yields EISCONN.  The address
 *   is checked with proto_verify_ip_addr() and then handled per address
 *   family.  Note that an unspecified destination address in msg->msg_name
 *   is overwritten in place with the loopback address.
 *
 * A pending udp_delayed_error is always cleared by the next send with a
 * destination; it is returned only if that destination matches
 * udp_delayed_addr, otherwise the send proceeds.
 *
 * When us->us_sendto_ignerr is set, the result of the output routine is
 * discarded and 0 is returned.
 *
 * Returns 0 or an errno value; EINVAL for an unexpected conn_family.
 */
int
udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
    cred_t *cr)
{
	sin6_t		*sin6;
	sin_t		*sin = NULL;
	uint_t		srcid;
	conn_t		*connp = (conn_t *)proto_handle;
	udp_t		*udp = connp->conn_udp;
	int		error = 0;
	udp_stack_t	*us = udp->udp_us;
	ushort_t	ipversion;
	pid_t		pid = curproc->p_pid;
	ip_xmit_attr_t	*ixa;

	ASSERT(DB_TYPE(mp) == M_DATA);

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	/* do an implicit bind if necessary */
	if (udp->udp_state == TS_UNBND) {
		error = udp_implicit_bind(connp, cr);
		/*
		 * We could be racing with an actual bind, in which case
		 * we would see EPROTO. We cross our fingers and try
		 * to send.
		 */
		if (!(error == 0 || error == EPROTO)) {
			/*
			 * NOTE(review): mp is freed on this path, but the
			 * error returns further down leave mp untouched --
			 * confirm which side owns mp on failure.
			 */
			freemsg(mp);
			return (error);
		}
	}

	/* Connected? */
	if (msg->msg_name == NULL) {
		if (udp->udp_state != TS_DATA_XFER) {
			UDPS_BUMP_MIB(us, udpOutErrors);
			return (EDESTADDRREQ);
		}
		/* Control data present: take the ancillary-data path. */
		if (msg->msg_controllen != 0) {
			error = udp_output_ancillary(connp, NULL, NULL, mp,
			    NULL, msg, cr, pid);
		} else {
			error = udp_output_connected(connp, mp, cr, pid);
		}
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	}
	/* A destination was given, which is not allowed once connected. */
	if (udp->udp_state == TS_DATA_XFER) {
		UDPS_BUMP_MIB(us, udpOutErrors);
		return (EISCONN);
	}
	error = proto_verify_ip_addr(connp->conn_family,
	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
	if (error != 0) {
		UDPS_BUMP_MIB(us, udpOutErrors);
		return (error);
	}
	switch (connp->conn_family) {
	case AF_INET6:
		sin6 = (sin6_t *)msg->msg_name;

		srcid = sin6->__sin6_src_id;

		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			/*
			 * Destination is a non-IPv4-compatible IPv6 address.
			 * Send out an IPv6 format packet.
			 */

			/*
			 * If the local address is a mapped address return
			 * an error.
			 * It would be possible to send an IPv6 packet but the
			 * response would never make it back to the application
			 * since it is bound to a mapped address.
			 */
			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
				UDPS_BUMP_MIB(us, udpOutErrors);
				return (EADDRNOTAVAIL);
			}
			/* Unspecified destination is rewritten to loopback. */
			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				sin6->sin6_addr = ipv6_loopback;
			ipversion = IPV6_VERSION;
		} else {
			/* V4-mapped destination on a v6-only socket. */
			if (connp->conn_ipv6_v6only) {
				UDPS_BUMP_MIB(us, udpOutErrors);
				return (EADDRNOTAVAIL);
			}

			/*
			 * If the local address is not zero or a mapped address
			 * return an error.  It would be possible to send an
			 * IPv4 packet but the response would never make it
			 * back to the application since it is bound to a
			 * non-mapped address.
			 */
			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
				UDPS_BUMP_MIB(us, udpOutErrors);
				return (EADDRNOTAVAIL);
			}

			/* INADDR_ANY destination is rewritten to loopback. */
			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
				V4_PART_OF_V6(sin6->sin6_addr) =
				    htonl(INADDR_LOOPBACK);
			}
			ipversion = IPV4_VERSION;
		}

		/*
		 * We have to allocate an ip_xmit_attr_t before we grab
		 * conn_lock, and we need to hold conn_lock once we've
		 * checked conn_same_as_last_v6 to handle concurrent send*
		 * calls on a socket.  With control data no ixa is taken
		 * here; ixa stays NULL for the ancillary path.
		 */
		if (msg->msg_controllen == 0) {
			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL) {
				UDPS_BUMP_MIB(us, udpOutErrors);
				return (ENOMEM);
			}
		} else {
			ixa = NULL;
		}
		mutex_enter(&connp->conn_lock);
		if (udp->udp_delayed_error != 0) {
			sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;

			/* The delayed error is consumed whether or not it matches. */
			error = udp->udp_delayed_error;
			udp->udp_delayed_error = 0;

			/* Compare IP address, port, and family */

			if (sin6->sin6_port == sin2->sin6_port &&
			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
			    &sin2->sin6_addr) &&
			    sin6->sin6_family == sin2->sin6_family) {
				mutex_exit(&connp->conn_lock);
				UDPS_BUMP_MIB(us, udpOutErrors);
				if (ixa != NULL)
					ixa_refrele(ixa);
				return (error);
			}
		}

		if (msg->msg_controllen != 0) {
			/* The ancillary path is entered without conn_lock. */
			mutex_exit(&connp->conn_lock);
			ASSERT(ixa == NULL);
			error = udp_output_ancillary(connp, NULL, sin6, mp,
			    NULL, msg, cr, pid);
		} else if (conn_same_as_last_v6(connp, sin6) &&
		    connp->conn_lastsrcid == srcid &&
		    ipsec_outbound_policy_current(ixa)) {
			/* udp_output_lastdst drops conn_lock */
			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
		} else {
			/* udp_output_newdst drops conn_lock */
			error = udp_output_newdst(connp, mp, NULL, sin6,
			    ipversion, cr, pid, ixa);
		}
		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	case AF_INET:
		sin = (sin_t *)msg->msg_name;

		ipversion = IPV4_VERSION;

		/* INADDR_ANY destination is rewritten to loopback. */
		if (sin->sin_addr.s_addr == INADDR_ANY)
			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);

		/*
		 * We have to allocate an ip_xmit_attr_t before we grab
		 * conn_lock, and we need to hold conn_lock once we've
		 * checked conn_same_as_last_v4 to handle concurrent send*
		 * calls on a socket.
		 */
		if (msg->msg_controllen == 0) {
			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL) {
				UDPS_BUMP_MIB(us, udpOutErrors);
				return (ENOMEM);
			}
		} else {
			ixa = NULL;
		}
		mutex_enter(&connp->conn_lock);
		if (udp->udp_delayed_error != 0) {
			sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;

			/* The delayed error is consumed whether or not it matches. */
			error = udp->udp_delayed_error;
			udp->udp_delayed_error = 0;

			/* Compare IP address and port */

			if (sin->sin_port == sin2->sin_port &&
			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
				mutex_exit(&connp->conn_lock);
				UDPS_BUMP_MIB(us, udpOutErrors);
				if (ixa != NULL)
					ixa_refrele(ixa);
				return (error);
			}
		}
		if (msg->msg_controllen != 0) {
			/* The ancillary path is entered without conn_lock. */
			mutex_exit(&connp->conn_lock);
			ASSERT(ixa == NULL);
			error = udp_output_ancillary(connp, sin, NULL, mp,
			    NULL, msg, cr, pid);
		} else if (conn_same_as_last_v4(connp, sin) &&
		    ipsec_outbound_policy_current(ixa)) {
			/* udp_output_lastdst drops conn_lock */
			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
		} else {
			/* udp_output_newdst drops conn_lock */
			error = udp_output_newdst(connp, mp, sin, NULL,
			    ipversion, cr, pid, ixa);
		}
		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	default:
		return (EINVAL);
	}
}
6280 
/*
 * Switch this endpoint from a non-STREAMS socket onto the STREAMS queue
 * pair `q' (IPCL_NONSTR is cleared at the end).
 *
 * Steps, in order:
 *  - attach connp to both halves of q and install udp_winit on the write
 *    side;
 *  - send an M_SETOPTS message (write offset and high-water mark) upstream;
 *  - free the IP helper stream, and switch to pure TPI when !issocket;
 *  - collect the capability ack, local address, peer address and option
 *    flags and pass them to quiesced_cb;
 *  - push the message chain returned by quiesced_cb, followed by anything
 *    queued on udp_fallback_queue_head, upstream with putnext().
 *
 * Always returns 0.
 */
int
udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
    boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
    sock_quiesce_arg_t *arg)
{
	conn_t	*connp = (conn_t *)proto_handle;
	udp_t	*udp;
	struct T_capability_ack tca;
	struct sockaddr_in6 laddr, faddr;
	socklen_t laddrlen, faddrlen;
	short opts;
	struct stroptions *stropt;
	mblk_t *mp, *stropt_mp;
	int error;

	udp = connp->conn_udp;

	/* The result is used below without a NULL check. */
	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);

	/*
	 * setup the fallback stream that was allocated
	 */
	/* Save the incoming q_ptr values before they are overwritten below. */
	connp->conn_dev = (dev_t)RD(q)->q_ptr;
	connp->conn_minor_arena = WR(q)->q_ptr;

	RD(q)->q_ptr = WR(q)->q_ptr = connp;

	WR(q)->q_qinfo = &udp_winit;

	connp->conn_rq = RD(q);
	connp->conn_wq = WR(q);

	/* Notify stream head about options before sending up data */
	stropt_mp->b_datap->db_type = M_SETOPTS;
	stropt_mp->b_wptr += sizeof (*stropt);
	stropt = (struct stroptions *)stropt_mp->b_rptr;
	stropt->so_flags = SO_WROFF | SO_HIWAT;
	stropt->so_wroff = connp->conn_wroff;
	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
	putnext(RD(q), stropt_mp);

	/*
	 * Free the helper stream
	 */
	ip_free_helper_stream(connp);

	if (!issocket)
		udp_use_pure_tpi(udp);

	/*
	 * Collect the information needed to sync with the sonode
	 */
	udp_do_capability_ack(udp, &tca, TC1_INFO);

	laddrlen = faddrlen = sizeof (sin6_t);
	(void) udp_getsockname((sock_lower_handle_t)connp,
	    (struct sockaddr *)&laddr, &laddrlen, CRED());
	error = udp_getpeername((sock_lower_handle_t)connp,
	    (struct sockaddr *)&faddr, &faddrlen, CRED());
	/* No peer available (e.g. not connected): report a zero-length peer. */
	if (error != 0)
		faddrlen = 0;

	opts = 0;
	if (connp->conn_dgram_errind)
		opts |= SO_DGRAM_ERRIND;
	if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
		opts |= SO_DONTROUTE;

	mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
	    (struct sockaddr *)&laddr, laddrlen,
	    (struct sockaddr *)&faddr, faddrlen, opts);

	mutex_enter(&udp->udp_recv_lock);
	/*
	 * Attempts to send data up during fallback will result in it being
	 * queued in udp_t. First push up the datagrams obtained from the
	 * socket, then any packets queued in udp_t.
	 */
	if (mp != NULL) {
		/* Put the callback's message at the front of the queue. */
		mp->b_next = udp->udp_fallback_queue_head;
		udp->udp_fallback_queue_head = mp;
	}
	while (udp->udp_fallback_queue_head != NULL) {
		mp = udp->udp_fallback_queue_head;
		udp->udp_fallback_queue_head = mp->b_next;
		/* udp_recv_lock is not held across putnext(). */
		mutex_exit(&udp->udp_recv_lock);
		mp->b_next = NULL;
		putnext(RD(q), mp);
		mutex_enter(&udp->udp_recv_lock);
	}
	/* The loop exits with head == NULL, so this resets the tail as well. */
	udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
	/*
	 * No longer a streams less socket
	 */
	mutex_enter(&connp->conn_lock);
	connp->conn_flags &= ~IPCL_NONSTR;
	mutex_exit(&connp->conn_lock);

	mutex_exit(&udp->udp_recv_lock);

	ASSERT(connp->conn_ref >= 1);

	return (0);
}
6385 
6386 /* ARGSUSED3 */
6387 int
udp_getpeername(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t * salenp,cred_t * cr)6388 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6389     socklen_t *salenp, cred_t *cr)
6390 {
6391 	conn_t	*connp = (conn_t *)proto_handle;
6392 	udp_t	*udp = connp->conn_udp;
6393 	int error;
6394 
6395 	/* All Solaris components should pass a cred for this operation. */
6396 	ASSERT(cr != NULL);
6397 
6398 	mutex_enter(&connp->conn_lock);
6399 	if (udp->udp_state != TS_DATA_XFER)
6400 		error = ENOTCONN;
6401 	else
6402 		error = conn_getpeername(connp, sa, salenp);
6403 	mutex_exit(&connp->conn_lock);
6404 	return (error);
6405 }
6406 
6407 /* ARGSUSED3 */
6408 int
udp_getsockname(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t * salenp,cred_t * cr)6409 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6410     socklen_t *salenp, cred_t *cr)
6411 {
6412 	conn_t	*connp = (conn_t *)proto_handle;
6413 	int error;
6414 
6415 	/* All Solaris components should pass a cred for this operation. */
6416 	ASSERT(cr != NULL);
6417 
6418 	mutex_enter(&connp->conn_lock);
6419 	error = conn_getsockname(connp, sa, salenp);
6420 	mutex_exit(&connp->conn_lock);
6421 	return (error);
6422 }
6423 
6424 int
udp_getsockopt(sock_lower_handle_t proto_handle,int level,int option_name,void * optvalp,socklen_t * optlen,cred_t * cr)6425 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6426     void *optvalp, socklen_t *optlen, cred_t *cr)
6427 {
6428 	conn_t		*connp = (conn_t *)proto_handle;
6429 	int		error;
6430 	t_uscalar_t	max_optbuf_len;
6431 	void		*optvalp_buf;
6432 	int		len;
6433 
6434 	/* All Solaris components should pass a cred for this operation. */
6435 	ASSERT(cr != NULL);
6436 
6437 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6438 	    udp_opt_obj.odb_opt_des_arr,
6439 	    udp_opt_obj.odb_opt_arr_cnt,
6440 	    B_FALSE, B_TRUE, cr);
6441 	if (error != 0) {
6442 		if (error < 0)
6443 			error = proto_tlitosyserr(-error);
6444 		return (error);
6445 	}
6446 
6447 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6448 	len = udp_opt_get(connp, level, option_name, optvalp_buf);
6449 	if (len == -1) {
6450 		kmem_free(optvalp_buf, max_optbuf_len);
6451 		return (EINVAL);
6452 	}
6453 
6454 	/*
6455 	 * update optlen and copy option value
6456 	 */
6457 	t_uscalar_t size = MIN(len, *optlen);
6458 
6459 	bcopy(optvalp_buf, optvalp, size);
6460 	bcopy(&size, optlen, sizeof (size));
6461 
6462 	kmem_free(optvalp_buf, max_optbuf_len);
6463 	return (0);
6464 }
6465 
6466 int
udp_setsockopt(sock_lower_handle_t proto_handle,int level,int option_name,const void * optvalp,socklen_t optlen,cred_t * cr)6467 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6468     const void *optvalp, socklen_t optlen, cred_t *cr)
6469 {
6470 	conn_t		*connp = (conn_t *)proto_handle;
6471 	int		error;
6472 
6473 	/* All Solaris components should pass a cred for this operation. */
6474 	ASSERT(cr != NULL);
6475 
6476 	error = proto_opt_check(level, option_name, optlen, NULL,
6477 	    udp_opt_obj.odb_opt_des_arr,
6478 	    udp_opt_obj.odb_opt_arr_cnt,
6479 	    B_TRUE, B_FALSE, cr);
6480 
6481 	if (error != 0) {
6482 		if (error < 0)
6483 			error = proto_tlitosyserr(-error);
6484 		return (error);
6485 	}
6486 
6487 	error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
6488 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
6489 	    NULL, cr);
6490 
6491 	ASSERT(error >= 0);
6492 
6493 	return (error);
6494 }
6495 
6496 void
udp_clr_flowctrl(sock_lower_handle_t proto_handle)6497 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
6498 {
6499 	conn_t	*connp = (conn_t *)proto_handle;
6500 	udp_t	*udp = connp->conn_udp;
6501 
6502 	mutex_enter(&udp->udp_recv_lock);
6503 	connp->conn_flow_cntrld = B_FALSE;
6504 	mutex_exit(&udp->udp_recv_lock);
6505 }
6506 
6507 /* ARGSUSED2 */
6508 int
udp_shutdown(sock_lower_handle_t proto_handle,int how,cred_t * cr)6509 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6510 {
6511 	conn_t	*connp = (conn_t *)proto_handle;
6512 
6513 	/* All Solaris components should pass a cred for this operation. */
6514 	ASSERT(cr != NULL);
6515 
6516 	/* shut down the send side */
6517 	if (how != SHUT_RD)
6518 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6519 		    SOCK_OPCTL_SHUT_SEND, 0);
6520 	/* shut down the recv side */
6521 	if (how != SHUT_WR)
6522 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6523 		    SOCK_OPCTL_SHUT_RECV, 0);
6524 	return (0);
6525 }
6526 
6527 int
udp_ioctl(sock_lower_handle_t proto_handle,int cmd,intptr_t arg,int mode,int32_t * rvalp,cred_t * cr)6528 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6529     int mode, int32_t *rvalp, cred_t *cr)
6530 {
6531 	conn_t		*connp = (conn_t *)proto_handle;
6532 	int		error;
6533 
6534 	/* All Solaris components should pass a cred for this operation. */
6535 	ASSERT(cr != NULL);
6536 
6537 	/*
6538 	 * If we don't have a helper stream then create one.
6539 	 * ip_create_helper_stream takes care of locking the conn_t,
6540 	 * so this check for NULL is just a performance optimization.
6541 	 */
6542 	if (connp->conn_helper_info == NULL) {
6543 		udp_stack_t *us = connp->conn_udp->udp_us;
6544 
6545 		ASSERT(us->us_ldi_ident != NULL);
6546 
6547 		/*
6548 		 * Create a helper stream for non-STREAMS socket.
6549 		 */
6550 		error = ip_create_helper_stream(connp, us->us_ldi_ident);
6551 		if (error != 0) {
6552 			ip0dbg(("udp_ioctl: create of IP helper stream "
6553 			    "failed %d\n", error));
6554 			return (error);
6555 		}
6556 	}
6557 
6558 	switch (cmd) {
6559 		case _SIOCSOCKFALLBACK:
6560 		case TI_GETPEERNAME:
6561 		case TI_GETMYNAME:
6562 			ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6563 			    cmd));
6564 			error = EINVAL;
6565 			break;
6566 		default:
6567 			/*
6568 			 * Pass on to IP using helper stream
6569 			 */
6570 			error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6571 			    cmd, arg, mode, cr, rvalp);
6572 			break;
6573 	}
6574 	return (error);
6575 }
6576 
/*
 * accept entry point (installed as sd_accept in sock_udp_downcalls).
 * Not supported: ignores all arguments and always returns EOPNOTSUPP.
 */
/* ARGSUSED */
int
udp_accept(sock_lower_handle_t lproto_handle,
    sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
    cred_t *cr)
{
	return (EOPNOTSUPP);
}
6585 
/*
 * listen entry point (installed as sd_listen in sock_udp_downcalls).
 * Not supported: ignores all arguments and always returns EOPNOTSUPP.
 */
/* ARGSUSED */
int
udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
{
	return (EOPNOTSUPP);
}
6592 
/*
 * Table of UDP entry points exported through sock_downcalls_t.  The
 * initializer is positional, so the order of the rows must not change; the
 * trailing comment on each row names the slot it fills.
 *
 * The sd_send_uio, sd_recv_uio and sd_poll slots are left NULL.  Note that
 * the slot labeled sd_setflowctrl is filled by udp_clr_flowctrl.
 */
sock_downcalls_t sock_udp_downcalls = {
	udp_activate,		/* sd_activate */
	udp_accept,		/* sd_accept */
	udp_bind,		/* sd_bind */
	udp_listen,		/* sd_listen */
	udp_connect,		/* sd_connect */
	udp_getpeername,	/* sd_getpeername */
	udp_getsockname,	/* sd_getsockname */
	udp_getsockopt,		/* sd_getsockopt */
	udp_setsockopt,		/* sd_setsockopt */
	udp_send,		/* sd_send */
	NULL,			/* sd_send_uio */
	NULL,			/* sd_recv_uio */
	NULL,			/* sd_poll */
	udp_shutdown,		/* sd_shutdown */
	udp_clr_flowctrl,	/* sd_setflowctrl */
	udp_ioctl,		/* sd_ioctl */
	udp_close		/* sd_close */
};
6612