udp.c revision 8a06b3d6467c15646e663c05086378f16288af85
27c478bdstevel@tonic-gate * CDDL HEADER START
37c478bdstevel@tonic-gate *
47c478bdstevel@tonic-gate * The contents of this file are subject to the terms of the
59557906ja * Common Development and Distribution License (the "License").
69557906ja * You may not use this file except in compliance with the License.
77c478bdstevel@tonic-gate *
87c478bdstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bdstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bdstevel@tonic-gate * See the License for the specific language governing permissions
117c478bdstevel@tonic-gate * and limitations under the License.
127c478bdstevel@tonic-gate *
137c478bdstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bdstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bdstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bdstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bdstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bdstevel@tonic-gate *
197c478bdstevel@tonic-gate * CDDL HEADER END
207c478bdstevel@tonic-gate */
22e5e7971Erik Nordmark * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
237256a34Dan McDonald * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
24a1ca8b4Dan McDonald * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
257c478bdstevel@tonic-gate */
267c478bdstevel@tonic-gate/* Copyright (c) 1990 Mentat Inc. */
28de710d2Josef 'Jeff' Sipek#include <sys/sysmacros.h>
297c478bdstevel@tonic-gate#include <sys/types.h>
307c478bdstevel@tonic-gate#include <sys/stream.h>
317c478bdstevel@tonic-gate#include <sys/stropts.h>
327c478bdstevel@tonic-gate#include <sys/strlog.h>
337c478bdstevel@tonic-gate#include <sys/strsun.h>
347c478bdstevel@tonic-gate#define	_SUN_TPI_VERSION 2
357c478bdstevel@tonic-gate#include <sys/tihdr.h>
367c478bdstevel@tonic-gate#include <sys/timod.h>
377c478bdstevel@tonic-gate#include <sys/ddi.h>
387c478bdstevel@tonic-gate#include <sys/sunddi.h>
397c478bdstevel@tonic-gate#include <sys/strsubr.h>
407c478bdstevel@tonic-gate#include <sys/suntpi.h>
417c478bdstevel@tonic-gate#include <sys/xti_inet.h>
427c478bdstevel@tonic-gate#include <sys/kmem.h>
43bd670b3Erik Nordmark#include <sys/cred_impl.h>
447c478bdstevel@tonic-gate#include <sys/policy.h>
45bd670b3Erik Nordmark#include <sys/priv.h>
467c478bdstevel@tonic-gate#include <sys/ucred.h>
477c478bdstevel@tonic-gate#include <sys/zone.h>
497c478bdstevel@tonic-gate#include <sys/socket.h>
500f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <sys/socketvar.h>
51ff550d0masputra#include <sys/sockio.h>
527c478bdstevel@tonic-gate#include <sys/vtrace.h>
53381a2a9dr#include <sys/sdt.h>
547c478bdstevel@tonic-gate#include <sys/debug.h>
557c478bdstevel@tonic-gate#include <sys/isa_defs.h>
567c478bdstevel@tonic-gate#include <sys/random.h>
577c478bdstevel@tonic-gate#include <netinet/in.h>
587c478bdstevel@tonic-gate#include <netinet/ip6.h>
597c478bdstevel@tonic-gate#include <netinet/icmp6.h>
607c478bdstevel@tonic-gate#include <netinet/udp.h>
627c478bdstevel@tonic-gate#include <inet/common.h>
637c478bdstevel@tonic-gate#include <inet/ip.h>
64ff550d0masputra#include <inet/ip_impl.h>
65bd670b3Erik Nordmark#include <inet/ipsec_impl.h>
667c478bdstevel@tonic-gate#include <inet/ip6.h>
677c478bdstevel@tonic-gate#include <inet/ip_ire.h>
68ff550d0masputra#include <inet/ip_if.h>
69ff550d0masputra#include <inet/ip_multi.h>
70c793af9sangeeta#include <inet/ip_ndp.h>
710f1702cYu Xiangning<Eric.Yu@Sun.COM>#include <inet/proto_set.h>
727c478bdstevel@tonic-gate#include <inet/mib2.h>
737c478bdstevel@tonic-gate#include <inet/optcom.h>
747c478bdstevel@tonic-gate#include <inet/snmpcom.h>
757c478bdstevel@tonic-gate#include <inet/kstatcom.h>
76ff550d0masputra#include <inet/ipclassifier.h>
77da14cebEric Cheng#include <sys/squeue_impl.h>
78b127ac4Philip Kirk#include <inet/ipnet.h>
79e11c3f4meem#include <sys/ethernet.h>
8145916cdjpk#include <sys/tsol/label.h>
8245916cdjpk#include <sys/tsol/tnet.h>
8345916cdjpk#include <rpc/pmap_prot.h>
85bd670b3Erik Nordmark#include <inet/udp_impl.h>
86bd670b3Erik Nordmark
/*
 * Synchronization notes:
 *
 * UDP is MT and uses the usual kernel synchronization primitives. There are 2
 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
 * protects the contents of the udp_t. uf_lock protects the address and the
 * fanout information.
 * The lock order is conn_lock -> uf_lock.
 *
 * The fanout lock uf_lock:
 * When a UDP endpoint is bound to a local port, it is inserted into
 * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
 * The size of the array is controlled by the udp_bind_fanout_size variable.
 * This variable can be changed in /etc/system if the default value is
 * not large enough.  Each bind hash bucket is protected by a per-bucket
 * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
 * structure and a few other fields in the udp_t. A UDP endpoint is removed
 * from the bind hash list only when it is being unbound or being closed.
 * The per-bucket lock also protects a UDP endpoint's state changes.
 *
 * Plumbing notes:
 * UDP is always a device driver. For compatibility with mibopen() code
 * it is possible to I_PUSH "udp", but that results in pushing a passthrough
 * dummy module.
 *
 * The above implies that we don't support any intermediate module to
 * reside in between /dev/ip and udp -- in fact, we never supported such a
 * scenario in the past as the inter-layer communication semantics have
 * always been private.
 */
118f4b3ec6dh/* For /etc/system control */
1197c478bdstevel@tonic-gateuint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
1217c478bdstevel@tonic-gatestatic void	udp_addr_req(queue_t *q, mblk_t *mp);
1220f1702cYu Xiangning<Eric.Yu@Sun.COM>static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
1237c478bdstevel@tonic-gatestatic void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
1247c478bdstevel@tonic-gatestatic void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
125bd670b3Erik Nordmarkstatic int	udp_build_hdr_template(conn_t *, const in6_addr_t *,
126bd670b3Erik Nordmark    const in6_addr_t *, in_port_t, uint32_t);
1277c478bdstevel@tonic-gatestatic void	udp_capability_req(queue_t *q, mblk_t *mp);
1285e1743fToomas Soomestatic int	udp_tpi_close(queue_t *q, int flags, cred_t *);
129bd670b3Erik Nordmarkstatic void	udp_close_free(conn_t *);
1300f1702cYu Xiangning<Eric.Yu@Sun.COM>static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
1310f1702cYu Xiangning<Eric.Yu@Sun.COM>static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
1327c478bdstevel@tonic-gatestatic void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
133bd670b3Erik Nordmark    int sys_error);
134bd670b3Erik Nordmarkstatic void	udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
135bd670b3Erik Nordmark    t_scalar_t tlierr, int sys_error);
1367c478bdstevel@tonic-gatestatic int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
1377c478bdstevel@tonic-gate		    cred_t *cr);
1387c478bdstevel@tonic-gatestatic int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
1397c478bdstevel@tonic-gate		    char *value, caddr_t cp, cred_t *cr);
1407c478bdstevel@tonic-gatestatic int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
1417c478bdstevel@tonic-gate		    char *value, caddr_t cp, cred_t *cr);
142bd670b3Erik Nordmarkstatic void	udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
143bd670b3Erik Nordmarkstatic void	udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
144bd670b3Erik Nordmark    ip_recv_attr_t *ira);
1457c478bdstevel@tonic-gatestatic void	udp_info_req(queue_t *q, mblk_t *mp);
146bd670b3Erik Nordmarkstatic void	udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
1478a06b3dToomas Soomestatic int	udp_lrput(queue_t *, mblk_t *);
1488a06b3dToomas Soomestatic int	udp_lwput(queue_t *, mblk_t *);
1497c478bdstevel@tonic-gatestatic int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
150fc80c0dnordmark		    cred_t *credp, boolean_t isv6);
151fc80c0dnordmarkstatic int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
152fc80c0dnordmark		    cred_t *credp);
153fc80c0dnordmarkstatic int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
1547c478bdstevel@tonic-gate		    cred_t *credp);
1557c478bdstevel@tonic-gatestatic boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
156bd670b3Erik Nordmarkint		udp_opt_set(conn_t *connp, uint_t optset_context,
157bd670b3Erik Nordmark		    int level, int name, uint_t inlen,
158bd670b3Erik Nordmark		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
159bd670b3Erik Nordmark		    void *thisdg_attrs, cred_t *cr);
160bd670b3Erik Nordmarkint		udp_opt_get(conn_t *connp, int level, int name,
161bd670b3Erik Nordmark		    uchar_t *ptr);
162bd670b3Erik Nordmarkstatic int	udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
163bd670b3Erik Nordmark		    pid_t pid);
164bd670b3Erik Nordmarkstatic int	udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
165bd670b3Erik Nordmark    pid_t pid, ip_xmit_attr_t *ixa);
166bd670b3Erik Nordmarkstatic int	udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
167bd670b3Erik Nordmark		    sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
168bd670b3Erik Nordmark		    ip_xmit_attr_t *ixa);
169bd670b3Erik Nordmarkstatic mblk_t	*udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
170bd670b3Erik Nordmark    const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
171bd670b3Erik Nordmark    int *);
172bd670b3Erik Nordmarkstatic mblk_t	*udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
173bd670b3Erik Nordmark    mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
174bd670b3Erik Nordmarkstatic void	udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
175bd670b3Erik Nordmarkstatic void	udp_ud_err_connected(conn_t *, t_scalar_t);
1760f1702cYu Xiangning<Eric.Yu@Sun.COM>static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
17745916cdjpkstatic in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
17845916cdjpk    boolean_t random);
1797c478bdstevel@tonic-gatestatic void	udp_wput_other(queue_t *q, mblk_t *mp);
1807c478bdstevel@tonic-gatestatic void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
1818a06b3dToomas Soomestatic int	udp_wput_fallback(queue_t *q, mblk_t *mp);
182ff550d0masputrastatic size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
184f4b3ec6dhstatic void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
185f4b3ec6dhstatic void	udp_stack_fini(netstackid_t stackid, void *arg);
187bd670b3Erik Nordmark/* Common routines for TPI and socket module */
188bd670b3Erik Nordmarkstatic void	udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
1890f1702cYu Xiangning<Eric.Yu@Sun.COM>
1900f1702cYu Xiangning<Eric.Yu@Sun.COM>/* Common routine for TPI and socket module */
191634e26eCasper H.S. Dikstatic conn_t	*udp_do_open(cred_t *, boolean_t, int, int *);
1920f1702cYu Xiangning<Eric.Yu@Sun.COM>static void	udp_do_close(conn_t *);
1930f1702cYu Xiangning<Eric.Yu@Sun.COM>static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
1940f1702cYu Xiangning<Eric.Yu@Sun.COM>    boolean_t);
1950f1702cYu Xiangning<Eric.Yu@Sun.COM>static int	udp_do_unbind(conn_t *);
1960f1702cYu Xiangning<Eric.Yu@Sun.COM>
1970f1702cYu Xiangning<Eric.Yu@Sun.COM>int		udp_getsockname(sock_lower_handle_t,
1980f1702cYu Xiangning<Eric.Yu@Sun.COM>    struct sockaddr *, socklen_t *, cred_t *);
1990f1702cYu Xiangning<Eric.Yu@Sun.COM>int		udp_getpeername(sock_lower_handle_t,
2000f1702cYu Xiangning<Eric.Yu@Sun.COM>    struct sockaddr *, socklen_t *, cred_t *);
201de8c4a1Erik Nordmarkstatic int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
202bd670b3Erik Nordmark    cred_t *, pid_t);
2030f1702cYu Xiangning<Eric.Yu@Sun.COM>
204bd670b3Erik Nordmark#pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
2058e4b770Lu Huafeng<Huafeng.Lv@Sun.COM>
2068e4b770Lu Huafeng<Huafeng.Lv@Sun.COM>/*
2078e4b770Lu Huafeng<Huafeng.Lv@Sun.COM> * Checks if the given destination addr/port is allowed out.
2088e4b770Lu Huafeng<Huafeng.Lv@Sun.COM> * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
2098e4b770Lu Huafeng<Huafeng.Lv@Sun.COM> * Called for each connect() and for sendto()/sendmsg() to a different
2108e4b770Lu Huafeng<Huafeng.Lv@Sun.COM> * destination.
2118e4b770Lu Huafeng<Huafeng.Lv@Sun.COM> * For connect(), called in udp_connect().
212bd670b3Erik Nordmark * For sendto()/sendmsg(), called in udp_output_newdst().
2138e4b770Lu Huafeng<Huafeng.Lv@Sun.COM> *