18fb65cergrimes/*-
24736ccfpfg * SPDX-License-Identifier: BSD-3-Clause
34736ccfpfg *
48fb65cergrimes * Copyright (c) 1982, 1986, 1990, 1993
58fb65cergrimes *	The Regents of the University of California.  All rights reserved.
68fb65cergrimes *
78fb65cergrimes * Redistribution and use in source and binary forms, with or without
88fb65cergrimes * modification, are permitted provided that the following conditions
98fb65cergrimes * are met:
108fb65cergrimes * 1. Redistributions of source code must retain the above copyright
118fb65cergrimes *    notice, this list of conditions and the following disclaimer.
128fb65cergrimes * 2. Redistributions in binary form must reproduce the above copyright
138fb65cergrimes *    notice, this list of conditions and the following disclaimer in the
148fb65cergrimes *    documentation and/or other materials provided with the distribution.
157e6cabdimp * 3. Neither the name of the University nor the names of its contributors
168fb65cergrimes *    may be used to endorse or promote products derived from this software
178fb65cergrimes *    without specific prior written permission.
188fb65cergrimes *
198fb65cergrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
208fb65cergrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
218fb65cergrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
228fb65cergrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
238fb65cergrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
248fb65cergrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
258fb65cergrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
268fb65cergrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
278fb65cergrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
288fb65cergrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
298fb65cergrimes * SUCH DAMAGE.
308fb65cergrimes *
31c8955bbhsu *	@(#)socketvar.h	8.3 (Berkeley) 2/19/95
32b516438kmacy *
333b842d3peter * $FreeBSD$
348fb65cergrimes */
358fb65cergrimes
361f5dfa1paul#ifndef _SYS_SOCKETVAR_H_
371f5dfa1paul#define _SYS_SOCKETVAR_H_
381f5dfa1paul
/*
 * Socket generation count type.  Also used in xinpcb, xtcpcb, xunpcb.
 *
 * A fixed-width unsigned 64-bit integer.  It is declared ahead of the
 * _KERNEL/_WANT_SOCKET guard, so every includer of this header sees it.
 */
typedef uint64_t so_gen_t;
437168facglebius
447168facglebius#if defined(_KERNEL) || defined(_WANT_SOCKET)
45b3ded28bde#include <sys/queue.h>			/* for TAILQ macros */
4670c88bbwollman#include <sys/selinfo.h>		/* for struct selinfo */
477bfe3e8rwatson#include <sys/_lock.h>
487bfe3e8rwatson#include <sys/_mutex.h>
498bec689marcel#include <sys/osd.h>
502084823rwatson#include <sys/_sx.h>
51b516438kmacy#include <sys/sockbuf.h>
52b516438kmacy#ifdef _KERNEL
5342d9d54marcel#include <sys/caprights.h>
54b516438kmacy#include <sys/sockopt.h>
55b516438kmacy#endif
568fb65cergrimes
5739b6dc8zecstruct vnet;
5839b6dc8zec
598fb65cergrimes/*
608fb65cergrimes * Kernel structure per socket.
618fb65cergrimes * Contains send and receive buffer queues,
628fb65cergrimes * handle on protocol and pointer to protocol
638fb65cergrimes * private data and error information.
648fb65cergrimes */
/*
 * Forward declaration.  It has to come before the callback typedefs
 * below: a struct tag that is first seen inside a parameter list only
 * has prototype scope, so without a prior declaration the typedefs
 * would name a different "struct socket" than the one defined later.
 */
struct socket;

/*
 * Socket upcall callback type: takes the socket, an opaque pointer
 * argument and an int, and returns an int.
 */
typedef	int so_upcall_t(struct socket *, void *, int);

/* Socket destructor callback type: takes the socket, returns nothing. */
typedef	void so_dtor_t(struct socket *);
69a1af9ecjhb
/*
 * Accept-queue state of a socket; this is the type of the so_qstate
 * member of struct socket.  The numeric values are fixed and must not
 * be renumbered.
 */
enum socket_qstate {
	SQ_NONE = 0,		/* presumably: on neither queue */
	SQ_INCOMP = 0x0800,	/* on sol_incomp */
	SQ_COMP = 0x1000,	/* on sol_comp */
};
75a0a0568sjg
763e54021trasz/*-
7787f869drwatson * Locking key to struct socket:
7887f869drwatson * (a) constant after allocation, no locking required.
7987f869drwatson * (b) locked by SOCK_LOCK(so).
80e35d543glebius * (cr) locked by SOCKBUF_LOCK(&so->so_rcv).
81daa4277bz * (cs) locked by SOCKBUF_LOCK(&so->so_snd).
82e35d543glebius * (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
8387f869drwatson * (f) not locked since integer reads/writes are atomic.
8487f869drwatson * (g) used only as a sleep/wakeup address, no value.
850fb2a46rwatson * (h) locked by global mutex so_global_mtx.
86fb264c6jhb * (k) locked by KTLS workqueue mutex
87dbb4756tanimura */
88e35d543glebiusTAILQ_HEAD(accept_queue, socket);
898fb65cergrimesstruct socket {
90e35d543glebius	struct mtx	so_lock;
91e35d543glebius	volatile u_int	so_count;	/* (b / refcount) */
92e35d543glebius	struct selinfo	so_rdsel;	/* (b/cr) for so_rcv/so_comp */
93e35d543glebius	struct selinfo	so_wrsel;	/* (b/cs) for so_snd */
9487f869drwatson	short	so_type;		/* (a) generic type, see socket.h */
95d0aeaa5sbruno	int	so_options;		/* (b) from socket call, see socket.h */
96e35d543glebius	short	so_linger;		/* time to linger close(2) */
97e9c176frwatson	short	so_state;		/* (b) internal state flags SS_* */
98c5ef3e2alfred	void	*so_pcb;		/* protocol control block */
9951cae3fdelphij	struct	vnet *so_vnet;		/* (a) network stack instance */
10087f869drwatson	struct	protosw *so_proto;	/* (a) protocol handle */
101645f886rwatson	short	so_timeo;		/* (g) connection timeout */
102856e57erwatson	u_short	so_error;		/* (f) error affecting connection */
103e6fa9b9tanimura	struct	sigio *so_sigio;	/* [sg] information for async I/O or
104d869e35truckman					   out of band data (SIGURG) */
105645f886rwatson	struct	ucred *so_cred;		/* (a) user credentials */
106f1bc833rwatson	struct	label *so_label;	/* (b) MAC label for socket */
107785a134rwatson	/* NB: generation count must not be first. */
1080fb2a46rwatson	so_gen_t so_gencnt;		/* (h) generation count */
1098c5a4b8jhb	void	*so_emuldata;		/* (b) private data for emulators */
1107cf8a13jtl	so_dtor_t *so_dtor;		/* (b) optional destructor */
1118bec689marcel	struct	osd	osd;		/* Object Specific extensions */
112d5e8d23luigi	/*
113d5e8d23luigi	 * so_fibnum, so_user_cookie and friends can be used to attach
114d5e8d23luigi	 * some user-specified metadata to a socket, which then can be
115d5e8d23luigi	 * used by the kernel for various actions.
116d5e8d23luigi	 * so_user_cookie is used by ipfw/dummynet.
117d5e8d23luigi	 */
1181dfc5c9julian	int so_fibnum;		/* routing domain for this socket */
119d5e8d23luigi	uint32_t so_user_cookie;
120af53319np
1217016975sobomax	int so_ts_clock;	/* type of the clock used for timestamps */
122efa6326hselasky	uint32_t so_max_pacing_rate;	/* (f) TX rate limit in bytes/s */
123e35d543glebius	union {
124e35d543glebius		/* Regular (data flow) socket. */
125e35d543glebius		struct {
126e35d543glebius			/* (cr, cs) Receive and send buffers. */
127e35d543glebius			struct sockbuf		so_rcv, so_snd;
128e35d543glebius
129e35d543glebius			/* (e) Our place on accept queue. */
130e35d543glebius			TAILQ_ENTRY(socket)	so_list;
131e35d543glebius			struct socket		*so_listen;	/* (b) */
132a0a0568sjg			enum socket_qstate so_qstate;		/* (b) */
133e35d543glebius			/* (b) cached MAC label for peer */
134e35d543glebius			struct	label		*so_peerlabel;
135e35d543glebius			u_long	so_oobmark;	/* chars to oob mark */
136fb264c6jhb
137fb264c6jhb			/* (k) Our place on KTLS RX work queue. */
138fb264c6jhb			STAILQ_ENTRY(socket)	so_ktls_rx_list;
139e35d543glebius		};
140e35d543glebius		/*
141e35d543glebius		 * Listening socket, where accepts occur, is so_listen in all
142e35d543glebius		 * subsidiary sockets.  If so_listen is NULL, socket is not
143e35d543glebius		 * related to an accept.  For a listening socket itself
144e35d543glebius		 * sol_incomp queues partially completed connections, while
145e35d543glebius		 * sol_comp is a queue of connections ready to be accepted.
146e35d543glebius		 * If a connection is aborted and it has so_listen set, then
147e35d543glebius		 * it has to be pulled out of either sol_incomp or sol_comp.
148e35d543glebius		 * We allow connections to queue up based on current queue
149e35d543glebius		 * lengths and limit on number of queued connections for this
150e35d543glebius		 * socket.
151e35d543glebius		 */
152e35d543glebius		struct {
153e35d543glebius			/* (e) queue of partial unaccepted connections */
154e35d543glebius			struct accept_queue	sol_incomp;
155e35d543glebius			/* (e) queue of complete unaccepted connections */
156e35d543glebius			struct accept_queue	sol_comp;
157e35d543glebius			u_int	sol_qlen;    /* (e) sol_comp length */
158e35d543glebius			u_int	sol_incqlen; /* (e) sol_incomp length */
159e35d543glebius			u_int	sol_qlimit;  /* (e) queue limit */
160e35d543glebius
161e35d543glebius			/* accept_filter(9) optional data */
162e35d543glebius			struct	accept_filter	*sol_accept_filter;
163e35d543glebius			void	*sol_accept_filter_arg;	/* saved filter args */
164e35d543glebius			char	*sol_accept_filter_str;	/* saved user args */
165e35d543glebius
166e35d543glebius			/* Optional upcall, for kernel socket. */
167e35d543glebius			so_upcall_t	*sol_upcall;	/* (e) */
168e35d543glebius			void		*sol_upcallarg;	/* (e) */
169e35d543glebius
170e35d543glebius			/* Socket buffer parameters, to be copied to
171e35d543glebius			 * dataflow sockets, accepted from this one. */
172e35d543glebius			int		sol_sbrcv_lowat;
173e35d543glebius			int		sol_sbsnd_lowat;
174e35d543glebius			u_int		sol_sbrcv_hiwat;
175e35d543glebius			u_int		sol_sbsnd_hiwat;
176e35d543glebius			short		sol_sbrcv_flags;
177e35d543glebius			short		sol_sbsnd_flags;
178e35d543glebius			sbintime_t	sol_sbrcv_timeo;
179e35d543glebius			sbintime_t	sol_sbsnd_timeo;
1805c3de7ejtl
1815c3de7ejtl			/* Information tracking listen queue overflows. */
1825c3de7ejtl			struct timeval	sol_lastover;	/* (e) */
1835c3de7ejtl			int		sol_overcount;	/* (e) */
184e35d543glebius		};
185e35d543glebius	};
1868fb65cergrimes};
1877168facglebius#endif	/* defined(_KERNEL) || defined(_WANT_SOCKET) */
1887168facglebius
/*
 * Socket state bits.
 *
 * Historically, these bits were all kept in the so_state field.
 * They are now split into separate, lock-specific fields.
 * so_state maintains basic socket state protected by the socket lock.
 * so_qstate holds information about the socket accept queues.
 * Each socket buffer also has a state field holding information
 * relevant to that socket buffer (can't send, rcv).
 * Many fields will be read without locks to improve performance and avoid
 * lock order issues.  However, this approach must be used with caution.
 *
 * Each SS_* value below is a distinct single bit, so they can be
 * combined with bitwise OR and tested with bitwise AND.
 */
#define	SS_NOFDREF		0x0001	/* no file table ref any more */
#define	SS_ISCONNECTED		0x0002	/* socket connected to a peer */
#define	SS_ISCONNECTING		0x0004	/* in process of connecting to peer */
#define	SS_ISDISCONNECTING	0x0008	/* in process of disconnecting */
#define	SS_NBIO			0x0100	/* non-blocking ops */
#define	SS_ASYNC		0x0200	/* async i/o notify */
#define	SS_ISCONFIRMING		0x0400	/* deciding to accept connection req */
#define	SS_ISDISCONNECTED	0x2000	/* socket disconnected from peer */
2097168facglebius
/*
 * Protocols can mark a socket as SS_PROTOREF to indicate that, following
 * pru_detach, they still want the socket to persist, and will free it
 * themselves when they are done.  Protocols should only ever call sofree()
 * following setting this flag in pru_detach(), and never otherwise, as
 * sofree() bypasses socket reference counting.
 */
#define	SS_PROTOREF		0x4000	/* strong protocol reference */
2187168facglebius
2197168facglebius#ifdef _KERNEL
2208fb65cergrimes
/*
 * Accessors for the per-socket mutex, so_lock.
 *
 * SOCK_MTX() yields a pointer to the mutex.  Its expansion is fully
 * parenthesized so that it can be used as an operand of any operator,
 * e.g. SOCK_MTX(so)->field, without being re-associated.
 * The remaining macros lock, query ownership of, unlock, and assert the
 * ownership state of that same mutex.
 */
#define	SOCK_MTX(so)		(&(so)->so_lock)
#define	SOCK_LOCK(so)		mtx_lock(&(so)->so_lock)
#define	SOCK_OWNED(so)		mtx_owned(&(so)->so_lock)
#define	SOCK_UNLOCK(so)		mtx_unlock(&(so)->so_lock)
#define	SOCK_LOCK_ASSERT(so)	mtx_assert(&(so)->so_lock, MA_OWNED)
#define	SOCK_UNLOCK_ASSERT(so)	mtx_assert(&(so)->so_lock, MA_NOTOWNED)
227e35d543glebius
/*
 * Listening-socket variants of the socket lock macros.
 *
 * SOLISTENING() is true when SO_ACCEPTCONN is set in so_options.
 * SOLISTEN_LOCK(), SOLISTEN_UNLOCK() and SOLISTEN_LOCK_ASSERT() operate
 * on the same so_lock mutex as SOCK_LOCK(), and additionally KASSERT
 * that the socket is listening; the check is always made while the
 * mutex is held.  SOLISTEN_TRYLOCK() performs no such check.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects.
 */
#define	SOLISTENING(sol)	(((sol)->so_options & SO_ACCEPTCONN) != 0)
#define	SOLISTEN_LOCK(sol)	do {					\
	mtx_lock(&(sol)->so_lock);					\
	KASSERT(SOLISTENING(sol),					\
	    ("%s: %p not listening", __func__, (sol)));			\
} while (0)
#define	SOLISTEN_TRYLOCK(sol)	mtx_trylock(&(sol)->so_lock)
#define	SOLISTEN_UNLOCK(sol)	do {					\
	KASSERT(SOLISTENING(sol),					\
	    ("%s: %p not listening", __func__, (sol)));			\
	mtx_unlock(&(sol)->so_lock);					\
} while (0)
#define	SOLISTEN_LOCK_ASSERT(sol)	do {				\
	mtx_assert(&(sol)->so_lock, MA_OWNED);				\
	KASSERT(SOLISTENING(sol),					\
	    ("%s: %p not listening", __func__, (sol)));			\
} while (0)
245bddadcfrwatson
246bddadcfrwatson/*
2478fb65cergrimes * Macros for sockets and socket buffering.
2488fb65cergrimes */
2498fb65cergrimes
2508fb65cergrimes/*
251c57fa54rwatson * Flags to sblock().
252c57fa54rwatson */
253