xref: /illumos-gate/usr/src/uts/sun4v/sys/vnet_gen.h (revision 6a634c9d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #ifndef _VNET_GEN_H
27 #define	_VNET_GEN_H
28 
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32 
33 #include <sys/vgen_stats.h>
34 
/* Return codes used by the vgen routines */
#define	VGEN_SUCCESS		(0)	/* call succeeded */
#define	VGEN_FAILURE		(-1)	/* call failed */

/* Upper bound on the number of vgen versions */
#define	VGEN_NUM_VER		1

/* ldc end-point selectors */
#define	VGEN_LOCAL		1	/* local ldc end-point */
#define	VGEN_PEER		2	/* peer ldc end-point */

/* vgen_t flags */
#define	VGEN_STOPPED		0x0
#define	VGEN_STARTED		0x1

/*
 * Free the object that _p points to; the size handed to kmem_free() is
 * taken from the pointed-to type. _p is expanded twice.
 */
#define	KMEM_FREE(_p)		kmem_free((_p), sizeof (*(_p)))

/* Initial size of the multicast table */
#define	VGEN_INIT_MCTAB_SIZE	16

/* Shorthand for acquiring and releasing a reader/writer lock */
#define	READ_ENTER(x)		rw_enter((x), RW_READER)
#define	WRITE_ENTER(x)		rw_enter((x), RW_WRITER)
#define	RW_EXIT(x)		rw_exit((x))

/* channel flags */
#define	CHANNEL_ATTACHED	0x1
#define	CHANNEL_STARTED		0x2

/* transmit return values */
#define	VGEN_TX_SUCCESS		0	/* transmit success */
#define	VGEN_TX_FAILURE		1	/* transmit failure */
#define	VGEN_TX_NORESOURCES	2	/* out of tbufs/txds */

/* private descriptor flags */
#define	VGEN_PRIV_DESC_FREE	0x0	/* descriptor is available */
#define	VGEN_PRIV_DESC_BUSY	0x1	/* descriptor is in use */
67 
/*
 * Navigate from a channel pointer to its owning instances: the channel's
 * port leads to the vgen instance, and the vgen instance leads to the vnet
 * instance. Neither macro checks any pointer along the chain for NULL.
 */
#define	LDC_TO_VGEN(ldcp)	((ldcp)->portp->vgenp)
#define	LDC_TO_VNET(ldcp)	(LDC_TO_VGEN(ldcp)->vnetp)
70 
/* receive (worker) thread flags */
#define	VGEN_WTHR_DATARCVD		0x01	/* data received */
#define	VGEN_WTHR_STOP			0x02	/* stop worker thr request */
#define	VGEN_WTHR_PROCESSING		0x04	/* worker awake & processing */

/* ldc transfer size, retry delays (usec) and tx watchdog timing (msec) */
#define	VGEN_LDC_MTU		64	/* ldc pkt transfer mtu */
#define	VGEN_LDC_UP_DELAY	100	/* usec between ldc_up retries */
#define	VGEN_LDC_CLOSE_DELAY	100	/* usec between ldc_cl retries */
#define	VGEN_LDC_UNINIT_DELAY	100	/* usec between uninit retries */
#define	VGEN_TXWD_INTERVAL	1000	/* tx watchdog freq in msec */
#define	VGEN_TXWD_TIMEOUT	1000	/* tx watchdog timeout in msec */

/* number of vio mblk pools */
#define	VGEN_NUM_VMPOOLS	3

/* receive data buffer sizes (bytes) and receive buffer count */
#define	VGEN_DBLK_SZ_128	128	/* 128 byte data buffer */
#define	VGEN_DBLK_SZ_256	256	/* 256 byte data buffer */
#define	VGEN_DBLK_SZ_2048	2048	/* 2K byte data buffer */
#define	VGEN_NRBUFS		512	/* number of receive bufs */

/* Tx data buffer size */
#define	VGEN_TXDBLK_SZ		2048

/* minimum number of descriptors */
#define	VGEN_NUM_DESCRIPTORS_MIN	128

/*
 * Number of rcv buffers in RxDringData mode. This is the product of the
 * variables vnet_num_descriptors and vgen_nrbufs_factor, both of which must
 * be visible wherever the macro is expanded.
 */
#define	VGEN_RXDRING_NRBUFS	(vnet_num_descriptors * vgen_nrbufs_factor)
98 
99 static struct ether_addr etherbroadcastaddr = {
100 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
101 };
/*
 * MIB II broadcast/multicast packet classification.
 *
 * ehp is a pointer to an Ethernet header whose ether_dhost member holds the
 * destination address. The argument is fully parenthesized, so any pointer
 * expression (for example "&hdr" or "p + 1") may be passed.
 *
 * IS_BROADCAST(ehp): non-zero when ether_cmp() reports the destination
 *	address equal to etherbroadcastaddr.
 * IS_MULTICAST(ehp): non-zero when the low-order bit of the first octet
 *	of the destination address is set.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
109 
/*
 * Handshake phases.
 *
 * The handshake runs through the five phases VH_PHASE0 .. VH_PHASE4, where
 * VH_PHASE0 is the pre-handshake phase. VH_DONE indicates that all phases
 * completed successfully. A phase may comprise one or several handshake
 * states, all of which must complete successfully before the next phase is
 * entered. See vgen_handshake() and vgen_handshake_done() for details.
 */
enum {
	VH_PHASE0 = 0,
	VH_PHASE1 = 1,
	VH_PHASE2 = 2,
	VH_PHASE3 = 3,
	VH_PHASE4 = 4,
	VH_DONE = 0x80
};
120 
/*
 * Handshake states.
 *
 * Every state is a distinct bit, so several states can be recorded together
 * in one word. Each group (version, attributes, descriptor ring, RDX) has
 * four state bits plus a composite value that combines the group's
 * "ack received" and "ack sent" bits.
 */
enum {
	/* version negotiation */
	VER_INFO_SENT		= (1 << 0),
	VER_ACK_RCVD		= (1 << 1),
	VER_INFO_RCVD		= (1 << 2),
	VER_ACK_SENT		= (1 << 3),
	VER_NEGOTIATED		= (VER_ACK_RCVD | VER_ACK_SENT),

	/* attribute exchange */
	ATTR_INFO_SENT		= (1 << 4),
	ATTR_ACK_RCVD		= (1 << 5),
	ATTR_INFO_RCVD		= (1 << 6),
	ATTR_ACK_SENT		= (1 << 7),
	ATTR_INFO_EXCHANGED	= (ATTR_ACK_RCVD | ATTR_ACK_SENT),

	/* descriptor ring exchange */
	DRING_INFO_SENT		= (1 << 8),
	DRING_ACK_RCVD		= (1 << 9),
	DRING_INFO_RCVD		= (1 << 10),
	DRING_ACK_SENT		= (1 << 11),
	DRING_INFO_EXCHANGED	= (DRING_ACK_RCVD | DRING_ACK_SENT),

	/* RDX exchange */
	RDX_INFO_SENT		= (1 << 12),
	RDX_ACK_RCVD		= (1 << 13),
	RDX_INFO_RCVD		= (1 << 14),
	RDX_ACK_SENT		= (1 << 15),
	RDX_EXCHANGED		= (RDX_ACK_RCVD | RDX_ACK_SENT)
};
149 
/* reset flags; each value is a distinct bit */
typedef enum {
	VGEN_FLAG_EVT_RESET	= (1 << 0),	/* channel reset event */
	VGEN_FLAG_NEED_LDCRESET	= (1 << 1),	/* need channel reset */
	VGEN_FLAG_UNINIT	= (1 << 2)	/* channel tear down */
} vgen_reset_flags_t;
156 
/*
 * Identifies the calling context; this information is needed in some
 * code paths. Each value is a distinct bit.
 */
typedef enum {
	VGEN_LDC_CB	= (1 << 0),	/* ldc callback handler */
	VGEN_MSG_THR	= (1 << 1),	/* vio message worker thread */
	VGEN_OTHER	= (1 << 2)	/* other threads - tx etc */
} vgen_caller_t;
163 
/*
 * Get the address of the tbuf that follows `cur' in the channel's tbuf
 * ring: the start of the ring ((ldcp)->tbufp) when cur + 1 reaches
 * (ldcp)->tbufendp, otherwise cur + 1.
 *
 * The second parameter is deliberately NOT named `tbufp'. A macro parameter
 * with that name would also be substituted into the member access
 * (ldcp)->tbufp, so the macro would only work when the caller's variable
 * happened to be called `tbufp'.
 *
 * Both arguments are evaluated more than once; do not pass expressions
 * with side effects.
 */
#define	NEXTTBUF(ldcp, cur)	(((cur) + 1) == (ldcp)->tbufendp    \
		? (ldcp)->tbufp : ((cur) + 1))
167 
/*
 * Ring index helpers.
 *
 * The INCR_ and DECR_ macros step the lvalue idx by one and wrap it by
 * masking with (ring size - 1), so the result is a valid ring index only
 * when the ring size is a power of two. The new value is stored back into
 * idx and is also the value of the expression. The CHECK_ macros yield
 * non-zero when idx lies in [0, ring size). The ring size is read from
 * chan->num_rxds (RX variants) or chan->num_txds (TX variants).
 *
 * idx is evaluated more than once in every macro.
 */

/* increment recv index */
#define	INCR_RXI(idx, chan)	\
		((idx) = (((idx) + 1) & ((chan)->num_rxds - 1)))

/* decrement recv index */
#define	DECR_RXI(idx, chan)	\
		((idx) = (((idx) - 1) & ((chan)->num_rxds - 1)))

/* increment tx index */
#define	INCR_TXI(idx, chan)	\
		((idx) = (((idx) + 1) & ((chan)->num_txds - 1)))

/* decrement tx index */
#define	DECR_TXI(idx, chan)	\
		((idx) = (((idx) - 1) & ((chan)->num_txds - 1)))

/* bounds check rx index */
#define	CHECK_RXI(idx, chan)	\
		(((idx) >= 0) && ((idx) < (chan)->num_rxds))

/* bounds check tx index */
#define	CHECK_TXI(idx, chan)	\
		(((idx) >= 0) && ((idx) < (chan)->num_txds))
191 
#if defined(DEBUG)

/* Error injection codes (DEBUG builds only); each code is a distinct bit */
#define	VGEN_ERR_HVER		0x01	/* handshake version */
#define	VGEN_ERR_HTIMEOUT	0x02	/* handshake timeout */
#define	VGEN_ERR_HSID		0x04	/* handshake session id */
#define	VGEN_ERR_HSTATE		0x08	/* handshake state */
#define	VGEN_ERR_TXTIMEOUT	0x10	/* tx timeout */
#define	VGEN_ERR_RXLOST		0x20	/* rx lost pkts */

#endif	/* DEBUG */
203 /* private descriptor */
204 typedef struct vgen_priv_desc {
205 	uint64_t		flags;		/* flag bits */
206 	vnet_public_desc_t	*descp;		/* associated public desc */
207 	ldc_mem_handle_t	memhandle;	/* mem handle for data */
208 	caddr_t			datap;		/* prealloc'd tx data buffer */
209 	uint64_t		datalen;	/* total actual datalen */
210 	uint64_t		ncookies;	/* num ldc_mem_cookies */
211 	ldc_mem_cookie_t	memcookie[MAX_COOKIES];	/* data cookies */
212 } vgen_private_desc_t;
213 
214 /*
215  * Handshake parameters (per vio_mailbox.h) of each ldc end point, used
216  * during handshake negotiation.
217  */
218 typedef struct vgen_handshake_params {
219 	/* version specific params */
220 	uint16_t	ver_major;		/* major version number */
221 	uint16_t	ver_minor;		/* minor version number */
222 	uint8_t		dev_class;		/* device class */
223 
224 	/* attributes specific params */
225 	uint64_t		mtu;		/* max transfer unit size */
226 	uint64_t		addr;		/* address of the device */
227 	uint8_t			addr_type;	/* type of address */
228 	uint8_t			xfer_mode;	/* SHM or PKT */
229 	uint16_t		ack_freq;	/* dring data ack freq */
230 	uint32_t		physlink_update; /* physlink updates */
231 	uint8_t			dring_mode;	/* Descriptor ring mode */
232 
233 	/* descriptor ring params */
234 	uint32_t		num_desc;	/* # of descriptors in ring */
235 	uint32_t		desc_size;	/* size of descriptor */
236 	ldc_mem_cookie_t	dring_cookie;	/* desc ring cookie */
237 	uint32_t		dring_ncookies;	/* # of dring cookies */
238 	uint64_t		dring_ident;	/* ident=0 for INFO msg */
239 	boolean_t		dring_ready;	/* dring ready flag */
240 } vgen_hparams_t;
241 
/* version info: a major/minor version number pair */
typedef struct vgen_ver {
	uint16_t	ver_major;	/* major version number */
	uint16_t	ver_minor;	/* minor version number */
} vgen_ver_t;
247 
248 /*
249  * vnet-protocol-version dependent function prototypes.
250  */
251 typedef int	(*vgen_ldctx_t) (void *, mblk_t *);
252 typedef void	(*vgen_ldcrx_pktdata_t) (void *, void *, uint32_t);
253 typedef int	(*vgen_ldcrx_dringdata_t) (void *, void *);
254 
255 /*
256  * LDC end point abstraction in vnet. This structure holds all the information
257  * that is required to configure and use the Channel for data transfers with
258  * the peer LDC end point (vnet or vswitch), using VIO Protocol.
259  */
260 typedef struct vgen_ldc {
261 
262 	struct vgen_port	*portp;		/* associated port */
263 
264 	/*
265 	 * Locks:
266 	 * locking hierarchy when more than one lock is held concurrently:
267 	 * cblock > rxlock > txlock > tclock.
268 	 */
269 	kmutex_t		cblock;		/* sync callback processing */
270 	kmutex_t		txlock;		/* protect txd alloc */
271 	kmutex_t		tclock;		/* tx reclaim lock */
272 	kmutex_t		wrlock;		/* sync transmits */
273 	kmutex_t		rxlock;		/* sync reception */
274 	kmutex_t		pollq_lock;	/* sync polling and rxworker */
275 
276 	/*
277 	 * Channel and Handshake Info
278 	 */
279 	uint64_t		ldc_id;		/* channel number */
280 	uint64_t		ldc_handle;	/* channel handle */
281 	ldc_status_t		ldc_status;	/* channel status */
282 	vgen_ver_t		vgen_versions[VGEN_NUM_VER]; /* versions */
283 	int			hphase;		/* handshake phase */
284 	int			hstate;		/* handshake state bits */
285 	link_state_t		link_state;	/* channel link state */
286 #ifdef	VNET_IOC_DEBUG
287 	boolean_t		link_down_forced; /* forced link down */
288 #endif
289 	uint32_t		local_sid;	/* local session id */
290 	uint32_t		peer_sid;	/* session id of peer */
291 	vgen_hparams_t		local_hparams;	/* local handshake params */
292 	vgen_hparams_t		peer_hparams;	/* peer's handshake params */
293 	timeout_id_t		htid;		/* handshake wd timeout id */
294 	timeout_id_t		cancel_htid;	/* cancel handshake watchdog */
295 	uint8_t			dring_mtype;	/* dring mem map type */
296 	uint64_t		*ldcmsg;	/* msg buffer for ldc_read() */
297 	uint64_t		msglen;		/* size of ldcmsg */
298 	uint32_t		flags;		/* flags */
299 	uint_t			reset_in_progress; /* channel being reset */
300 	uint32_t		hretries;	/* handshake retry count */
301 	uint32_t		ldc_reset_count; /* # of channel resets */
302 
303 	/*
304 	 * Transmit Specific Fields
305 	 */
306 	/* TX-Common (Used in both TxDring and RxDringData modes) */
307 	uint32_t		num_txds;	   /* # of descriptors */
308 	uint32_t		tx_dring_ncookies; /* # of dring cookies */
309 	ldc_dring_handle_t	tx_dring_handle;   /* dring handle */
310 	ldc_mem_cookie_t	tx_dring_cookie;   /* dring cookie */
311 	uint32_t		next_txi;	   /* free descriptor index */
312 	caddr_t			tx_datap;	   /* tx data area */
313 	size_t			tx_data_sz;	   /* size of data area */
314 	size_t			tx_dblk_sz;	   /* size of data blk */
315 	timeout_id_t		wd_tid;		   /* watchdog timeout id */
316 	boolean_t		tx_blocked;	   /* flow controlled */
317 	clock_t			tx_blocked_lbolt;  /* flow controlled time */
318 	boolean_t		resched_peer;	   /* restart peer needed */
319 	uint32_t		resched_peer_txi;  /* index to resched peer */
320 	vgen_ldctx_t		tx;		   /* transmit function */
321 	vgen_ldctx_t		tx_dringdata;	   /* dring transmit function */
322 
323 	/* TX-TxDring mode */
324 	vnet_public_desc_t	*txdp;		/* exported dring */
325 	vgen_private_desc_t	*tbufp;		/* dring associated resources */
326 	vgen_private_desc_t	*tbufendp;	/* tbuf ring end */
327 	vgen_private_desc_t	*next_tbufp;	/* free tbuf */
328 	vgen_private_desc_t	*cur_tbufp;	/* reclaim tbuf */
329 	uint32_t		cur_txi;	/* reclaim descrptor index */
330 	uint64_t		next_txseq;	/* msg seqnum */
331 	clock_t			reclaim_lbolt;	/* time of last reclaim */
332 
333 	/* TX-RxDringData mode */
334 	uint32_t		tx_data_ncookies; /* # of data cookies */
335 	ldc_mem_handle_t	tx_data_handle;	  /* mapped data handle */
336 	ldc_mem_cookie_t	*tx_data_cookie;  /* mapped data cookies */
337 	vnet_rx_dringdata_desc_t *mtxdp;	  /* mapped dring */
338 	uint32_t		dringdata_msgid;  /* msg id */
339 
340 	/*
341 	 * Receive Specific Fields
342 	 */
343 	/* RX-Common (Used in both TxDring and RxDringData modes) */
344 	uint32_t		num_rxds;	   /* # of descriptors */
345 	uint32_t		rx_dring_ncookies; /* # of dring cookies */
346 	ldc_dring_handle_t	rx_dring_handle;   /* dring handle */
347 	ldc_mem_cookie_t	rx_dring_cookie;   /* dring cookie */
348 	uint32_t		next_rxi;	   /* free descriptor index */
349 	vgen_ldcrx_dringdata_t	rx_dringdata;	   /* dring rcv function */
350 	vgen_ldcrx_pktdata_t	rx_pktdata;	   /* raw data rcv function */
351 	boolean_t		polling_on;	   /* polling enabled ? */
352 
353 	/* RX-TxDring mode */
354 	vnet_public_desc_t	*mrxdp;		 /* mapped dring */
355 	uint64_t		next_rxseq;	 /* msg seqnum */
356 	vio_multi_pool_t	vmp;		 /* mblk pools */
357 	uint32_t		max_rxpool_size; /* max size of rxpool in use */
358 	mblk_t			*pollq_headp;	 /* head of pkts in pollq */
359 	mblk_t			*pollq_tailp;	 /* tail of pkts in pollq */
360 	kthread_t		*msg_thread;	 /* message thread */
361 	uint32_t		msg_thr_flags;	 /* message thread flags */
362 	kmutex_t		msg_thr_lock;	 /* lock for message thread */
363 	kcondvar_t		msg_thr_cv;	 /* cond.var for msg thread */
364 
365 	/* RX-RxDringData mode */
366 	uint32_t		num_rbufs;	  /* # of data bufs */
367 	uint32_t		rx_data_ncookies; /* # of data cookies */
368 	ldc_mem_handle_t	rx_data_handle;	  /* exported data handle */
369 	ldc_mem_cookie_t	*rx_data_cookie;  /* exported data cookies */
370 	vnet_rx_dringdata_desc_t *rxdp;		  /* exported dring */
371 	vio_mblk_pool_t		*rx_vmp;	  /* mblk pool */
372 	vio_mblk_t		**rxdp_to_vmp;	  /* descr to buf map tbl */
373 	caddr_t			rx_datap;	  /* mapped rx data area */
374 	size_t			rx_data_sz;	  /* size of mapped rx data */
375 	size_t			rx_dblk_sz;	  /* size of each rx data blk */
376 	mblk_t			*rx_pri_head;	  /* priority pkts head */
377 	mblk_t			*rx_pri_tail;	  /* priority pkts tail */
378 
379 	/* Channel Statistics */
380 	vgen_stats_t		stats;		/* channel statistics */
381 	kstat_t			*ksp;		/* channel kstats */
382 } vgen_ldc_t;
383 
384 /* port information  structure */
385 typedef struct vgen_port {
386 	struct vgen_port	*nextp;		/* next port in the list */
387 	struct vgen		*vgenp;		/* associated vgen_t */
388 	int			port_num;	/* port number */
389 	boolean_t		is_vsw_port;	/* connected to vswitch ? */
390 	int			num_ldcs;	/* # of channels in this port */
391 	uint64_t		*ldc_ids;	/* channel ids */
392 	vgen_ldc_t		*ldcp;		/* list of ldcs for this port */
393 	ether_addr_t		macaddr;	/* mac address of peer */
394 	uint16_t		pvid;		/* port vlan id (untagged) */
395 	uint16_t		*vids;		/* vlan ids (tagged) */
396 	uint16_t		nvids;		/* # of vids */
397 	mod_hash_t		*vlan_hashp;	/* vlan hash table */
398 	uint32_t		vlan_nchains;	/* # of vlan hash chains */
399 	uint32_t		use_vsw_port;	/* Use vsw_port or not */
400 	uint32_t		flags;		/* status of this port */
401 	vio_net_callbacks_t	vcb;		/* vnet callbacks */
402 	vio_net_handle_t	vhp;		/* handle from vnet */
403 	kmutex_t		lock;		/* synchornize ops */
404 } vgen_port_t;
405 
406 /* port list structure */
407 typedef struct vgen_portlist {
408 	vgen_port_t	*headp;		/* head of ports */
409 	vgen_port_t	*tailp;		/* tail */
410 	krwlock_t	rwlock;		/* sync access to the port list */
411 } vgen_portlist_t;
412 
413 /* vgen instance information  */
414 typedef struct vgen {
415 	vnet_t			*vnetp;		/* associated vnet instance */
416 	int			instance;	/* vnet instance */
417 	dev_info_t		*vnetdip;	/* dip of vnet */
418 	uint64_t		regprop;	/* "reg" property */
419 	ether_addr_t		macaddr;	/* mac addr of vnet */
420 	kmutex_t		lock;		/* synchornize ops */
421 	int			flags;		/* flags */
422 	vgen_portlist_t		vgenports;	/* Port List */
423 	mdeg_node_spec_t	*mdeg_parentp;
424 	mdeg_handle_t		mdeg_dev_hdl;	/* mdeg cb handle for device */
425 	mdeg_handle_t		mdeg_port_hdl;	/* mdeg cb handle for port */
426 	vgen_port_t		*vsw_portp;	/* port connected to vsw */
427 	struct ether_addr	*mctab;		/* multicast addr table */
428 	uint32_t		mcsize;		/* allocated size of mctab */
429 	uint32_t		mccount;	/* # of valid addrs in mctab */
430 	ddi_taskq_t		*rxp_taskq;	/* VIO rx pool taskq */
431 	uint32_t		pri_num_types;	/* # of priority eth types */
432 	uint16_t		*pri_types;	/* priority eth types */
433 	vio_mblk_pool_t		*pri_tx_vmp;	/* tx priority mblk pool */
434 	uint32_t		max_frame_size;	/* max frame size supported */
435 
436 	uint32_t		vsw_port_refcnt; /* refcnt for vsw_port */
437 	boolean_t		pls_negotiated;	/* phys link state update ? */
438 	link_state_t		phys_link_state; /* physical link state */
439 } vgen_t;
440 
441 #ifdef __cplusplus
442 }
443 #endif
444 
445 #endif	/* _VNET_GEN_H */
446