xref: /illumos-gate/usr/src/uts/common/io/ena/ena.h (revision c46e4de3)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Oxide Computer Company
14  */
15 
16 #ifndef	_ENA_H
17 #define	_ENA_H
18 
19 #include <sys/stdbool.h>
20 #include <sys/ddi.h>
21 #include <sys/sunddi.h>
22 #include <sys/types.h>
23 #include <sys/atomic.h>
24 #include <sys/list.h>
25 #include <sys/time.h>
26 #include <sys/modctl.h>
27 #include <sys/conf.h>
28 #include <sys/cpuvar.h>
29 #include <sys/pci.h>
30 #include <sys/sysmacros.h>
31 #include <sys/mac.h>
32 #include <sys/mac_ether.h>
33 #include <sys/mac_provider.h>
34 #include <sys/pattr.h>
35 #include <sys/strsun.h>
36 #include <sys/ethernet.h>
37 #include <sys/vlan.h>
38 #include <sys/utsname.h>
39 #include "ena_hw.h"
40 
41 /*
42  * AWS ENA Ethernet Driver
43  */
44 
45 #ifdef __cplusplus
46 extern "C" {
47 #endif
48 
/* The name of this driver module. */
#define	ENA_MODULE_NAME	"ena"

/*
 * The minimum supported ENA device controller version, expressed as
 * major.minor.subminor (0.0.1).
 */
#define	ENA_CTRL_MAJOR_VSN_MIN		0
#define	ENA_CTRL_MINOR_VSN_MIN		0
#define	ENA_CTRL_SUBMINOR_VSN_MIN	1

/*
 * The version of this driver module, expressed as major.minor.subminor
 * (1.0.0).
 */
#define	ENA_MODULE_VER_MAJOR	1
#define	ENA_MODULE_VER_MINOR	0
#define	ENA_MODULE_VER_SUBMINOR	0

/*
 * The Linux driver doesn't document what the specification version
 * number controls or the contract around version changes. The best we
 * can do is use the same version that they use and port version
 * changes as they come (the last one was in 2018).
 *
 * common: ENA_COMMON_SPEC_VERSION_{MAJOR,MINOR}
 */
#define	ENA_SPEC_VERSION_MAJOR	2
#define	ENA_SPEC_VERSION_MINOR	0


/*
 * The register set number of the device registers. This represents
 * BAR 0.
 *
 * NOTE(review): presumably register set 0 is the PCI config space,
 * which is why BAR 0 is set 1 -- confirm against the mapping code.
 */
#define	ENA_REG_NUMBER	1
76 
/*
 * A sentinel value passed as argument to ena_ring_rx() to indicate
 * the Rx ring is being read in interrupt mode, not polling mode.
 *
 * The value is parenthesized so that the negative constant cannot
 * combine with adjacent operators at the point of expansion.
 */
#define	ENA_INTERRUPT_MODE	(-1)
82 
/*
 * NOTE(review): presumably the number of bytes by which Rx buffer data
 * is offset so that the IP header following the Ethernet header is
 * naturally aligned -- confirm against the Rx path.
 */
#define	ENA_RX_BUF_IPHDR_ALIGNMENT	2
/* NOTE(review): presumably the number of entries in the admin queue. */
#define	ENA_ADMINQ_DEPTH		32
/* The number of descriptors in the Async Event Notification Queue. */
#define	ENA_AENQ_NUM_DESCS		32

/*
 * Convert milliseconds to nanoseconds. The multiplication is done in
 * unsigned long arithmetic (or wider, depending on the type of ms).
 * Note that the timeouts defined later in this file use MSEC2NSEC()
 * rather than this macro.
 */
#define	ENA_MS_TO_NS(ms)	((ms) * 1000000ul)
89 
/*
 * The default amount of time we will wait for an admin command to complete,
 * specified in nanoseconds. This can be overridden by hints received from the
 * device. We default to half a second.
 */
#define	ENA_ADMIN_CMD_DEF_TIMEOUT_NS	MSEC2NSEC(500)

/*
 * The interval of the watchdog timer, in nanoseconds (one second).
 */
#define	ENA_WATCHDOG_INTERVAL_NS	MSEC2NSEC(1000)

/*
 * The device sends a keepalive message every second. If we don't see any for
 * a while we will trigger a device reset. Other open source drivers use
 * 6 seconds for this value, so do we. Specified in nanoseconds.
 */
#define	ENA_DEVICE_KEEPALIVE_TIMEOUT_NS	MSEC2NSEC(6000)

/*
 * The number of consecutive times a Tx queue needs to be seen as blocked by
 * the watchdog timer before a reset is invoked. Since the watchdog interval
 * (ENA_WATCHDOG_INTERVAL_NS) is one second, this is approximately in seconds.
 * Unlike the values above, this is a count of watchdog ticks and not a time
 * in nanoseconds.
 */
#define	ENA_TX_STALL_TIMEOUT		8
115 
/*
 * In order to avoid rapidly sending basic stats requests to the controller, we
 * impose a limit of one request every 10ms. Specified in nanoseconds.
 *
 * The replacement text deliberately carries no trailing semicolon so that
 * the macro may be used inside larger expressions (comparisons, function
 * arguments) as well as in simple assignments.
 */
#define	ENA_BASIC_STATS_MINIMUM_INTERVAL_NS	MSEC2NSEC(10)
121 
/*
 * Property macros.
 *
 * Each ENA_PROP_* string is the name of a tunable property; the
 * accompanying _MIN, _MAX and _DEF macros give its bounds and default.
 *
 * NOTE(review): presumably these are read from the driver's conf file
 * during the ENA_ATTACH_READ_CONF attach step; the maximum and default
 * for the descriptor counts are not defined here -- confirm where they
 * come from.
 */
#define	ENA_PROP_RXQ_NUM_DESCS	"rx_queue_num_descs"
#define	ENA_PROP_RXQ_NUM_DESCS_MIN	64

#define	ENA_PROP_TXQ_NUM_DESCS	"tx_queue_num_descs"
#define	ENA_PROP_TXQ_NUM_DESCS_MIN	64

#define	ENA_PROP_RXQ_INTR_LIMIT	"rx_queue_intr_limit"
#define	ENA_PROP_RXQ_INTR_LIMIT_MIN	16
#define	ENA_PROP_RXQ_INTR_LIMIT_MAX	4096
#define	ENA_PROP_RXQ_INTR_LIMIT_DEF	256
135 
/*
 * Produce a 64-bit mask with the low (x) bits set. A width of 64 or
 * more yields an all-ones mask; shifting a 64-bit value by its full
 * width is undefined behaviour in C, so that case is handled
 * explicitly. Note that (x) is evaluated twice.
 */
#define	ENA_DMA_BIT_MASK(x)	\
	(((x) >= 64) ? ~0ULL : ((1ULL << (x)) - 1ULL))
/*
 * Verify that phys_addr fits within the DMA address width recorded in
 * (ena)->ena_dma_width: masking the address with the width's bit mask
 * must leave it unchanged. This uses VERIFY3U, so the check is not
 * limited to DEBUG builds. Note that phys_addr is evaluated twice.
 */
#define	ENA_DMA_VERIFY_ADDR(ena, phys_addr)				\
	VERIFY3U(ENA_DMA_BIT_MASK((ena)->ena_dma_width) & (phys_addr), \
	    ==, (phys_addr))
140 
/*
 * The parameters of a DMA allocation; a pointer to this structure is
 * one of the arguments to ena_dma_alloc().
 *
 * NOTE(review): the field descriptions below are inferred from the
 * field names only -- confirm against ena_dma_alloc().
 */
typedef struct ena_dma_conf {
	size_t		edc_size;	/* presumably the size in bytes */
	uint64_t	edc_align;	/* presumably the alignment */
	int		edc_sgl;	/* presumably the max SGL length */
	uchar_t		edc_endian;	/* presumably the access endianness */
	bool		edc_stream;	/* presumably streaming vs consistent */
} ena_dma_conf_t;
148 
/*
 * A DMA buffer: the access and DMA handles together with the buffer's
 * address, lengths and DMA cookie. These are set up by ena_dma_alloc()
 * and released by ena_dma_free().
 */
typedef struct ena_dma_buf {
	/* NOTE(review): presumably the kernel virtual address. */
	caddr_t			edb_va;
	/* NOTE(review): presumably the requested length in bytes. */
	size_t			edb_len;
	/*
	 * The length given by DMA engine, kept around for debugging
	 * purposes.
	 */
	size_t			edb_real_len;
	/* NOTE(review): presumably the bytes currently in use. */
	size_t			edb_used_len;
	ddi_acc_handle_t	edb_acc_hdl;
	ddi_dma_handle_t	edb_dma_hdl;
	const ddi_dma_cookie_t	*edb_cookie;
} ena_dma_buf_t;
162 
/*
 * Synchronize the DMA buffer (an ena_dma_buf_t, passed by value
 * expression and not by pointer) in the direction given by flag.
 *
 * We always sync the entire range (offset 0, length 0), and therefore
 * expect success. DEBUG builds assert that ddi_dma_sync() returned
 * zero; non-DEBUG builds explicitly discard the return value.
 */
#ifdef DEBUG
#define	ENA_DMA_SYNC(buf, flag)					\
	ASSERT0(ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
#else  /* DEBUG */
#define	ENA_DMA_SYNC(buf, flag)					\
	((void)ddi_dma_sync((buf).edb_dma_hdl, 0, 0, (flag)))
#endif /* DEBUG */
173 
/*
 * The signature of an Async Event Notification Queue (AENQ) handler.
 * It receives an opaque data pointer and the AENQ descriptor.
 *
 * NOTE(review): presumably data is the ena_t -- confirm against the
 * handlers.
 */
typedef void (*ena_aenq_hdlr_t)(void *data, enahw_aenq_desc_t *desc);
175 
/*
 * The Async Event Notification Queue (AENQ) state: the descriptor
 * ring, its backing DMA buffer, and one handler slot per AENQ group.
 */
typedef struct ena_aenq {
	enahw_aenq_desc_t	*eaenq_descs;
	ena_dma_buf_t		eaenq_dma;
	/* One handler slot per group (ENAHW_AENQ_GROUPS_ARR_NUM slots). */
	ena_aenq_hdlr_t		eaenq_hdlrs[ENAHW_AENQ_GROUPS_ARR_NUM];
	uint16_t		eaenq_num_descs;
	uint16_t		eaenq_head;
	uint8_t			eaenq_phase;
} ena_aenq_t;
184 
/*
 * The admin Submission Queue: the command descriptor ring, its backing
 * DMA buffer, and the tail index and phase used when submitting.
 */
typedef struct ena_admin_sq {
	enahw_cmd_desc_t	*eas_entries;
	ena_dma_buf_t		eas_dma;
	/* NOTE(review): presumably the SQ doorbell register address. */
	uint32_t		*eas_dbaddr;
	uint16_t		eas_tail;
	uint8_t			eas_phase;
} ena_admin_sq_t;
192 
/*
 * The admin Completion Queue: the response descriptor ring, its
 * backing DMA buffer, and the head index and phase used when reading
 * responses.
 */
typedef struct ena_admin_cq {
	enahw_resp_desc_t	*eac_entries;
	ena_dma_buf_t		eac_dma;
	uint16_t		eac_head;
	uint8_t			eac_phase;
} ena_admin_cq_t;
199 
/*
 * The command context is used to track outstanding requests and match
 * them to device responses.
 */
typedef struct ena_cmd_ctx {
	/*
	 * List linkage.
	 *
	 * NOTE(review): presumably for the admin queue's
	 * ea_cmd_ctxs_free and ea_cmd_ctxs_used lists -- confirm.
	 */
	list_node_t		ectx_node;

	/*
	 * The index into ea_cmd_ctxs where this ctx lives. Used as
	 * the command ID value in the command descriptor. This allows
	 * us to match a response to its associated context.
	 */
	uint16_t		ectx_id;

	/* Is the command pending? */
	bool			ectx_pending;

	/* The type of command associated with this context. */
	enahw_cmd_opcode_t	ectx_cmd_opcode;

	/*
	 * The location to copy the full response to. This is
	 * specified by the caller of the command during
	 * submission.
	 */
	enahw_resp_desc_t	*ectx_resp;
} ena_cmd_ctx_t;
227 
/*
 * The admin queue, the queue through which commands are sent to the
 * device.
 *
 * WO: Write Once (at initialization)
 *
 * In general, only a single lock needs to be held in order to access
 * the different parts of the admin queue:
 *
 *  sq_lock: Any data dealing with submitting admin commands, which
 *  includes acquiring a command context.
 *
 *  cq_lock: Any data dealing with reading command responses.
 *
 *  stat_lock: For accessing statistics.
 *
 * In some cases, the ectx_lock/stat_lock may be held in tandem with
 * either the SQ or CQ lock. In that case, the SQ/CQ lock is always
 * entered first.
 *
 * NOTE(review): no ectx_lock member is declared in this structure; the
 * reference above may be stale -- confirm.
 */
typedef struct ena_adminq {
	kmutex_t		ea_sq_lock;	/* WO */
	kmutex_t		ea_cq_lock;	/* WO */
	kmutex_t		ea_stat_lock;	/* WO */

	hrtime_t		ea_cmd_timeout_ns; /* WO */

	uint16_t		ea_qlen;	/* WO */
	bool			ea_poll_mode;	/* WO */

	ena_cmd_ctx_t		*ea_cmd_ctxs;	  /* WO */
	list_t			ea_cmd_ctxs_free; /* ea_sq_lock */
	list_t			ea_cmd_ctxs_used; /* ea_sq_lock */
	uint16_t		ea_pending_cmds; /* ea_sq_lock */
	ena_admin_sq_t		ea_sq; /* ea_sq_lock */
	ena_admin_cq_t		ea_cq; /* ea_cq_lock */

	/* ea_stat_lock */
	struct ena_adminq_stats {
		uint64_t cmds_fail;
		uint64_t cmds_submitted;
		uint64_t cmds_success;
		uint64_t queue_full;
	} ea_stats;
} ena_adminq_t;
273 
/*
 * Cache of the last set of value hints received from the device. See the
 * definition of enahw_device_hints_t in ena_hw.h for more detail on the
 * purpose of each.
 */
typedef struct ena_hints {
	uint16_t		eh_mmio_read_timeout;
	uint16_t		eh_keep_alive_timeout;
	uint16_t		eh_tx_comp_timeout;
	uint16_t		eh_missed_tx_reset_threshold;
	uint16_t		eh_admin_comp_timeout;
	uint16_t		eh_max_tx_sgl;
	uint16_t		eh_max_rx_sgl;
} ena_hints_t;
288 
/*
 * The ordered steps of attaching the driver. The first step has the
 * value 1 and ENA_ATTACH_END is a sentinel that follows the last real
 * step.
 */
typedef enum ena_attach_seq {
	ENA_ATTACH_PCI = 1,	 /* PCI config space */
	ENA_ATTACH_REGS,	 /* BAR mapping */
	ENA_ATTACH_DEV_INIT,	 /* ENA device initialization */
	ENA_ATTACH_READ_CONF,	 /* Read driver conf file */
	ENA_ATTACH_DEV_CFG,	 /* Set any needed device config */
	ENA_ATTACH_INTR_ALLOC,	 /* interrupt handles allocated */
	ENA_ATTACH_INTR_HDLRS,	 /* intr handlers set */
	ENA_ATTACH_TXQS_ALLOC,	 /* Tx Queues allocated */
	ENA_ATTACH_RXQS_ALLOC,	 /* Rx Queues allocated */
	ENA_ATTACH_MAC_REGISTER, /* registered with mac */
	ENA_ATTACH_INTRS_ENABLE, /* interrupts are enabled */
	ENA_ATTACH_END
} ena_attach_seq_t;
303 
/*
 * The first attach step, and the number of real attach steps. The
 * sequence starts at 1, so the count is the ENA_ATTACH_END sentinel
 * minus one.
 */
#define	ENA_ATTACH_SEQ_FIRST	(ENA_ATTACH_PCI)
#define	ENA_ATTACH_NUM_ENTRIES	(ENA_ATTACH_END - 1)
306 
/*
 * Forward declaration of the per-instance state, followed by the
 * signatures of the attach and cleanup functions for an attach step.
 *
 * NOTE(review): the meaning of the cleanup function's bool argument is
 * not visible in this header -- confirm against the implementations.
 */
struct ena;
typedef bool (*ena_attach_fn_t)(struct ena *);
typedef void (*ena_cleanup_fn_t)(struct ena *, bool);
310 
/*
 * The description of a single attach step: its position in the
 * sequence, a name, and the functions which perform and undo it.
 */
typedef struct ena_attach_desc {
	ena_attach_seq_t ead_seq;
	const char *ead_name;
	ena_attach_fn_t ead_attach_fn;
	/*
	 * NOTE(review): presumably whether a failure of this step
	 * fails the whole attach -- confirm against the attach loop.
	 */
	bool ead_attach_hard_fail;
	ena_cleanup_fn_t ead_cleanup_fn;
} ena_attach_desc_t;
318 
/*
 * The type of a Tx control block. Copy is the only Tx method listed
 * besides "none".
 */
typedef enum {
	ENA_TCB_NONE,
	ENA_TCB_COPY
} ena_tcb_type_t;
323 
/*
 * The TCB is used to track information relating to the Tx of a
 * packet. At the moment we support copy only.
 */
typedef struct ena_tx_control_block {
	/* The mblk associated with this TCB. */
	mblk_t		*etcb_mp;
	ena_tcb_type_t	etcb_type;
	/* NOTE(review): presumably the buffer the packet is copied to. */
	ena_dma_buf_t	etcb_dma;
} ena_tx_control_block_t;
333 
/*
 * Tx queue state. Each value other than NONE is a distinct bit, so the
 * values may be combined.
 */
typedef enum ena_txq_state {
	ENA_TXQ_STATE_NONE		= 0,
	ENA_TXQ_STATE_HOST_ALLOC	= 1 << 0,
	ENA_TXQ_STATE_CQ_CREATED	= 1 << 1,
	ENA_TXQ_STATE_SQ_CREATED	= 1 << 2,
	ENA_TXQ_STATE_READY		= 1 << 3, /* TxQ ready and waiting */
	ENA_TXQ_STATE_RUNNING		= 1 << 4, /* intrs enabled */
} ena_txq_state_t;
342 
/* Per-Tx-queue statistics, kept as named kstats. */
typedef struct ena_txq_stat {
	/* Number of times mac_ether_offload_info() has failed. */
	kstat_named_t	ets_hck_meoifail;

	/*
	 * Total number of times the ring was blocked due to
	 * insufficient descriptors, or unblocked due to recycling
	 * descriptors.
	 */
	kstat_named_t	ets_blocked;
	kstat_named_t	ets_unblocked;

	/* The total number of descriptors that have been recycled. */
	kstat_named_t	ets_recycled;

	/*
	 * Number of bytes and packets that have been _submitted_ to
	 * the device.
	 */
	kstat_named_t	ets_bytes;
	kstat_named_t	ets_packets;
} ena_txq_stat_t;
365 
/*
 * A transmit queue, made up of a Submission Queue (SQ) and Completion
 * Queue (CQ) to form a logical descriptor ring for sending packets.
 *
 * Write Once (WO)
 *
 *   This value is written once, before the datapath is activated, in
 *   a function which is controlled by mac(9E). Some values may be
 *   written earlier, during ena attach, like et_ena and
 *   et_sq_num_descs.
 *
 * Tx Mutex (TM) -- et_lock
 *
 *   This value is protected by the Tx queue's mutex. Some values may
 *   be initialized in a WO path, but also continually updated as part
 *   of normal datapath operation, such as et_sq_avail_descs. These
 *   values need mutex protection.
 */
typedef struct ena_txq {
	kmutex_t		et_lock; /* WO */

	/* Back pointer to the device instance owning this queue. */
	struct ena		*et_ena; /* WO */
	uint_t			et_txqs_idx; /* WO */
	mac_ring_handle_t	et_mrh;	 /* WO */
	uint64_t		et_m_gen_num; /* TM */
	/*
	 * NOTE(review): et_state holds ena_txq_state_t bit flags, which
	 * suggests it is updated as the queue is brought up and torn
	 * down -- confirm that the WO annotation is accurate.
	 */
	ena_txq_state_t		et_state; /* WO */
	uint16_t		et_intr_vector; /* WO */

	enahw_tx_desc_t		*et_sq_descs; /* TM */
	ena_dma_buf_t		et_sq_dma;    /* WO */

	/* Is the Tx queue currently in a blocked state? */
	bool			et_blocked; /* TM */

	/*
	 * The number of descriptors owned by this ring. This value
	 * never changes after initialization.
	 */
	uint16_t		et_sq_num_descs;   /* WO */

	/*
	 * The number of descriptors currently available for Tx
	 * submission. When this value reaches zero the ring must
	 * block until device notifies us of freed descriptors.
	 */
	uint16_t		et_sq_avail_descs; /* TM */

	/*
	 * The current tail index of the queue (the first free
	 * descriptor for host Tx submission). After initialization,
	 * this value only increments, relying on unsigned wrap
	 * around. The ENA device seems to expect this behavior,
	 * performing its own modulo on the value for the purposes of
	 * indexing, much like the driver code needs to do in order to
	 * access the proper TCB entry.
	 */
	uint16_t		et_sq_tail_idx;  /* TM */

	/*
	 * The phase is used to know which CQ descriptors may be
	 * reclaimed. This is explained further in ena.c.
	 *
	 * NOTE(review): this comment sits on the SQ phase but speaks
	 * of CQ descriptors; et_cq_phase is declared further down --
	 * confirm which field it is meant to describe.
	 */
	uint16_t		et_sq_phase; /* TM */
	uint16_t		et_sq_hw_idx; /* WO */

	/*
	 * The "doorbell" address is how the host indicates to the
	 * device which descriptors are ready for Tx processing.
	 */
	uint32_t		*et_sq_db_addr; /* WO */

	/*
	 * The TCBs track host Tx information, like a pointer to the
	 * mblk being submitted. Currently we maintain a 1:1 mapping
	 * of SQ descriptors to TCBs as Tx is copy only.
	 */
	ena_tx_control_block_t	*et_tcbs;    /* TM */

	enahw_tx_cdesc_t	*et_cq_descs; /* TM */
	ena_dma_buf_t		et_cq_dma;    /* WO */
	uint16_t		et_cq_num_descs; /* WO */
	uint16_t		et_cq_head_idx; /* TM */
	uint16_t		et_cq_phase;	/* TM */
	uint16_t		et_cq_hw_idx;	/* WO */

	/*
	 * This address is used to control the CQ interrupts.
	 */
	uint32_t		*et_cq_unmask_addr; /* WO */
	uint32_t		*et_cq_numa_addr;   /* WO (currently unused) */

	/*
	 * This is used to detect transmit stalls and invoke a reset. The
	 * watchdog increments this counter when it sees that the TX
	 * ring is still blocked, and if it exceeds the threshold then the
	 * device is assumed to have stalled and needs to be reset.
	 */
	uint32_t		et_stall_watchdog; /* TM */

	/*
	 * This mutex protects the Tx queue stats. This mutex may be
	 * entered while et_lock is held, but et_lock is not required
	 * to access/modify the stats. However, if both locks are
	 * held, then et_lock must be entered first.
	 */
	kmutex_t		et_stat_lock;
	ena_txq_stat_t		et_stat;
	kstat_t			*et_kstat;
} ena_txq_t;
475 
/*
 * Rx queue state. Each value other than NONE is a distinct bit, so the
 * values may be combined.
 */
typedef enum ena_rxq_state {
	ENA_RXQ_STATE_NONE		= 0,
	ENA_RXQ_STATE_HOST_ALLOC	= 1 << 0,
	ENA_RXQ_STATE_CQ_CREATED	= 1 << 1,
	ENA_RXQ_STATE_SQ_CREATED	= 1 << 2,
	ENA_RXQ_STATE_SQ_FILLED		= 1 << 3,
	ENA_RXQ_STATE_READY		= 1 << 4, /* RxQ ready and waiting */
	ENA_RXQ_STATE_RUNNING		= 1 << 5, /* intrs enabled */
} ena_rxq_state_t;
485 
/*
 * The Rx control block: a DMA buffer along with an offset and length.
 *
 * NOTE(review): presumably one exists per Rx descriptor and the offset
 * and length locate the frame data within the buffer -- confirm
 * against the Rx path.
 */
typedef struct ena_rx_ctrl_block {
	ena_dma_buf_t	ercb_dma;
	uint8_t		ercb_offset;
	uint16_t	ercb_length;
} ena_rx_ctrl_block_t;
491 
/*
 * How an Rx queue is currently being read: by polling or from the
 * interrupt handler.
 */
typedef enum {
	ENA_RXQ_MODE_POLLING	= 1,
	ENA_RXQ_MODE_INTR	= 2,
} ena_rxq_mode_t;
496 
/*
 * Per-Rx-queue statistics, kept as named kstats.
 *
 * NOTE(review): the struct tag carries a _t suffix (ena_rxq_stat_t),
 * unlike its Tx counterpart (struct ena_txq_stat). It is left as is
 * since the tag is part of the interface.
 */
typedef struct ena_rxq_stat_t {
	/* The total number of packets/bytes received on this queue. */
	kstat_named_t	ers_packets;
	kstat_named_t	ers_bytes;

	/*
	 * At this time we expect all incoming frames to fit in a
	 * single buffer/descriptor. In some rare event that the
	 * device doesn't cooperate this stat is incremented.
	 */
	kstat_named_t	ers_multi_desc;

	/*
	 * The total number of times we failed to allocate a new mblk
	 * for an incoming frame.
	 */
	kstat_named_t	ers_allocb_fail;

	/*
	 * The total number of times the Rx interrupt handler reached
	 * its maximum limit for number of packets to process in a
	 * single interrupt. If you see this number increase
	 * continuously at a steady rate, then it may be an indication
	 * the driver is not entering polling mode.
	 */
	kstat_named_t	ers_intr_limit;

	/*
	 * The total number of times the device detected an incorrect
	 * IPv4 header checksum.
	 */
	kstat_named_t	ers_hck_ipv4_err;

	/*
	 * The total number of times the device detected an incorrect
	 * L4/ULP checksum.
	 */
	kstat_named_t	ers_hck_l4_err;
} ena_rxq_stat_t;
536 
/*
 * A receive queue, made up of a Submission Queue (SQ) and Completion
 * Queue (CQ) to form a logical descriptor ring for receiving packets.
 *
 * Write Once (WO)
 *
 *   This value is written once, before the datapath is activated, in
 *   a function which is controlled by mac(9E).
 *
 * Rx Mutex (RM) -- er_lock
 *
 *   This value is protected by the Rx queue's mutex. Some values may
 *   be initialized in a WO path, but also continually updated as part
 *   of normal datapath operation, such as er_sq_avail_descs. These
 *   values need mutex protection.
 */
typedef struct ena_rxq {
	kmutex_t		er_lock;

	/* Back pointer to the device instance owning this queue. */
	struct ena		*er_ena; /* WO */
	uint_t			er_rxqs_idx; /* WO */
	mac_ring_handle_t	er_mrh;	 /* WO */
	/*
	 * NOTE(review): the Tx equivalent, et_m_gen_num, is annotated
	 * TM rather than WO -- confirm which is intended here.
	 */
	uint64_t		er_m_gen_num; /* WO */
	ena_rxq_state_t		er_state; /* WO */
	uint16_t		er_intr_vector; /* WO */
	ena_rxq_mode_t		er_mode;	/* RM */
	uint16_t		er_intr_limit;	/* RM */

	/* The Submission Queue. */
	enahw_rx_desc_t		*er_sq_descs; /* RM */
	ena_dma_buf_t		er_sq_dma;    /* WO */
	uint16_t		er_sq_num_descs;   /* WO */
	uint16_t		er_sq_avail_descs; /* RM */
	uint16_t		er_sq_tail_idx;  /* RM */
	uint16_t		er_sq_phase; /* RM */
	uint16_t		er_sq_hw_idx;	/* WO */
	uint32_t		*er_sq_db_addr; /* WO */

	/* The Completion Queue. */
	enahw_rx_cdesc_t	*er_cq_descs; /* RM */
	ena_dma_buf_t		er_cq_dma;    /* WO */
	uint16_t		er_cq_num_descs; /* WO */
	uint16_t		er_cq_head_idx;	 /* RM */
	uint16_t		er_cq_phase;	 /* RM */
	uint16_t		er_cq_hw_idx;	 /* WO */
	uint32_t		*er_cq_unmask_addr; /* WO */
	uint32_t		*er_cq_numa_addr;    /* WO (currently unused) */

	/* The Rx control blocks. */
	ena_rx_ctrl_block_t	*er_rcbs; /* RM */

	/*
	 * The Rx queue stats and their mutex.
	 *
	 * NOTE(review): presumably the same lock ordering applies as
	 * for the Tx queue (queue lock before stat lock) -- confirm.
	 */
	kmutex_t		er_stat_lock;
	ena_rxq_stat_t		er_stat;
	kstat_t			*er_kstat;
} ena_rxq_t;
589 
/*
 * Device-wide reset statistics, kept as named kstats.
 *
 * NOTE(review): presumably each counts the device resets attributed to
 * the named cause -- confirm against ena_stats.c.
 */
typedef struct ena_device_stat {
	kstat_named_t	eds_reset_forced;
	kstat_named_t	eds_reset_error;
	kstat_named_t	eds_reset_fatal;
	kstat_named_t	eds_reset_keepalive;
	kstat_named_t	eds_reset_txstall;
} ena_device_stat_t;
597 
/*
 * These are stats based on enahw_resp_basic_stats_t and data that accompanies
 * the asynchronous keepalive event. They are kept as named kstats, grouped
 * into Tx counters and Rx counters.
 */
typedef struct ena_basic_stat {
	kstat_named_t	ebs_tx_bytes;
	kstat_named_t	ebs_tx_pkts;
	kstat_named_t	ebs_tx_drops;

	kstat_named_t	ebs_rx_bytes;
	kstat_named_t	ebs_rx_pkts;
	kstat_named_t	ebs_rx_drops;
	kstat_named_t	ebs_rx_overruns;
} ena_basic_stat_t;
612 
/*
 * These are stats based on enahw_resp_eni_stats_t, kept as named
 * kstats.
 *
 * NOTE(review): presumably each counts the occasions on which the named
 * allowance was exceeded -- confirm against the definition of
 * enahw_resp_eni_stats_t in ena_hw.h.
 */
typedef struct ena_extended_stat {
	kstat_named_t	ees_bw_in_exceeded;
	kstat_named_t	ees_bw_out_exceeded;
	kstat_named_t	ees_pps_exceeded;
	kstat_named_t	ees_conns_exceeded;
	kstat_named_t	ees_linklocal_exceeded;
} ena_extended_stat_t;
621 
/*
 * These stats monitor which AENQ handlers have been called. They are
 * kept as named kstats, one per handler.
 */
typedef struct ena_aenq_stat {
	kstat_named_t	eaes_default;
	kstat_named_t	eaes_link_change;
	kstat_named_t	eaes_notification;
	kstat_named_t	eaes_keep_alive;
	kstat_named_t	eaes_request_reset;
	kstat_named_t	eaes_fatal_error;
	kstat_named_t	eaes_warning;
} ena_aenq_stat_t;
632 
#ifdef DEBUG
/*
 * A cached device register, only present in DEBUG builds: its name,
 * its offset and the cached value. Note that the er_ member prefix is
 * shared with the members of ena_rxq_t.
 */
typedef struct ena_reg {
	const char	*er_name;
	const uint16_t	er_offset;
	uint32_t	er_value;
} ena_reg_t;
#endif /* DEBUG */
640 
/*
 * Device state flags. Each value other than UNKNOWN is a distinct bit.
 *
 * NOTE(review): presumably these are the values held in the ena_state
 * member of ena_t -- confirm.
 */
#define	ENA_STATE_UNKNOWN	0x00u
#define	ENA_STATE_INITIALIZED	0x01u
#define	ENA_STATE_STARTED	0x02u
#define	ENA_STATE_ERROR		0x04u
#define	ENA_STATE_RESETTING	0x08u
646 
/*
 * This structure contains the per-instance (PF or VF) state of the
 * device.
 */
typedef struct ena {
	dev_info_t		*ena_dip;
	int			ena_instance;

#ifdef DEBUG
	/*
	 * In debug kernels, the registers are cached here at various points
	 * for easy inspection via mdb(1).
	 */
	ena_reg_t		ena_reg[ENAHW_NUM_REGS];
#endif

	/*
	 * Global lock, used to synchronize administration changes to
	 * the ena_t. This lock should not be held in the datapath.
	 */
	kmutex_t		ena_lock;
	ena_attach_seq_t	ena_attach_seq;

	/*
	 * We use atomic ops for ena_state so that datapath consumers
	 * do not need to enter ena_lock.
	 */
	uint32_t		ena_state;

	/*
	 * The reason for the last device reset.
	 */
	enahw_reset_reason_t	ena_reset_reason;

	/*
	 * Watchdog
	 */
	kmutex_t		ena_watchdog_lock;
	ddi_periodic_t		ena_watchdog_periodic;
	uint64_t		ena_watchdog_last_keepalive;

	/*
	 * PCI config space and BAR handle.
	 */
	ddi_acc_handle_t	ena_pci_hdl;
	off_t			ena_reg_size;
	caddr_t			ena_reg_base;
	ddi_device_acc_attr_t	ena_reg_attr;
	ddi_acc_handle_t	ena_reg_hdl;

	/*
	 * Vendor information.
	 */
	uint16_t		ena_pci_vid;
	uint16_t		ena_pci_did;
	uint8_t			ena_pci_rev;
	uint16_t		ena_pci_svid;
	uint16_t		ena_pci_sdid;

	/*
	 * Device and controller versions.
	 */
	uint32_t		ena_dev_major_vsn;
	uint32_t		ena_dev_minor_vsn;
	uint32_t		ena_ctrl_major_vsn;
	uint32_t		ena_ctrl_minor_vsn;
	uint32_t		ena_ctrl_subminor_vsn;
	uint32_t		ena_ctrl_impl_id;

	/*
	 * Interrupts
	 */
	int			ena_num_intrs;
	ddi_intr_handle_t	*ena_intr_handles;
	size_t			ena_intr_handles_sz;
	int			ena_intr_caps;
	uint_t			ena_intr_pri;

	/* The handle for this instance's mac registration. */
	mac_handle_t		ena_mh;

	size_t			ena_page_sz;

	/*
	 * The MTU and data layer frame sizes.
	 */
	uint32_t		ena_mtu;
	uint32_t		ena_max_frame_hdr;
	uint32_t		ena_max_frame_total;

	/* The size (in bytes) of the Rx/Tx data buffers. */
	uint32_t		ena_tx_buf_sz;
	uint32_t		ena_rx_buf_sz;

	/*
	 * The maximum number of Scatter Gather List segments the
	 * device can address.
	 */
	uint8_t			ena_tx_sgl_max_sz;
	uint8_t			ena_rx_sgl_max_sz;

	/* The number of descriptors per Rx/Tx queue. */
	uint16_t		ena_rxq_num_descs;
	uint16_t		ena_txq_num_descs;

	/*
	 * The maximum number of frames which may be read per Rx
	 * interrupt.
	 */
	uint16_t		ena_rxq_intr_limit;

	/* The Rx/Tx data queues (rings). */
	ena_rxq_t		*ena_rxqs;
	uint16_t		ena_num_rxqs;
	ena_txq_t		*ena_txqs;
	uint16_t		ena_num_txqs;

	/* These statistics are device-wide. */
	kstat_t			*ena_device_kstat;
	ena_device_stat_t	ena_device_stat;
	hrtime_t		ena_device_basic_stat_last_update;
	kmutex_t		ena_device_basic_stat_lock;
	kstat_t			*ena_device_basic_kstat;
	kstat_t			*ena_device_extended_kstat;

	/*
	 * This tracks AENQ-related stats, it is implicitly
	 * device-wide.
	 */
	ena_aenq_stat_t		ena_aenq_stat;
	kstat_t			*ena_aenq_kstat;

	/*
	 * The Admin Queue, through which all device commands are
	 * sent.
	 */
	ena_adminq_t		ena_aq;

	/* The Async Event Notification Queue and the host info buffer. */
	ena_aenq_t		ena_aenq;
	ena_dma_buf_t		ena_host_info;

	/*
	 * Hardware info
	 */
	ena_hints_t		ena_device_hints;
	uint32_t		ena_supported_features;
	uint32_t		ena_capabilities;
	/* The DMA address width, checked by ENA_DMA_VERIFY_ADDR(). */
	uint8_t			ena_dma_width;
	bool			ena_link_autoneg;
	link_duplex_t		ena_link_duplex;
	uint64_t		ena_link_speed_mbits;
	enahw_link_speeds_t	ena_link_speeds;
	link_state_t		ena_link_state;
	uint32_t		ena_aenq_supported_groups;
	uint32_t		ena_aenq_enabled_groups;

	uint32_t		ena_tx_max_sq_num;
	uint32_t		ena_tx_max_sq_num_descs;
	uint32_t		ena_tx_max_cq_num;
	uint32_t		ena_tx_max_cq_num_descs;
	uint16_t		ena_tx_max_desc_per_pkt;
	uint32_t		ena_tx_max_hdr_len;

	uint32_t		ena_rx_max_sq_num;
	uint32_t		ena_rx_max_sq_num_descs;
	uint32_t		ena_rx_max_cq_num;
	uint32_t		ena_rx_max_cq_num_descs;
	uint16_t		ena_rx_max_desc_per_pkt;

	/* This is calculated from the Rx/Tx queue nums. */
	uint16_t		ena_max_io_queues;

	/* Hardware Offloads */
	bool			ena_tx_l3_ipv4_csum;

	bool			ena_tx_l4_ipv4_part_csum;
	bool			ena_tx_l4_ipv4_full_csum;
	bool			ena_tx_l4_ipv4_lso;

	bool			ena_tx_l4_ipv6_part_csum;
	bool			ena_tx_l4_ipv6_full_csum;
	bool			ena_tx_l4_ipv6_lso;

	bool			ena_rx_l3_ipv4_csum;
	bool			ena_rx_l4_ipv4_csum;
	bool			ena_rx_l4_ipv6_csum;
	bool			ena_rx_hash;

	uint32_t		ena_max_mtu;
	uint8_t			ena_mac_addr[ETHERADDRL];
} ena_t;
837 
/*
 * Misc
 *
 * Device reset, feature/capability queries and device hint updates.
 */
extern bool ena_reset(ena_t *, const enahw_reset_reason_t);
extern bool ena_is_feat_avail(ena_t *, const enahw_feature_id_t);
extern bool ena_is_cap_avail(ena_t *, const enahw_capability_id_t);
extern void ena_update_hints(ena_t *, enahw_device_hints_t *);

/*
 * Logging functions.
 *
 * The logging functions take a printf-style format string as their
 * second argument (see __KPRINTFLIKE(2)). Note that ena_trigger_reset()
 * is declared in this group although it is not a logging function.
 */
extern bool ena_debug;
extern void ena_err(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
extern void ena_dbg(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
extern void ena_panic(const ena_t *, const char *, ...) __KPRINTFLIKE(2);
extern void ena_trigger_reset(ena_t *, enahw_reset_reason_t);
854 
/*
 * Hardware access.
 *
 * 32-bit register reads and writes. The "bar" variants take a 16-bit
 * offset while the "abs" variants take a pointer.
 *
 * NOTE(review): presumably the offset is relative to the mapped
 * register BAR and the pointer is an absolute mapped address --
 * confirm against the implementations.
 */
extern uint32_t ena_hw_bar_read32(const ena_t *, const uint16_t);
extern uint32_t ena_hw_abs_read32(const ena_t *, uint32_t *);
extern void ena_hw_bar_write32(const ena_t *, const uint16_t, const uint32_t);
extern void ena_hw_abs_write32(const ena_t *, uint32_t *, const uint32_t);
extern const char *enahw_reset_reason(enahw_reset_reason_t);
/*
 * The register cache only exists in DEBUG builds; otherwise these
 * calls expand to nothing.
 */
#ifdef DEBUG
extern void ena_init_regcache(ena_t *);
extern void ena_update_regcache(ena_t *);
#else
#define	ena_init_regcache(x)
#define	ena_update_regcache(x)
#endif
870 
/*
 * Watchdog
 */
extern void ena_enable_watchdog(ena_t *);
extern void ena_disable_watchdog(ena_t *);

/*
 * Stats
 *
 * Each set of statistics has an init function, which returns a bool,
 * and a matching cleanup function. The device, basic, extended and
 * AENQ stats are per device; the Rx and Tx queue stats are per queue.
 */
extern void ena_stat_device_cleanup(ena_t *);
extern bool ena_stat_device_init(ena_t *);

extern void ena_stat_device_basic_cleanup(ena_t *);
extern bool ena_stat_device_basic_init(ena_t *);

extern void ena_stat_device_extended_cleanup(ena_t *);
extern bool ena_stat_device_extended_init(ena_t *);

extern void ena_stat_aenq_cleanup(ena_t *);
extern bool ena_stat_aenq_init(ena_t *);

extern void ena_stat_rxq_cleanup(ena_rxq_t *);
extern bool ena_stat_rxq_init(ena_rxq_t *);
extern void ena_stat_txq_cleanup(ena_txq_t *);
extern bool ena_stat_txq_init(ena_txq_t *);
896 
/*
 * DMA
 *
 * Allocation, release and zeroing of an ena_dma_buf_t, along with
 * helpers which store a 64-bit DMA address either into an enahw_addr_t
 * or into separate 32-bit and 16-bit values.
 *
 * NOTE(review): the meaning of the size_t argument to ena_dma_alloc()
 * is not visible in this header -- confirm against the implementation.
 */
extern bool ena_dma_alloc(ena_t *, ena_dma_buf_t *, ena_dma_conf_t *,
    size_t);
extern void ena_dma_free(ena_dma_buf_t *);
extern void ena_dma_bzero(ena_dma_buf_t *);
extern void ena_set_dma_addr(const ena_t *, const uint64_t, enahw_addr_t *);
extern void ena_set_dma_addr_values(const ena_t *, const uint64_t, uint32_t *,
    uint16_t *);
907 
/*
 * Interrupts
 *
 * ena_intr_add_handlers() and ena_intr_remove_handlers() have the
 * signatures of an attach function and a cleanup function
 * (ena_attach_fn_t and ena_cleanup_fn_t) respectively.
 */
extern bool ena_intr_add_handlers(ena_t *);
extern void ena_intr_remove_handlers(ena_t *, bool);
extern void ena_tx_intr_work(ena_txq_t *);
extern void ena_rx_intr_work(ena_rxq_t *);
extern bool ena_intrs_disable(ena_t *);
extern bool ena_intrs_enable(ena_t *);
917 
/*
 * MAC
 *
 * Registration with mac along with the ring start, stop, Tx, Rx poll
 * and stat entry points.
 *
 * NOTE(review): presumably the int argument to ena_ring_rx_poll() is
 * the poll budget in bytes, as is conventional for mac ring polling --
 * confirm against the implementation.
 */
extern bool ena_mac_register(ena_t *);
extern int ena_mac_unregister(ena_t *);
extern void ena_ring_tx_stop(mac_ring_driver_t);
extern int ena_ring_tx_start(mac_ring_driver_t, uint64_t);
extern mblk_t *ena_ring_tx(void *, mblk_t *);
extern void ena_ring_rx_stop(mac_ring_driver_t);
extern int ena_ring_rx_start(mac_ring_driver_t rh, uint64_t gen_num);
extern int ena_m_stat(void *, uint_t, uint64_t *);
extern mblk_t *ena_ring_rx_poll(void *, int);
extern int ena_ring_rx_stat(mac_ring_driver_t, uint_t, uint64_t *);
extern int ena_ring_tx_stat(mac_ring_driver_t, uint_t, uint64_t *);
932 
/*
 * Admin API
 *
 * Submission of admin commands and polling for their responses, along
 * with wrappers for specific commands (create/destroy CQ and SQ,
 * get/set feature, get stats).
 *
 * NOTE(review): presumably the int return values are errno values, as
 * suggested by enahw_resp_status_to_errno() -- confirm.
 */
extern int ena_admin_submit_cmd(ena_t *, enahw_cmd_desc_t *,
    enahw_resp_desc_t *, ena_cmd_ctx_t **);
extern int ena_admin_poll_for_resp(ena_t *, ena_cmd_ctx_t *);
extern void ena_free_host_info(ena_t *);
extern bool ena_init_host_info(ena_t *);
extern void ena_create_cmd_ctx(ena_t *);
extern void ena_release_all_cmd_ctx(ena_t *);
extern int ena_create_cq(ena_t *, uint16_t, uint64_t, bool, uint32_t,
    uint16_t *, uint32_t **, uint32_t **);
extern int ena_destroy_cq(ena_t *, uint16_t);
extern int ena_create_sq(ena_t *, uint16_t, uint64_t, bool, uint16_t,
    uint16_t *, uint32_t **);
extern int ena_destroy_sq(ena_t *, uint16_t, bool);
extern int ena_set_feature(ena_t *, enahw_cmd_desc_t *,
    enahw_resp_desc_t *, const enahw_feature_id_t, const uint8_t);
extern int ena_get_feature(ena_t *, enahw_resp_desc_t *,
    const enahw_feature_id_t, const uint8_t);
extern int ena_admin_get_basic_stats(ena_t *, enahw_resp_desc_t *);
extern int ena_admin_get_eni_stats(ena_t *, enahw_resp_desc_t *);
extern int enahw_resp_status_to_errno(ena_t *, enahw_resp_status_t);
956 
/*
 * Async event queue
 *
 * The lifecycle of the Async Event Notification Queue (AENQ): init,
 * configure, enable, process events, free.
 */
extern bool ena_aenq_init(ena_t *);
extern bool ena_aenq_configure(ena_t *);
extern void ena_aenq_enable(ena_t *);
extern void ena_aenq_work(ena_t *);
extern void ena_aenq_free(ena_t *);
965 
/*
 * Rx/Tx allocations
 *
 * Allocation and cleanup of a single Rx or Tx queue.
 *
 * NOTE(review): the meaning of the bool argument to the cleanup
 * functions is not visible in this header -- confirm against the
 * implementations.
 */
extern bool ena_alloc_rxq(ena_rxq_t *);
extern void ena_cleanup_rxq(ena_rxq_t *, bool);
extern bool ena_alloc_txq(ena_txq_t *);
extern void ena_cleanup_txq(ena_txq_t *, bool);
973 
974 #ifdef __cplusplus
975 }
976 #endif
977 
978 #endif	/* _ENA_H */
979