/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

#include "ena_hw.h"
#include "ena.h"

CTASSERT(sizeof (enahw_aenq_desc_t) == 64);

/*
 * This is an extra safety check to ensure that any addition to the
 * AENQ group enum is also reflected in the size of the groups array.
 */
CTASSERT(ENAHW_AENQ_GROUPS_ARR_NUM == 8);

typedef struct ena_aenq_grpstr {
	enahw_aenq_groups_t	eag_type;
	const char		*eag_str;
} ena_aenq_grpstr_t;

static ena_aenq_grpstr_t ena_groups_str[ENAHW_AENQ_GROUPS_ARR_NUM] = {
	{
		.eag_type = ENAHW_AENQ_GROUP_LINK_CHANGE,
		.eag_str = "LINK CHANGE"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_FATAL_ERROR,
		.eag_str = "FATAL ERROR"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_WARNING,
		.eag_str = "WARNING"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_NOTIFICATION,
		.eag_str = "NOTIFICATION"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_KEEP_ALIVE,
		.eag_str = "KEEP ALIVE"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_REFRESH_CAPABILITIES,
		.eag_str = "REFRESH CAPABILITIES"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_CONF_NOTIFICATIONS,
		.eag_str = "CONFIG NOTIFICATIONS"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_DEVICE_REQUEST_RESET,
		.eag_str = "DEVICE RESET REQUEST"
	}
};

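/*
 * Negotiate the set of AENQ events the device will deliver: read the
 * groups the device supports, request the subset that this driver
 * handles, and record the resulting supported and enabled sets for
 * later reference.
 */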
bool
ena_aenq_configure(ena_t *ena)
{
	enahw_cmd_desc_t cmd;
	enahw_feat_aenq_t *cmd_feat =
	    &cmd.ecd_cmd.ecd_set_feat.ecsf_feat.ecsf_aenq;
	enahw_resp_desc_t resp;
	enahw_feat_aenq_t *resp_feat = &resp.erd_resp.erd_get_feat.ergf_aenq;
	enahw_aenq_groups_t to_enable;

	bzero(&resp, sizeof (resp));
	if (ena_get_feature(ena, &resp, ENAHW_FEAT_AENQ_CONFIG,
	    ENAHW_FEAT_AENQ_CONFIG_VER) != 0) {
		return (false);
	}

	to_enable = BIT(ENAHW_AENQ_GROUP_LINK_CHANGE) |
	    BIT(ENAHW_AENQ_GROUP_FATAL_ERROR) |
	    BIT(ENAHW_AENQ_GROUP_WARNING) |
	    BIT(ENAHW_AENQ_GROUP_NOTIFICATION) |
	    BIT(ENAHW_AENQ_GROUP_KEEP_ALIVE) |
	    BIT(ENAHW_AENQ_GROUP_DEVICE_REQUEST_RESET);
	to_enable &= resp_feat->efa_supported_groups;

	bzero(&cmd, sizeof (cmd));
	bzero(&resp, sizeof (resp));
	cmd_feat->efa_enabled_groups = to_enable;

	if (ena_set_feature(ena, &cmd, &resp, ENAHW_FEAT_AENQ_CONFIG,
	    ENAHW_FEAT_AENQ_CONFIG_VER) != 0) {
		return (false);
	}

	bzero(&resp, sizeof (resp));
	if (ena_get_feature(ena, &resp, ENAHW_FEAT_AENQ_CONFIG,
	    ENAHW_FEAT_AENQ_CONFIG_VER) != 0) {
		return (false);
	}

	ena->ena_aenq_supported_groups = resp_feat->efa_supported_groups;
	ena->ena_aenq_enabled_groups = resp_feat->efa_enabled_groups;

	for (uint_t i = 0; i < ENAHW_AENQ_GROUPS_ARR_NUM; i++) {
		ena_aenq_grpstr_t *grpstr = &ena_groups_str[i];
		bool supported = BIT(grpstr->eag_type) &
		    resp_feat->efa_supported_groups;
		bool enabled = BIT(grpstr->eag_type) &
		    resp_feat->efa_enabled_groups;

		ena_dbg(ena, "%s supported: %s enabled: %s", grpstr->eag_str,
		    supported ? "Y" : "N", enabled ? "Y" : "N");
	}

	return (true);
}

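/*
 * Process any pending events on the AENQ. Descriptor ownership is
 * tracked by a phase bit that flips each time the head index wraps
 * around the ring; once all pending events have been dispatched, the
 * new head is written to the doorbell register to return the consumed
 * descriptors to the device.
 */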
void
ena_aenq_work(ena_t *ena)
{
	ena_aenq_t *aenq = &ena->ena_aenq;
	uint16_t head_mod = aenq->eaenq_head & (aenq->eaenq_num_descs - 1);
	bool processed = false;
	enahw_aenq_desc_t *desc = &aenq->eaenq_descs[head_mod];

	ENA_DMA_SYNC(aenq->eaenq_dma, DDI_DMA_SYNC_FORKERNEL);

	while (ENAHW_AENQ_DESC_PHASE(desc) == aenq->eaenq_phase) {
		ena_aenq_hdlr_t hdlr;

		ASSERT3U(desc->ead_group, <, ENAHW_AENQ_GROUPS_ARR_NUM);
		processed = true;

		/*
		 * Keepalives occur once per second, so we don't issue a
		 * debug log message for each of those.
		 */
		if (ena_debug &&
		    desc->ead_group != ENAHW_AENQ_GROUP_KEEP_ALIVE) {
			uint64_t ts = ((uint64_t)desc->ead_ts_high << 32) |
			    (uint64_t)desc->ead_ts_low;

			ena_dbg(ena,
			    "AENQ Group: (0x%x) %s Syndrome: 0x%x ts: %" PRIu64
			    " us", desc->ead_group,
			    ena_groups_str[desc->ead_group].eag_str,
			    desc->ead_syndrome, ts);
		}

		hdlr = ena->ena_aenq.eaenq_hdlrs[desc->ead_group];
		hdlr(ena, desc);

		aenq->eaenq_head++;
		head_mod = aenq->eaenq_head & (aenq->eaenq_num_descs - 1);

		if (head_mod == 0)
			aenq->eaenq_phase ^= 1;

		desc = &aenq->eaenq_descs[head_mod];
	}

	if (processed) {
		ena_hw_bar_write32(ena, ENAHW_REG_AENQ_HEAD_DB,
		    aenq->eaenq_head);
	}
}

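/*
 * Handle a link change event by propagating the new link state to the
 * mac layer when it differs from the currently recorded state.
 */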
static void
ena_aenq_link_change_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;
	bool is_up = (desc->ead_payload.link_change.flags &
	    ENAHW_AENQ_LINK_CHANGE_LINK_STATUS_MASK) != 0;
	link_state_t new_state = is_up ? LINK_STATE_UP : LINK_STATE_DOWN;

	/*
	 * The interrupts are not enabled until after we register mac,
	 * so the mac handle should be valid.
	 */
	ASSERT3U(ena->ena_attach_seq, >=, ENA_ATTACH_MAC_REGISTER);
	ena->ena_aenq_stat.eaes_link_change.value.ui64++;

	ena_dbg(ena, "link is %s", is_up ? "UP" : "DOWN");

	mutex_enter(&ena->ena_lock);

	/*
	 * Notify mac only on an actual change in status.
	 */
	if (ena->ena_link_state != new_state) {
		mac_link_update(ena->ena_mh, new_state);
		ena->ena_link_state = new_state;
	}

	mutex_exit(&ena->ena_lock);
}

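/*
 * Handle a notification event. The only syndrome we currently expect
 * is a device hints update.
 */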
static void
ena_aenq_notification_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;

	if (desc->ead_syndrome == ENAHW_AENQ_SYNDROME_UPDATE_HINTS) {
		enahw_device_hints_t *hints =
		    (enahw_device_hints_t *)desc->ead_payload.raw;

		ena_update_hints(ena, hints);
	} else {
		ena_err(ena, "Invalid aenq notification syndrome 0x%x",
		    desc->ead_syndrome);
	}

	ena->ena_aenq_stat.eaes_notification.value.ui64++;
}

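/*
 * Handle a keepalive event: record its arrival time for the watchdog
 * and update the device-reported drop and overrun counters, each of
 * which arrives split across high and low 32-bit words.
 */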
static void
ena_aenq_keep_alive_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;
	uint64_t rx_drops, tx_drops, rx_overruns;
	ena_basic_stat_t *ebs = ena->ena_device_basic_kstat->ks_data;
	uint64_t now = (uint64_t)gethrtime();

	(void) atomic_swap_64(&ena->ena_watchdog_last_keepalive, now);

	rx_drops =
	    ((uint64_t)desc->ead_payload.keep_alive.rx_drops_high << 32) |
	    desc->ead_payload.keep_alive.rx_drops_low;
	tx_drops =
	    ((uint64_t)desc->ead_payload.keep_alive.tx_drops_high << 32) |
	    desc->ead_payload.keep_alive.tx_drops_low;
	rx_overruns =
	    ((uint64_t)desc->ead_payload.keep_alive.rx_overruns_high << 32) |
	    desc->ead_payload.keep_alive.rx_overruns_low;

	mutex_enter(&ena->ena_device_basic_stat_lock);
	ebs->ebs_rx_drops.value.ui64 = rx_drops;
	ebs->ebs_tx_drops.value.ui64 = tx_drops;
	ebs->ebs_rx_overruns.value.ui64 = rx_overruns;
	mutex_exit(&ena->ena_device_basic_stat_lock);

	ena->ena_aenq_stat.eaes_keep_alive.value.ui64++;
}

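/*
 * Handle a device-requested reset by recording the reset reason and
 * moving the driver into the error state, from which the watchdog
 * will issue the actual reset.
 */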
static void
ena_aenq_request_reset_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;

	ena->ena_reset_reason = ENAHW_RESET_DEVICE_REQUEST;
	atomic_or_32(&ena->ena_state, ENA_STATE_ERROR);

	ena->ena_aenq_stat.eaes_request_reset.value.ui64++;
}

static void
ena_aenq_fatal_error_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;

	/*
	 * The other open source drivers register this event but don't do
	 * anything when it triggers. We do the same for now. If this indicates
	 * that the fatal error bit has been set in the status register, the
	 * watchdog will pick that up directly and issue a reset.
	 */
	ena->ena_aenq_stat.eaes_fatal_error.value.ui64++;
}

static void
ena_aenq_warning_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;

	/*
	 * The other open source drivers register this event but don't do
	 * anything when it triggers. We do the same for now.
	 */
	ena->ena_aenq_stat.eaes_warning.value.ui64++;
}

static void
ena_aenq_default_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;

	ena->ena_aenq_stat.eaes_default.value.ui64++;
	/*
	 * We don't enable any of the groups that we don't support, so this
	 * should not happen.
	 */
	ena_dbg(ena, "unimplemented handler for aenq group: %s",
	    ena_groups_str[desc->ead_group].eag_str);
}

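/*
 * Install a handler for each AENQ group, falling back to the default
 * handler for groups we do not act on.
 */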
static void
ena_aenq_set_hdlrs(ena_aenq_t *aenq)
{
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_LINK_CHANGE] =
	    ena_aenq_link_change_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_NOTIFICATION] =
	    ena_aenq_notification_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_KEEP_ALIVE] =
	    ena_aenq_keep_alive_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_DEVICE_REQUEST_RESET] =
	    ena_aenq_request_reset_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_FATAL_ERROR] =
	    ena_aenq_fatal_error_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_WARNING] =
	    ena_aenq_warning_hdlr;

	/* The following events are not handled */
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_REFRESH_CAPABILITIES] =
	    ena_aenq_default_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_CONF_NOTIFICATIONS] =
	    ena_aenq_default_hdlr;
}

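/*
 * Initialize the AENQ: allocate the descriptor ring and install the
 * event handlers on first call (or zero the existing ring otherwise),
 * reset the head and phase, and program the ring's address and
 * capabilities into the device's BAR registers.
 */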
bool
ena_aenq_init(ena_t *ena)
{
	ena_aenq_t *aenq = &ena->ena_aenq;
	uint32_t addr_low, addr_high, wval;

	if (aenq->eaenq_descs == NULL) {
		size_t size;

		aenq->eaenq_num_descs = ENA_AENQ_NUM_DESCS;
		size = aenq->eaenq_num_descs * sizeof (*aenq->eaenq_descs);

		ena_dma_conf_t conf = {
			.edc_size = size,
			.edc_align = ENAHW_AENQ_DESC_BUF_ALIGNMENT,
			.edc_sgl = 1,
			.edc_endian = DDI_NEVERSWAP_ACC,
			.edc_stream = false,
		};

		if (!ena_dma_alloc(ena, &aenq->eaenq_dma, &conf, size)) {
			ena_err(ena, "failed to allocate DMA for AENQ");
			return (false);
		}

		ENA_DMA_VERIFY_ADDR(ena,
		    aenq->eaenq_dma.edb_cookie->dmac_laddress);
		aenq->eaenq_descs = (void *)aenq->eaenq_dma.edb_va;
		ena_aenq_set_hdlrs(aenq);
	} else {
		ena_dma_bzero(&aenq->eaenq_dma);
	}

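	/*
	 * The head starts at the ring depth so that the initial doorbell
	 * write in ena_aenq_enable() makes every descriptor available to
	 * the device, and the phase starts at 1 to match the phase value
	 * the device writes on its first pass through the ring.
	 */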
	aenq->eaenq_head = aenq->eaenq_num_descs;
	aenq->eaenq_phase = 1;

	addr_low = (uint32_t)(aenq->eaenq_dma.edb_cookie->dmac_laddress);
	addr_high = (uint32_t)(aenq->eaenq_dma.edb_cookie->dmac_laddress >> 32);
	ena_hw_bar_write32(ena, ENAHW_REG_AENQ_BASE_LO, addr_low);
	ena_hw_bar_write32(ena, ENAHW_REG_AENQ_BASE_HI, addr_high);
	ENA_DMA_SYNC(aenq->eaenq_dma, DDI_DMA_SYNC_FORDEV);
	wval = ENAHW_AENQ_CAPS_DEPTH(aenq->eaenq_num_descs) |
	    ENAHW_AENQ_CAPS_ENTRY_SIZE(sizeof (*aenq->eaenq_descs));
	ena_hw_bar_write32(ena, ENAHW_REG_AENQ_CAPS, wval);

	return (true);
}

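/*
 * Enable event delivery by writing the head index to the doorbell,
 * handing all AENQ descriptors to the device.
 */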
void
ena_aenq_enable(ena_t *ena)
{
	/*
	 * We set this to zero here so that the watchdog will ignore it
	 * until the first keepalive event is received. On devices that do
	 * not support sending keepalives, this value will remain at 0.
	 */
	ena->ena_watchdog_last_keepalive = 0;
	ena_hw_bar_write32(ena, ENAHW_REG_AENQ_HEAD_DB,
	    ena->ena_aenq.eaenq_head);
}

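/* Release the DMA memory backing the AENQ descriptor ring. */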
void
ena_aenq_free(ena_t *ena)
{
	ena_dma_free(&ena->ena_aenq.eaenq_dma);
}