1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * tavor.c
28  *    Tavor (InfiniBand) HCA Driver attach/detach Routines
29  *
30  *    Implements all the routines necessary for the attach, setup,
31  *    initialization (and subsequent possible teardown and detach) of the
32  *    Tavor InfiniBand HCA driver.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/file.h>
37 #include <sys/open.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h>
43 #include <sys/pci.h>
44 #include <sys/pci_cap.h>
45 #include <sys/bitmap.h>
46 #include <sys/policy.h>
47 
48 #include <sys/ib/adapters/tavor/tavor.h>
49 #include <sys/pci.h>
50 
/*
 * Tavor HCA State Pointer
 *
 * Opaque anchor for the per-instance soft state.  It is set up by
 * ddi_soft_state_init() in _init(), torn down by ddi_soft_state_fini() in
 * _fini(), and indexed by instance number through ddi_get_soft_state()
 * to obtain the tavor_state_t of a given device instance.
 */
void *tavor_statep;
53 
/*
 * The Tavor "userland resource database" is common to instances of the
 * Tavor HCA driver.  This structure "tavor_userland_rsrc_db" contains all
 * the necessary information to maintain it.
 *
 * Its "tdl_umapdb_lock" mutex is taken explicitly by tavor_open() and
 * tavor_close() around the *_nolock() database accessors so that several
 * lookups/insertions can be performed as one atomic step.
 */
tavor_umap_db_t tavor_userland_rsrc_db;
60 
/*
 * Prototypes for the file-local (static) routines used by the
 * attach/detach, open/close and hardware initialization paths.
 */
static int tavor_attach(dev_info_t *, ddi_attach_cmd_t);
static int tavor_detach(dev_info_t *, ddi_detach_cmd_t);
static int tavor_open(dev_t *, int, int, cred_t *);
static int tavor_close(dev_t, int, int, cred_t *);
static int tavor_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int tavor_drv_init(tavor_state_t *state, dev_info_t *dip, int instance);
static void tavor_drv_fini(tavor_state_t *state);
static void tavor_drv_fini2(tavor_state_t *state);
static int tavor_isr_init(tavor_state_t *state);
static void tavor_isr_fini(tavor_state_t *state);
static int tavor_hw_init(tavor_state_t *state);
static void tavor_hw_fini(tavor_state_t *state,
    tavor_drv_cleanup_level_t cleanup);
static int tavor_soft_state_init(tavor_state_t *state);
static void tavor_soft_state_fini(tavor_state_t *state);
static int tavor_hca_port_init(tavor_state_t *state);
static int tavor_hca_ports_shutdown(tavor_state_t *state, uint_t num_init);
static void tavor_hca_config_setup(tavor_state_t *state,
    tavor_hw_initqueryhca_t *inithca);
static int tavor_internal_uarpgs_init(tavor_state_t *state);
static void tavor_internal_uarpgs_fini(tavor_state_t *state);
static int tavor_special_qp_contexts_reserve(tavor_state_t *state);
static void tavor_special_qp_contexts_unreserve(tavor_state_t *state);
static int tavor_sw_reset(tavor_state_t *state);
static int tavor_mcg_init(tavor_state_t *state);
static void tavor_mcg_fini(tavor_state_t *state);
static int tavor_fw_version_check(tavor_state_t *state);
static void tavor_device_info_report(tavor_state_t *state);
static void tavor_pci_capability_list(tavor_state_t *state,
    ddi_acc_handle_t hdl);
static void tavor_pci_capability_vpd(tavor_state_t *state,
    ddi_acc_handle_t hdl, uint_t offset);
static int tavor_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset,
    uint32_t addr, uint32_t *data);
static void tavor_pci_capability_pcix(tavor_state_t *state,
    ddi_acc_handle_t hdl, uint_t offset);
static int tavor_intr_or_msi_init(tavor_state_t *state);
static int tavor_add_intrs(tavor_state_t *state, int intr_type);
static int tavor_intr_or_msi_fini(tavor_state_t *state);

/* X86 fastreboot support */
static int tavor_intr_disable(tavor_state_t *);
static int tavor_quiesce(dev_info_t *);
104 
/*
 * Character/Block Operations
 *
 * Only the open, close, ioctl and devmap entry points are supplied by the
 * driver (tavor_ioctl and tavor_devmap are defined outside this portion of
 * the file).  The block-device, read/write and segmap slots are filled with
 * nodev, and the mmap and streams slots are NULL.
 */
static struct cb_ops tavor_cb_ops = {
	tavor_open,		/* open */
	tavor_close,		/* close */
	nodev,			/* strategy (block) */
	nodev,			/* print (block) */
	nodev,			/* dump (block) */
	nodev,			/* read */
	nodev,			/* write */
	tavor_ioctl,		/* ioctl */
	tavor_devmap,		/* devmap */
	NULL,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streams */
	D_NEW | D_MP |
	D_64BIT | D_HOTPLUG |
	D_DEVMAP,		/* flags */
	CB_REV			/* rev */
};
126 
/*
 * Driver Operations
 *
 * Ties the autoconfiguration entry points (getinfo, attach, detach and
 * quiesce) implemented in this file to the character-device operations
 * vector above.  identify and probe are nulldev; reset and power are nodev.
 */
static struct dev_ops tavor_ops = {
	DEVO_REV,		/* struct rev */
	0,			/* refcnt */
	tavor_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	tavor_attach,		/* attach */
	tavor_detach,		/* detach */
	nodev,			/* reset */
	&tavor_cb_ops,		/* cb_ops */
	NULL,			/* bus_ops */
	nodev,			/* power */
	tavor_quiesce,		/* devo_quiesce */
};
142 
/*
 * Module Driver Info
 *
 * Describes this module as a device driver (mod_driverops), gives the
 * descriptive name string for the module, and points at the dev_ops
 * vector above.
 */
static struct modldrv tavor_modldrv = {
	&mod_driverops,
	"Tavor InfiniBand HCA Driver",
	&tavor_ops
};
149 
/*
 * Module Linkage
 *
 * NULL-terminated list of linkage structures for this module; it is the
 * argument handed to ibc_init()/ibc_fini() and to mod_install(),
 * mod_remove() and mod_info() by _init(), _fini() and _info() below.
 */
static struct modlinkage tavor_modlinkage = {
	MODREV_1,
	&tavor_modldrv,
	NULL
};
156 
157 /*
158  * This extern refers to the ibc_operations_t function vector that is defined
159  * in the tavor_ci.c file.
160  */
161 extern ibc_operations_t	tavor_ibc_ops;
162 
163 /*
164  * _init()
165  */
166 int
_init()167 _init()
168 {
169 	int	status;
170 
171 	status = ddi_soft_state_init(&tavor_statep, sizeof (tavor_state_t),
172 	    (size_t)TAVOR_INITIAL_STATES);
173 	if (status != 0) {
174 		return (status);
175 	}
176 
177 	status = ibc_init(&tavor_modlinkage);
178 	if (status != 0) {
179 		ddi_soft_state_fini(&tavor_statep);
180 		return (status);
181 	}
182 	status = mod_install(&tavor_modlinkage);
183 	if (status != 0) {
184 		ibc_fini(&tavor_modlinkage);
185 		ddi_soft_state_fini(&tavor_statep);
186 		return (status);
187 	}
188 
189 	/* Initialize the Tavor "userland resources database" */
190 	tavor_umap_db_init();
191 
192 	return (status);
193 }
194 
195 
196 /*
197  * _info()
198  */
199 int
_info(struct modinfo * modinfop)200 _info(struct modinfo *modinfop)
201 {
202 	int	status;
203 
204 	status = mod_info(&tavor_modlinkage, modinfop);
205 	return (status);
206 }
207 
208 
209 /*
210  * _fini()
211  */
212 int
_fini()213 _fini()
214 {
215 	int	status;
216 
217 	status = mod_remove(&tavor_modlinkage);
218 	if (status != 0) {
219 		return (status);
220 	}
221 
222 	/* Destroy the Tavor "userland resources database" */
223 	tavor_umap_db_fini();
224 
225 	ibc_fini(&tavor_modlinkage);
226 	ddi_soft_state_fini(&tavor_statep);
227 	return (status);
228 }
229 
230 
231 /*
232  * tavor_getinfo()
233  */
234 /* ARGSUSED */
235 static int
tavor_getinfo(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)236 tavor_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
237 {
238 	dev_t		dev;
239 	tavor_state_t	*state;
240 	minor_t		instance;
241 
242 	switch (cmd) {
243 	case DDI_INFO_DEVT2DEVINFO:
244 		dev = (dev_t)arg;
245 		instance = TAVOR_DEV_INSTANCE(dev);
246 		state = ddi_get_soft_state(tavor_statep, instance);
247 		if (state == NULL) {
248 			return (DDI_FAILURE);
249 		}
250 		*result = (void *)state->ts_dip;
251 		return (DDI_SUCCESS);
252 
253 	case DDI_INFO_DEVT2INSTANCE:
254 		dev = (dev_t)arg;
255 		instance = TAVOR_DEV_INSTANCE(dev);
256 		*result = (void *)(uintptr_t)instance;
257 		return (DDI_SUCCESS);
258 
259 	default:
260 		break;
261 	}
262 
263 	return (DDI_FAILURE);
264 }
265 
266 
267 /*
268  * tavor_open()
269  */
270 /* ARGSUSED */
271 static int
tavor_open(dev_t * devp,int flag,int otyp,cred_t * credp)272 tavor_open(dev_t *devp, int flag, int otyp, cred_t *credp)
273 {
274 	tavor_state_t		*state;
275 	tavor_rsrc_t		*rsrcp;
276 	tavor_umap_db_entry_t	*umapdb, *umapdb2;
277 	minor_t			instance;
278 	uint64_t		key, value;
279 	uint_t			tr_indx;
280 	dev_t			dev;
281 	int			status;
282 
283 	instance = TAVOR_DEV_INSTANCE(*devp);
284 	state = ddi_get_soft_state(tavor_statep, instance);
285 	if (state == NULL) {
286 		return (ENXIO);
287 	}
288 
289 	/*
290 	 * Only allow driver to be opened for character access, and verify
291 	 * whether exclusive access is allowed.
292 	 */
293 	if ((otyp != OTYP_CHR) || ((flag & FEXCL) &&
294 	    secpolicy_excl_open(credp) != 0)) {
295 		return (EINVAL);
296 	}
297 
298 	/*
299 	 * Search for the current process PID in the "userland resources
300 	 * database".  If it is not found, then attempt to allocate a UAR
301 	 * page and add the ("key", "value") pair to the database.
302 	 * Note:  As a last step we always return a devp appropriate for
303 	 * the open.  Either we return a new minor number (based on the
304 	 * instance and the UAR page index) or we return the current minor
305 	 * number for the given client process.
306 	 *
307 	 * We also add an entry to the database to allow for lookup from
308 	 * "dev_t" to the current process PID.  This is necessary because,
309 	 * under certain circumstance, the process PID that calls the Tavor
310 	 * close() entry point may not be the same as the one who called
311 	 * open().  Specifically, this can happen if a child process calls
312 	 * the Tavor's open() entry point, gets a UAR page, maps it out (using
313 	 * mmap()), and then exits without calling munmap().  Because mmap()
314 	 * adds a reference to the file descriptor, at the exit of the child
315 	 * process the file descriptor is "inherited" by the parent (and will
316 	 * be close()'d by the parent's PID only when it exits).
317 	 *
318 	 * Note: We use the tavor_umap_db_find_nolock() and
319 	 * tavor_umap_db_add_nolock() database access routines below (with
320 	 * an explicit mutex_enter of the database lock - "tdl_umapdb_lock")
321 	 * to ensure that the multiple accesses (in this case searching for,
322 	 * and then adding _two_ database entries) can be done atomically.
323 	 */
324 	key = ddi_get_pid();
325 	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
326 	status = tavor_umap_db_find_nolock(instance, key,
327 	    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
328 	if (status != DDI_SUCCESS) {
329 		/*
330 		 * If we are in 'maintenance mode', we cannot alloc a UAR page.
331 		 * But we still need some rsrcp value, and a mostly unique
332 		 * tr_indx value.  So we set rsrcp to NULL for maintenance
333 		 * mode, and use a rolling count for tr_indx.  The field
334 		 * 'ts_open_tr_indx' is used only in this maintenance mode
335 		 * condition.
336 		 *
337 		 * Otherwise, if we are in operational mode then we allocate
338 		 * the UAR page as normal, and use the rsrcp value and tr_indx
339 		 * value from that allocation.
340 		 */
341 		if (!TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
342 			rsrcp = NULL;
343 			tr_indx = state->ts_open_tr_indx++;
344 		} else {
345 			/* Allocate a new UAR page for this process */
346 			status = tavor_rsrc_alloc(state, TAVOR_UARPG, 1,
347 			    TAVOR_NOSLEEP, &rsrcp);
348 			if (status != DDI_SUCCESS) {
349 				mutex_exit(
350 				    &tavor_userland_rsrc_db.tdl_umapdb_lock);
351 				return (EAGAIN);
352 			}
353 
354 			tr_indx = rsrcp->tr_indx;
355 		}
356 
357 		/*
358 		 * Allocate an entry to track the UAR page resource in the
359 		 * "userland resources database".
360 		 */
361 		umapdb = tavor_umap_db_alloc(instance, key,
362 		    MLNX_UMAP_UARPG_RSRC, (uint64_t)(uintptr_t)rsrcp);
363 		if (umapdb == NULL) {
364 			mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
365 			/* If in "maintenance mode", don't free the rsrc */
366 			if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
367 				tavor_rsrc_free(state, &rsrcp);
368 			}
369 			return (EAGAIN);
370 		}
371 
372 		/*
373 		 * Create a new device number.  Minor number is a function of
374 		 * the UAR page index (15 bits) and the device instance number
375 		 * (3 bits).
376 		 */
377 		dev = makedevice(getmajor(*devp), (tr_indx <<
378 		    TAVOR_MINORNUM_SHIFT) | instance);
379 
380 		/*
381 		 * Allocate another entry in the "userland resources database"
382 		 * to track the association of the device number (above) to
383 		 * the current process ID (in "key").
384 		 */
385 		umapdb2 = tavor_umap_db_alloc(instance, dev,
386 		    MLNX_UMAP_PID_RSRC, (uint64_t)key);
387 		if (umapdb2 == NULL) {
388 			mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
389 			tavor_umap_db_free(umapdb);
390 			/* If in "maintenance mode", don't free the rsrc */
391 			if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
392 				tavor_rsrc_free(state, &rsrcp);
393 			}
394 			return (EAGAIN);
395 		}
396 
397 		/* Add the entries to the database */
398 		tavor_umap_db_add_nolock(umapdb);
399 		tavor_umap_db_add_nolock(umapdb2);
400 
401 	} else {
402 		/*
403 		 * Return the same device number as on the original open()
404 		 * call.  This was calculated as a function of the UAR page
405 		 * index (top 16 bits) and the device instance number
406 		 */
407 		rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
408 		dev = makedevice(getmajor(*devp), (rsrcp->tr_indx <<
409 		    TAVOR_MINORNUM_SHIFT) | instance);
410 	}
411 	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
412 
413 	*devp = dev;
414 
415 	return (0);
416 }
417 
418 
419 /*
420  * tavor_close()
421  */
422 /* ARGSUSED */
423 static int
tavor_close(dev_t dev,int flag,int otyp,cred_t * credp)424 tavor_close(dev_t dev, int flag, int otyp, cred_t *credp)
425 {
426 	tavor_state_t		*state;
427 	tavor_rsrc_t		*rsrcp;
428 	tavor_umap_db_entry_t	*umapdb;
429 	tavor_umap_db_priv_t	*priv;
430 	minor_t			instance;
431 	uint64_t		key, value;
432 	int			status;
433 
434 	instance = TAVOR_DEV_INSTANCE(dev);
435 	state = ddi_get_soft_state(tavor_statep, instance);
436 	if (state == NULL) {
437 		return (ENXIO);
438 	}
439 
440 	/*
441 	 * Search for "dev_t" in the "userland resources database".  As
442 	 * explained above in tavor_open(), we can't depend on using the
443 	 * current process ID here to do the lookup because the process
444 	 * that ultimately closes may not be the same one who opened
445 	 * (because of inheritance).
446 	 * So we lookup the "dev_t" (which points to the PID of the process
447 	 * that opened), and we remove the entry from the database (and free
448 	 * it up).  Then we do another query based on the PID value.  And when
449 	 * we find that database entry, we free it up too and then free the
450 	 * Tavor UAR page resource.
451 	 *
452 	 * Note: We use the tavor_umap_db_find_nolock() database access
453 	 * routine below (with an explicit mutex_enter of the database lock)
454 	 * to ensure that the multiple accesses (which attempt to remove the
455 	 * two database entries) can be done atomically.
456 	 *
457 	 * This works the same in both maintenance mode and HCA mode, except
458 	 * for the call to tavor_rsrc_free().  In the case of maintenance mode,
459 	 * this call is not needed, as it was not allocated in tavor_open()
460 	 * above.
461 	 */
462 	key = dev;
463 	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
464 	status = tavor_umap_db_find_nolock(instance, key, MLNX_UMAP_PID_RSRC,
465 	    &value, TAVOR_UMAP_DB_REMOVE, &umapdb);
466 	if (status == DDI_SUCCESS) {
467 		/*
468 		 * If the "tdb_priv" field is non-NULL, it indicates that
469 		 * some "on close" handling is still necessary.  Call
470 		 * tavor_umap_db_handle_onclose_cb() to do the handling (i.e.
471 		 * to invoke all the registered callbacks).  Then free up
472 		 * the resources associated with "tdb_priv" and continue
473 		 * closing.
474 		 */
475 		priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
476 		if (priv != NULL) {
477 			tavor_umap_db_handle_onclose_cb(priv);
478 			kmem_free(priv, sizeof (tavor_umap_db_priv_t));
479 			umapdb->tdbe_common.tdb_priv = (void *)NULL;
480 		}
481 
482 		tavor_umap_db_free(umapdb);
483 
484 		/*
485 		 * Now do another lookup using PID as the key (copy it from
486 		 * "value").  When this lookup is complete, the "value" field
487 		 * will contain the tavor_rsrc_t pointer for the UAR page
488 		 * resource.
489 		 */
490 		key = value;
491 		status = tavor_umap_db_find_nolock(instance, key,
492 		    MLNX_UMAP_UARPG_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
493 		    &umapdb);
494 		if (status == DDI_SUCCESS) {
495 			tavor_umap_db_free(umapdb);
496 			/* If in "maintenance mode", don't free the rsrc */
497 			if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
498 				rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
499 				tavor_rsrc_free(state, &rsrcp);
500 			}
501 		}
502 	}
503 	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
504 
505 	return (0);
506 }
507 
508 
509 /*
510  * tavor_attach()
511  *    Context: Only called from attach() path context
512  */
513 static int
tavor_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)514 tavor_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
515 {
516 	tavor_state_t	*state;
517 	ibc_clnt_hdl_t	tmp_ibtfpriv;
518 	ibc_status_t	ibc_status;
519 	int		instance;
520 	int		status;
521 
522 #ifdef __lock_lint
523 	(void) tavor_quiesce(dip);
524 #endif
525 
526 	switch (cmd) {
527 	case DDI_ATTACH:
528 		instance = ddi_get_instance(dip);
529 		status = ddi_soft_state_zalloc(tavor_statep, instance);
530 		if (status != DDI_SUCCESS) {
531 			cmn_err(CE_NOTE, "tavor%d: driver failed to attach: "
532 			    "attach_ssz_fail", instance);
533 			goto fail_attach_nomsg;
534 
535 		}
536 		state = ddi_get_soft_state(tavor_statep, instance);
537 		if (state == NULL) {
538 			ddi_soft_state_free(tavor_statep, instance);
539 			cmn_err(CE_NOTE, "tavor%d: driver failed to attach: "
540 			    "attach_gss_fail", instance);
541 			goto fail_attach_nomsg;
542 		}
543 
544 		/* clear the attach error buffer */
545 		TAVOR_ATTACH_MSG_INIT(state->ts_attach_buf);
546 
547 		/*
548 		 * Initialize Tavor driver and hardware.
549 		 *
550 		 * Note: If this initialization fails we may still wish to
551 		 * create a device node and remain operational so that Tavor
552 		 * firmware can be updated/flashed (i.e. "maintenance mode").
553 		 * If this is the case, then "ts_operational_mode" will be
554 		 * equal to TAVOR_MAINTENANCE_MODE.  We will not attempt to
555 		 * attach to the IBTF or register with the IBMF (i.e. no
556 		 * InfiniBand interfaces will be enabled).
557 		 */
558 		status = tavor_drv_init(state, dip, instance);
559 		if ((status != DDI_SUCCESS) &&
560 		    (TAVOR_IS_OPERATIONAL(state->ts_operational_mode))) {
561 			goto fail_attach;
562 		}
563 
564 		/* Create the minor node for device */
565 		status = ddi_create_minor_node(dip, "devctl", S_IFCHR, instance,
566 		    DDI_PSEUDO, 0);
567 		if (status != DDI_SUCCESS) {
568 			tavor_drv_fini(state);
569 			TAVOR_ATTACH_MSG(state->ts_attach_buf,
570 			    "attach_create_mn_fail");
571 			goto fail_attach;
572 		}
573 
574 		/*
575 		 * If we are in "maintenance mode", then we don't want to
576 		 * register with the IBTF.  All InfiniBand interfaces are
577 		 * uninitialized, and the device is only capable of handling
578 		 * requests to update/flash firmware (or test/debug requests).
579 		 */
580 		if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
581 
582 			/* Attach to InfiniBand Transport Framework (IBTF) */
583 			ibc_status = ibc_attach(&tmp_ibtfpriv,
584 			    &state->ts_ibtfinfo);
585 			if (ibc_status != IBC_SUCCESS) {
586 				ddi_remove_minor_node(dip, "devctl");
587 				tavor_drv_fini(state);
588 				TAVOR_ATTACH_MSG(state->ts_attach_buf,
589 				    "attach_ibcattach_fail");
590 				goto fail_attach;
591 			}
592 
593 			/*
594 			 * Now that we've successfully attached to the IBTF,
595 			 * we enable all appropriate asynch and CQ events to
596 			 * be forwarded to the IBTF.
597 			 */
598 			TAVOR_ENABLE_IBTF_CALLB(state, tmp_ibtfpriv);
599 
600 			ibc_post_attach(state->ts_ibtfpriv);
601 
602 			/* Register agents with IB Mgmt Framework (IBMF) */
603 			status = tavor_agent_handlers_init(state);
604 			if (status != DDI_SUCCESS) {
605 				(void) ibc_pre_detach(tmp_ibtfpriv, DDI_DETACH);
606 				TAVOR_QUIESCE_IBTF_CALLB(state);
607 				if (state->ts_in_evcallb != 0) {
608 					TAVOR_WARNING(state, "unable to "
609 					    "quiesce Tavor IBTF callbacks");
610 				}
611 				ibc_detach(tmp_ibtfpriv);
612 				ddi_remove_minor_node(dip, "devctl");
613 				tavor_drv_fini(state);
614 				TAVOR_ATTACH_MSG(state->ts_attach_buf,
615 				    "attach_agentinit_fail");
616 				goto fail_attach;
617 			}
618 		}
619 
620 		/* Report that driver was loaded */
621 		ddi_report_dev(dip);
622 
623 		/* Send device information to log file */
624 		tavor_device_info_report(state);
625 
626 		/* Report attach in maintenance mode, if appropriate */
627 		if (!(TAVOR_IS_OPERATIONAL(state->ts_operational_mode))) {
628 			cmn_err(CE_NOTE, "tavor%d: driver attached "
629 			    "(for maintenance mode only)", state->ts_instance);
630 		}
631 
632 		return (DDI_SUCCESS);
633 
634 	case DDI_RESUME:
635 		/* Add code here for DDI_RESUME XXX */
636 		return (DDI_FAILURE);
637 
638 	default:
639 		break;
640 	}
641 
642 fail_attach:
643 	cmn_err(CE_NOTE, "tavor%d: driver failed to attach: %s", instance,
644 	    state->ts_attach_buf);
645 	tavor_drv_fini2(state);
646 	ddi_soft_state_free(tavor_statep, instance);
647 fail_attach_nomsg:
648 	return (DDI_FAILURE);
649 }
650 
651 
652 /*
653  * tavor_detach()
654  *    Context: Only called from detach() path context
655  */
656 static int
tavor_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)657 tavor_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
658 {
659 	tavor_state_t	*state;
660 	ibc_clnt_hdl_t	tmp_ibtfpriv;
661 	ibc_status_t	ibc_status;
662 	int		instance, status;
663 
664 	instance = ddi_get_instance(dip);
665 	state = ddi_get_soft_state(tavor_statep, instance);
666 	if (state == NULL) {
667 		return (DDI_FAILURE);
668 	}
669 
670 	switch (cmd) {
671 	case DDI_DETACH:
672 		/*
673 		 * If we are in "maintenance mode", then we do not want to
674 		 * do teardown for any of the InfiniBand interfaces.
675 		 * Specifically, this means not detaching from IBTF (we never
676 		 * attached to begin with) and not deregistering from IBMF.
677 		 */
678 		if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
679 			/* Unregister agents from IB Mgmt Framework (IBMF) */
680 			status = tavor_agent_handlers_fini(state);
681 			if (status != DDI_SUCCESS) {
682 				return (DDI_FAILURE);
683 			}
684 
685 			/*
686 			 * Attempt the "pre-detach" from InfiniBand Transport
687 			 * Framework (IBTF).  At this point the IBTF is still
688 			 * capable of handling incoming asynch and completion
689 			 * events.  This "pre-detach" is primarily a mechanism
690 			 * to notify the appropriate IBTF clients that the
691 			 * HCA is being removed/offlined.
692 			 */
693 			ibc_status = ibc_pre_detach(state->ts_ibtfpriv, cmd);
694 			if (ibc_status != IBC_SUCCESS) {
695 				status = tavor_agent_handlers_init(state);
696 				if (status != DDI_SUCCESS) {
697 					TAVOR_WARNING(state, "failed to "
698 					    "restart Tavor agents");
699 				}
700 				return (DDI_FAILURE);
701 			}
702 
703 			/*
704 			 * Before we can fully detach from the IBTF we need to
705 			 * ensure that we have handled all outstanding event
706 			 * callbacks.  This is accomplished by quiescing the
707 			 * event callback mechanism.  Note: if we are unable
708 			 * to successfully quiesce the callbacks, then this is
709 			 * an indication that something has probably gone
710 			 * seriously wrong.  We print out a warning, but
711 			 * continue.
712 			 */
713 			tmp_ibtfpriv = state->ts_ibtfpriv;
714 			TAVOR_QUIESCE_IBTF_CALLB(state);
715 			if (state->ts_in_evcallb != 0) {
716 				TAVOR_WARNING(state, "unable to quiesce Tavor "
717 				    "IBTF callbacks");
718 			}
719 
720 			/* Complete the detach from the IBTF */
721 			ibc_detach(tmp_ibtfpriv);
722 		}
723 
724 		/* Remove the minor node for device */
725 		ddi_remove_minor_node(dip, "devctl");
726 
727 		/*
728 		 * Only call tavor_drv_fini() if we are in Tavor HCA mode.
729 		 * (Because if we are in "maintenance mode", then we never
730 		 * successfully finished init.)  Only report successful
731 		 * detach for normal HCA mode.
732 		 */
733 		if (TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
734 			/* Cleanup driver resources and shutdown hardware */
735 			tavor_drv_fini(state);
736 			cmn_err(CE_CONT, "Tavor driver successfully "
737 			    "detached\n");
738 		}
739 
740 		tavor_drv_fini2(state);
741 		ddi_soft_state_free(tavor_statep, instance);
742 
743 		return (DDI_SUCCESS);
744 
745 	case DDI_SUSPEND:
746 		/* Add code here for DDI_SUSPEND XXX */
747 		return (DDI_FAILURE);
748 
749 	default:
750 		break;
751 	}
752 
753 	return (DDI_FAILURE);
754 }
755 
756 
757 /*
758  * tavor_drv_init()
759  *    Context: Only called from attach() path context
760  */
761 static int
tavor_drv_init(tavor_state_t * state,dev_info_t * dip,int instance)762 tavor_drv_init(tavor_state_t *state, dev_info_t *dip, int instance)
763 {
764 	int			status;
765 
766 	/* Save away devinfo and instance */
767 	state->ts_dip = dip;
768 	state->ts_instance = instance;
769 
770 	/*
771 	 * Check and set the operational mode of the device. If the driver is
772 	 * bound to the Tavor device in "maintenance mode", then this generally
773 	 * means that either the device has been specifically jumpered to
774 	 * start in this mode or the firmware boot process has failed to
775 	 * successfully load either the primary or the secondary firmware
776 	 * image.
777 	 */
778 	if (TAVOR_IS_HCA_MODE(state->ts_dip)) {
779 		state->ts_operational_mode = TAVOR_HCA_MODE;
780 
781 	} else if (TAVOR_IS_COMPAT_MODE(state->ts_dip)) {
782 		state->ts_operational_mode = TAVOR_COMPAT_MODE;
783 
784 	} else if (TAVOR_IS_MAINTENANCE_MODE(state->ts_dip)) {
785 		state->ts_operational_mode = TAVOR_MAINTENANCE_MODE;
786 		return (DDI_FAILURE);
787 
788 	} else {
789 		state->ts_operational_mode = 0;	/* invalid operational mode */
790 		TAVOR_WARNING(state, "unexpected device type detected");
791 		return (DDI_FAILURE);
792 	}
793 
794 	/*
795 	 * Initialize the Tavor hardware.
796 	 * Note:  If this routine returns an error, it is often an reasonably
797 	 * good indication that something Tavor firmware-related has caused
798 	 * the failure.  In order to give the user an opportunity (if desired)
799 	 * to update or reflash the Tavor firmware image, we set
800 	 * "ts_operational_mode" flag (described above) to indicate that we
801 	 * wish to enter maintenance mode.
802 	 */
803 	status = tavor_hw_init(state);
804 	if (status != DDI_SUCCESS) {
805 		state->ts_operational_mode = TAVOR_MAINTENANCE_MODE;
806 		cmn_err(CE_NOTE, "tavor%d: error during attach: %s", instance,
807 		    state->ts_attach_buf);
808 		return (DDI_FAILURE);
809 	}
810 
811 	/* Setup Tavor interrupt handler */
812 	status = tavor_isr_init(state);
813 	if (status != DDI_SUCCESS) {
814 		tavor_hw_fini(state, TAVOR_DRV_CLEANUP_ALL);
815 		return (DDI_FAILURE);
816 	}
817 
818 	/* Initialize Tavor softstate */
819 	status = tavor_soft_state_init(state);
820 	if (status != DDI_SUCCESS) {
821 		tavor_isr_fini(state);
822 		tavor_hw_fini(state, TAVOR_DRV_CLEANUP_ALL);
823 		return (DDI_FAILURE);
824 	}
825 
826 	return (DDI_SUCCESS);
827 }
828 
829 
/*
 * tavor_drv_fini()
 *    Context: Only called from attach() and/or detach() path contexts
 *
 *    Undoes a completed tavor_drv_init() by tearing down, in the reverse
 *    of the order they were set up: the software state, the interrupt
 *    handler, and the hardware (with the TAVOR_DRV_CLEANUP_ALL level).
 */
static void
tavor_drv_fini(tavor_state_t *state)
{
	/* Cleanup Tavor softstate */
	tavor_soft_state_fini(state);

	/* Teardown Tavor interrupts */
	tavor_isr_fini(state);

	/* Cleanup Tavor resources and shutdown hardware */
	tavor_hw_fini(state, TAVOR_DRV_CLEANUP_ALL);
}
846 
847 /*
848  * tavor_drv_fini2()
849  *    Context: Only called from attach() and/or detach() path contexts
850  */
851 static void
tavor_drv_fini2(tavor_state_t * state)852 tavor_drv_fini2(tavor_state_t *state)
853 {
854 	/* TAVOR_DRV_CLEANUP_LEVEL1 */
855 	if (state->ts_reg_cmdhdl) {
856 		ddi_regs_map_free(&state->ts_reg_cmdhdl);
857 		state->ts_reg_cmdhdl = NULL;
858 	}
859 
860 	/* TAVOR_DRV_CLEANUP_LEVEL0 */
861 	if (state->ts_pci_cfghdl) {
862 		pci_config_teardown(&state->ts_pci_cfghdl);
863 		state->ts_pci_cfghdl = NULL;
864 	}
865 }
866 
867 /*
868  * tavor_isr_init()
869  *    Context: Only called from attach() path context
870  */
871 static int
tavor_isr_init(tavor_state_t * state)872 tavor_isr_init(tavor_state_t *state)
873 {
874 	int	status;
875 
876 	/*
877 	 * Add a handler for the interrupt or MSI
878 	 */
879 	status = ddi_intr_add_handler(state->ts_intrmsi_hdl, tavor_isr,
880 	    (caddr_t)state, NULL);
881 	if (status  != DDI_SUCCESS) {
882 		return (DDI_FAILURE);
883 	}
884 
885 	/*
886 	 * Enable the software interrupt.  Note: Even though we are only
887 	 * using one (1) interrupt/MSI, depending on the value returned in
888 	 * the capability flag, we have to call either ddi_intr_block_enable()
889 	 * or ddi_intr_enable().
890 	 */
891 	if (state->ts_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
892 		status = ddi_intr_block_enable(&state->ts_intrmsi_hdl, 1);
893 		if (status  != DDI_SUCCESS) {
894 			return (DDI_FAILURE);
895 		}
896 	} else {
897 		status = ddi_intr_enable(state->ts_intrmsi_hdl);
898 		if (status  != DDI_SUCCESS) {
899 			return (DDI_FAILURE);
900 		}
901 	}
902 
903 	/*
904 	 * Now that the ISR has been setup, arm all the EQs for event
905 	 * generation.
906 	 */
907 	tavor_eq_arm_all(state);
908 
909 	return (DDI_SUCCESS);
910 }
911 
912 
913 /*
914  * tavor_isr_fini()
915  *    Context: Only called from attach() and/or detach() path contexts
916  */
917 static void
tavor_isr_fini(tavor_state_t * state)918 tavor_isr_fini(tavor_state_t *state)
919 {
920 	/* Disable the software interrupt */
921 	if (state->ts_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
922 		(void) ddi_intr_block_disable(&state->ts_intrmsi_hdl, 1);
923 	} else {
924 		(void) ddi_intr_disable(state->ts_intrmsi_hdl);
925 	}
926 
927 	/*
928 	 * Remove the software handler for the interrupt or MSI
929 	 */
930 	(void) ddi_intr_remove_handler(state->ts_intrmsi_hdl);
931 }
932 
933 
934 /*
935  * tavor_fix_error_buf()
936  *	Context: Only called from attach().
937  *
938  * The error_buf_addr returned from QUERY_FW is a PCI address.
939  * We need to convert it to an offset from the base address,
940  * which is stored in the assigned-addresses property.
941  */
942 static int
tavor_fix_error_buf(tavor_state_t * state)943 tavor_fix_error_buf(tavor_state_t *state)
944 {
945 	int		assigned_addr_len;
946 	pci_regspec_t	*assigned_addr;
947 
948 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->ts_dip,
949 	    DDI_PROP_DONTPASS, "assigned-addresses", (int **)&assigned_addr,
950 	    (uint_t *)&assigned_addr_len) != DDI_PROP_SUCCESS)
951 		return (DDI_FAILURE);
952 
953 	state->ts_fw.error_buf_addr -= assigned_addr[0].pci_phys_low +
954 	    ((uint64_t)(assigned_addr[0].pci_phys_mid) << 32);
955 	ddi_prop_free(assigned_addr);
956 	return (DDI_SUCCESS);
957 }
958 
959 /*
960  * tavor_hw_init()
961  *    Context: Only called from attach() path context
962  */
963 static int
tavor_hw_init(tavor_state_t * state)964 tavor_hw_init(tavor_state_t *state)
965 {
966 	tavor_drv_cleanup_level_t	cleanup;
967 	sm_nodeinfo_t			nodeinfo;
968 	uint64_t			errorcode;
969 	off_t				ddr_size;
970 	int				status;
971 	int				retries;
972 
973 	/* This is where driver initialization begins */
974 	cleanup = TAVOR_DRV_CLEANUP_LEVEL0;
975 
976 	/* Setup device access attributes */
977 	state->ts_reg_accattr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
978 	state->ts_reg_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
979 	state->ts_reg_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
980 
981 	/* Setup for PCI config read/write of HCA device  */
982 	status = pci_config_setup(state->ts_dip, &state->ts_pci_cfghdl);
983 	if (status != DDI_SUCCESS) {
984 		tavor_hw_fini(state, cleanup);
985 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
986 		    "hw_init_PCI_config_space_regmap_fail");
987 		/* This case is not the degraded one */
988 		return (DDI_FAILURE);
989 	}
990 
991 	/* Map in Tavor registers (CMD, UAR, DDR) and setup offsets */
992 	status = ddi_regs_map_setup(state->ts_dip, TAVOR_CMD_BAR,
993 	    &state->ts_reg_cmd_baseaddr, 0, 0, &state->ts_reg_accattr,
994 	    &state->ts_reg_cmdhdl);
995 	if (status != DDI_SUCCESS) {
996 		tavor_hw_fini(state, cleanup);
997 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
998 		    "hw_init_CMD_ddirms_fail");
999 		return (DDI_FAILURE);
1000 	}
1001 	cleanup = TAVOR_DRV_CLEANUP_LEVEL1;
1002 
1003 	status = ddi_regs_map_setup(state->ts_dip, TAVOR_UAR_BAR,
1004 	    &state->ts_reg_uar_baseaddr, 0, 0, &state->ts_reg_accattr,
1005 	    &state->ts_reg_uarhdl);
1006 	if (status != DDI_SUCCESS) {
1007 		tavor_hw_fini(state, cleanup);
1008 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1009 		    "hw_init_UAR_ddirms_fail");
1010 		return (DDI_FAILURE);
1011 	}
1012 	cleanup = TAVOR_DRV_CLEANUP_LEVEL2;
1013 
1014 	status = ddi_dev_regsize(state->ts_dip, TAVOR_DDR_BAR, &ddr_size);
1015 	if (status != DDI_SUCCESS) {
1016 		cmn_err(CE_CONT, "Tavor: ddi_dev_regsize() failed "
1017 		    "(check HCA-attached DIMM memory?)\n");
1018 		tavor_hw_fini(state, cleanup);
1019 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1020 		    "hw_init_DDR_ddi_regsize_fail");
1021 		return (DDI_FAILURE);
1022 	}
1023 
1024 #if !defined(_ELF64) && !defined(__sparc)
1025 	/*
1026 	 * For 32 bit x86/x64 kernels, where there is limited kernel virtual
1027 	 * memory available, define a minimal memory footprint. This is
1028 	 * specified in order to not take up too much resources, thus starving
1029 	 * out others. Only specified if the HCA DIMM is equal to or greater
1030 	 * than 256MB.
1031 	 *
1032 	 * Note: x86/x64 install and safemode boot are both 32bit.
1033 	 */
1034 	ddr_size = TAVOR_DDR_SIZE_MIN;
1035 #endif	/* !(_ELF64) && !(__sparc) */
1036 
1037 	state->ts_cfg_profile_setting = ddr_size;
1038 
1039 	status = ddi_regs_map_setup(state->ts_dip, TAVOR_DDR_BAR,
1040 	    &state->ts_reg_ddr_baseaddr, 0, ddr_size, &state->ts_reg_accattr,
1041 	    &state->ts_reg_ddrhdl);
1042 
1043 	/*
1044 	 * On 32-bit platform testing (primarily x86), it was seen that the
1045 	 * ddi_regs_map_setup() call would fail because there wasn't enough
1046 	 * kernel virtual address space available to map in the entire 256MB
1047 	 * DDR.  So we add this check in here, so that if the 256 (or other
1048 	 * larger value of DDR) map in fails, that we fallback to try the lower
1049 	 * size of 128MB.
1050 	 *
1051 	 * Note: If we only have 128MB of DDR in the system in the first place,
1052 	 * we don't try another ddi_regs_map_setup(), and just skip over this
1053 	 * check and return failures.
1054 	 */
1055 	if (status == DDI_ME_NORESOURCES && ddr_size > TAVOR_DDR_SIZE_128) {
1056 		/* Try falling back to 128MB DDR mapping */
1057 		status = ddi_regs_map_setup(state->ts_dip, TAVOR_DDR_BAR,
1058 		    &state->ts_reg_ddr_baseaddr, 0, TAVOR_DDR_SIZE_128,
1059 		    &state->ts_reg_accattr, &state->ts_reg_ddrhdl);
1060 
1061 		/*
1062 		 * 128MB DDR mapping worked.
1063 		 * Set the updated config profile setting here.
1064 		 */
1065 		if (status == DDI_SUCCESS) {
1066 			state->ts_cfg_profile_setting = TAVOR_DDR_SIZE_128;
1067 		}
1068 	}
1069 
1070 	if (status != DDI_SUCCESS) {
1071 		if (status == DDI_ME_RNUMBER_RANGE) {
1072 			cmn_err(CE_CONT, "Tavor: ddi_regs_map_setup() failed "
1073 			    "(check HCA-attached DIMM memory?)\n");
1074 		}
1075 		tavor_hw_fini(state, cleanup);
1076 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1077 		    "hw_init_DDR_ddirms_fail");
1078 		return (DDI_FAILURE);
1079 	}
1080 	cleanup = TAVOR_DRV_CLEANUP_LEVEL3;
1081 
1082 	/* Setup Tavor Host Command Register (HCR) */
1083 	state->ts_cmd_regs.hcr = (tavor_hw_hcr_t *)
1084 	    ((uintptr_t)state->ts_reg_cmd_baseaddr + TAVOR_CMD_HCR_OFFSET);
1085 
1086 	/* Setup Tavor Event Cause Register (ecr and clr_ecr) */
1087 	state->ts_cmd_regs.ecr = (uint64_t *)
1088 	    ((uintptr_t)state->ts_reg_cmd_baseaddr + TAVOR_CMD_ECR_OFFSET);
1089 	state->ts_cmd_regs.clr_ecr = (uint64_t *)
1090 	    ((uintptr_t)state->ts_reg_cmd_baseaddr + TAVOR_CMD_CLR_ECR_OFFSET);
1091 
1092 	/* Setup Tavor Software Reset register (sw_reset) */
1093 	state->ts_cmd_regs.sw_reset = (uint32_t *)
1094 	    ((uintptr_t)state->ts_reg_cmd_baseaddr + TAVOR_CMD_SW_RESET_OFFSET);
1095 
1096 	/* Setup Tavor Clear Interrupt register (clr_int) */
1097 	state->ts_cmd_regs.clr_int = (uint64_t *)
1098 	    ((uintptr_t)state->ts_reg_cmd_baseaddr + TAVOR_CMD_CLR_INT_OFFSET);
1099 
1100 	/* Initialize the Phase1 Tavor configuration profile */
1101 	status = tavor_cfg_profile_init_phase1(state);
1102 	if (status != DDI_SUCCESS) {
1103 		tavor_hw_fini(state, cleanup);
1104 		TAVOR_ATTACH_MSG(state->ts_attach_buf, "hw_init_cfginit_fail");
1105 		return (DDI_FAILURE);
1106 	}
1107 	cleanup = TAVOR_DRV_CLEANUP_LEVEL4;
1108 
1109 	/* Do a software reset of the Tavor HW to ensure proper state */
1110 	status = tavor_sw_reset(state);
1111 	if (status != TAVOR_CMD_SUCCESS) {
1112 		tavor_hw_fini(state, cleanup);
1113 		TAVOR_ATTACH_MSG(state->ts_attach_buf, "hw_init_sw_reset_fail");
1114 		return (DDI_FAILURE);
1115 	}
1116 
1117 	/* Post the SYS_EN command to start the hardware */
1118 	status = tavor_sys_en_cmd_post(state, TAVOR_CMD_SYS_EN_NORMAL,
1119 	    &errorcode, TAVOR_CMD_NOSLEEP_SPIN);
1120 	if (status != TAVOR_CMD_SUCCESS) {
1121 		if ((status == TAVOR_CMD_BAD_NVMEM) ||
1122 		    (status == TAVOR_CMD_DDR_MEM_ERR)) {
1123 			cmn_err(CE_CONT, "Tavor: SYS_EN command failed: 0x%x "
1124 			    "0x%" PRIx64 " (invalid firmware image?)\n",
1125 			    status, errorcode);
1126 		} else {
1127 			cmn_err(CE_CONT, "Tavor: SYS_EN command failed: 0x%x "
1128 			    "0x%" PRIx64 "\n", status, errorcode);
1129 		}
1130 		tavor_hw_fini(state, cleanup);
1131 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1132 		    "hw_init_sys_en_cmd_fail");
1133 		return (DDI_FAILURE);
1134 	}
1135 	cleanup = TAVOR_DRV_CLEANUP_LEVEL5;
1136 
1137 	/* First phase of init for Tavor configuration/resources */
1138 	status = tavor_rsrc_init_phase1(state);
1139 	if (status != DDI_SUCCESS) {
1140 		tavor_hw_fini(state, cleanup);
1141 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1142 		    "hw_init_rsrcinit1_fail");
1143 		return (DDI_FAILURE);
1144 	}
1145 	cleanup = TAVOR_DRV_CLEANUP_LEVEL6;
1146 
1147 	/* Query the DDR properties (e.g. total DDR size) */
1148 	status = tavor_cmn_query_cmd_post(state, QUERY_DDR, 0,
1149 	    &state->ts_ddr, sizeof (tavor_hw_queryddr_t),
1150 	    TAVOR_CMD_NOSLEEP_SPIN);
1151 	if (status != TAVOR_CMD_SUCCESS) {
1152 		cmn_err(CE_CONT, "Tavor: QUERY_DDR command failed: %08x\n",
1153 		    status);
1154 		tavor_hw_fini(state, cleanup);
1155 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1156 		    "hw_init_query_ddr_cmd_fail");
1157 		return (DDI_FAILURE);
1158 	}
1159 
1160 	/* Figure out how big the firmware image (in DDR) is */
1161 	status = tavor_cmn_query_cmd_post(state, QUERY_FW, 0, &state->ts_fw,
1162 	    sizeof (tavor_hw_queryfw_t), TAVOR_CMD_NOSLEEP_SPIN);
1163 	if (status != TAVOR_CMD_SUCCESS) {
1164 		cmn_err(CE_CONT, "Tavor: QUERY_FW command failed: %08x\n",
1165 		    status);
1166 		tavor_hw_fini(state, cleanup);
1167 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1168 		    "hw_init_query_fw_cmd_fail");
1169 		return (DDI_FAILURE);
1170 	}
1171 
1172 	if (tavor_fix_error_buf(state) != DDI_SUCCESS) {
1173 		tavor_hw_fini(state, cleanup);
1174 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1175 		    "hw_init_fixerrorbuf_fail");
1176 		return (DDI_FAILURE);
1177 	}
1178 
1179 	/* Validate that the FW version is appropriate */
1180 	status = tavor_fw_version_check(state);
1181 	if (status != DDI_SUCCESS) {
1182 		if (state->ts_operational_mode == TAVOR_HCA_MODE) {
1183 			cmn_err(CE_CONT, "Unsupported Tavor FW version: "
1184 			    "expected: %04d.%04d.%04d, "
1185 			    "actual: %04d.%04d.%04d\n",
1186 			    TAVOR_FW_VER_MAJOR,
1187 			    TAVOR_FW_VER_MINOR,
1188 			    TAVOR_FW_VER_SUBMINOR,
1189 			    state->ts_fw.fw_rev_major,
1190 			    state->ts_fw.fw_rev_minor,
1191 			    state->ts_fw.fw_rev_subminor);
1192 		} else if (state->ts_operational_mode == TAVOR_COMPAT_MODE) {
1193 			cmn_err(CE_CONT, "Unsupported Tavor Compat FW version: "
1194 			    "expected: %04d.%04d.%04d, "
1195 			    "actual: %04d.%04d.%04d\n",
1196 			    TAVOR_COMPAT_FW_VER_MAJOR,
1197 			    TAVOR_COMPAT_FW_VER_MINOR,
1198 			    TAVOR_COMPAT_FW_VER_SUBMINOR,
1199 			    state->ts_fw.fw_rev_major,
1200 			    state->ts_fw.fw_rev_minor,
1201 			    state->ts_fw.fw_rev_subminor);
1202 		} else {
1203 			cmn_err(CE_CONT, "Unsupported FW version: "
1204 			    "%04d.%04d.%04d\n",
1205 			    state->ts_fw.fw_rev_major,
1206 			    state->ts_fw.fw_rev_minor,
1207 			    state->ts_fw.fw_rev_subminor);
1208 		}
1209 		tavor_hw_fini(state, cleanup);
1210 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1211 		    "hw_init_checkfwver_fail");
1212 		return (DDI_FAILURE);
1213 	}
1214 
1215 	drv_usecwait(10);
1216 	retries = 1000;		/* retry up to 1 second before giving up */
1217 retry:
1218 	/* Call MOD_STAT_CFG to setup SRQ support (or disable) */
1219 	status = tavor_mod_stat_cfg_cmd_post(state);
1220 	if (status != DDI_SUCCESS) {
1221 		if (retries > 0) {
1222 			drv_usecwait(1000);
1223 			retries--;
1224 			goto retry;
1225 		}
1226 		cmn_err(CE_CONT, "Tavor: MOD_STAT_CFG command failed: %08x\n",
1227 		    status);
1228 		tavor_hw_fini(state, cleanup);
1229 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1230 		    "hw_init_mod_stat_cfg_cmd_fail");
1231 		return (DDI_FAILURE);
1232 	}
1233 
1234 	/* Figure out Tavor device limits */
1235 	status = tavor_cmn_query_cmd_post(state, QUERY_DEV_LIM, 0,
1236 	    &state->ts_devlim, sizeof (tavor_hw_querydevlim_t),
1237 	    TAVOR_CMD_NOSLEEP_SPIN);
1238 	if (status != TAVOR_CMD_SUCCESS) {
1239 		cmn_err(CE_CONT, "Tavor: QUERY_DEV_LIM command failed: %08x\n",
1240 		    status);
1241 		tavor_hw_fini(state, cleanup);
1242 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1243 		    "hw_init_query_devlim_cmd_fail");
1244 		return (DDI_FAILURE);
1245 	}
1246 
1247 	/* Initialize the Phase2 Tavor configuration profile */
1248 	status = tavor_cfg_profile_init_phase2(state);
1249 	if (status != DDI_SUCCESS) {
1250 		tavor_hw_fini(state, cleanup);
1251 		TAVOR_ATTACH_MSG(state->ts_attach_buf, "hw_init_cfginit2_fail");
1252 		return (DDI_FAILURE);
1253 	}
1254 
1255 	/* Second phase of init for Tavor configuration/resources */
1256 	status = tavor_rsrc_init_phase2(state);
1257 	if (status != DDI_SUCCESS) {
1258 		tavor_hw_fini(state, cleanup);
1259 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1260 		    "hw_init_rsrcinit2_fail");
1261 		return (DDI_FAILURE);
1262 	}
1263 	cleanup = TAVOR_DRV_CLEANUP_LEVEL7;
1264 
1265 	/* Miscellaneous query information */
1266 	status = tavor_cmn_query_cmd_post(state, QUERY_ADAPTER, 0,
1267 	    &state->ts_adapter, sizeof (tavor_hw_queryadapter_t),
1268 	    TAVOR_CMD_NOSLEEP_SPIN);
1269 	if (status != TAVOR_CMD_SUCCESS) {
1270 		cmn_err(CE_CONT, "Tavor: QUERY_ADAPTER command failed: %08x\n",
1271 		    status);
1272 		tavor_hw_fini(state, cleanup);
1273 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1274 		    "hw_init_query_adapter_cmd_fail");
1275 		return (DDI_FAILURE);
1276 	}
1277 
1278 	/* Prepare configuration for Tavor INIT_HCA command */
1279 	tavor_hca_config_setup(state, &state->ts_hcaparams);
1280 
1281 	/* Post command to init Tavor HCA */
1282 	status = tavor_init_hca_cmd_post(state, &state->ts_hcaparams,
1283 	    TAVOR_CMD_NOSLEEP_SPIN);
1284 	if (status != TAVOR_CMD_SUCCESS) {
1285 		cmn_err(CE_CONT, "Tavor: INIT_HCA command failed: %08x\n",
1286 		    status);
1287 		tavor_hw_fini(state, cleanup);
1288 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1289 		    "hw_init_init_hca_cmd_fail");
1290 		return (DDI_FAILURE);
1291 	}
1292 	cleanup = TAVOR_DRV_CLEANUP_LEVEL8;
1293 
1294 	/* Allocate protection domain (PD) for Tavor internal use */
1295 	status = tavor_pd_alloc(state, &state->ts_pdhdl_internal, TAVOR_SLEEP);
1296 	if (status != DDI_SUCCESS) {
1297 		tavor_hw_fini(state, cleanup);
1298 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1299 		    "hw_init_internal_pd_alloc_fail");
1300 		return (DDI_FAILURE);
1301 	}
1302 	cleanup = TAVOR_DRV_CLEANUP_LEVEL9;
1303 
1304 	/* Setup Tavor internal UAR pages (0 and 1) */
1305 	status = tavor_internal_uarpgs_init(state);
1306 	if (status != DDI_SUCCESS) {
1307 		tavor_hw_fini(state, cleanup);
1308 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1309 		    "hw_init_internal_uarpgs_alloc_fail");
1310 		return (DDI_FAILURE);
1311 	}
1312 	cleanup = TAVOR_DRV_CLEANUP_LEVEL10;
1313 
1314 	/* Query and initialize the Tavor interrupt/MSI information */
1315 	status = tavor_intr_or_msi_init(state);
1316 	if (status != DDI_SUCCESS) {
1317 		tavor_hw_fini(state, cleanup);
1318 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1319 		    "intr_or_msi_init_fail");
1320 		return (DDI_FAILURE);
1321 	}
1322 	cleanup = TAVOR_DRV_CLEANUP_LEVEL11;
1323 
1324 	/* Setup all of the Tavor EQs */
1325 	status = tavor_eq_init_all(state);
1326 	if (status != DDI_SUCCESS) {
1327 		tavor_hw_fini(state, cleanup);
1328 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1329 		    "hw_init_eqinitall_fail");
1330 		return (DDI_FAILURE);
1331 	}
1332 	cleanup = TAVOR_DRV_CLEANUP_LEVEL12;
1333 
1334 	/* Set aside contexts for QP0 and QP1 */
1335 	status = tavor_special_qp_contexts_reserve(state);
1336 	if (status != DDI_SUCCESS) {
1337 		tavor_hw_fini(state, cleanup);
1338 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1339 		    "hw_init_reserve_special_qp_fail");
1340 		return (DDI_FAILURE);
1341 	}
1342 	cleanup = TAVOR_DRV_CLEANUP_LEVEL13;
1343 
1344 	/* Initialize for multicast group handling */
1345 	status = tavor_mcg_init(state);
1346 	if (status != DDI_SUCCESS) {
1347 		tavor_hw_fini(state, cleanup);
1348 		TAVOR_ATTACH_MSG(state->ts_attach_buf, "hw_init_mcg_init_fail");
1349 		return (DDI_FAILURE);
1350 	}
1351 	cleanup = TAVOR_DRV_CLEANUP_LEVEL14;
1352 
1353 	/* Initialize the Tavor IB port(s) */
1354 	status = tavor_hca_port_init(state);
1355 	if (status != DDI_SUCCESS) {
1356 		tavor_hw_fini(state, cleanup);
1357 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1358 		    "hw_init_hca_port_init_fail");
1359 		return (DDI_FAILURE);
1360 	}
1361 	cleanup = TAVOR_DRV_CLEANUP_ALL;
1362 
1363 	/* Determine NodeGUID and SystemImageGUID */
1364 	status = tavor_getnodeinfo_cmd_post(state, TAVOR_CMD_NOSLEEP_SPIN,
1365 	    &nodeinfo);
1366 	if (status != TAVOR_CMD_SUCCESS) {
1367 		cmn_err(CE_CONT, "Tavor: GetNodeInfo command failed: %08x\n",
1368 		    status);
1369 		tavor_hw_fini(state, cleanup);
1370 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1371 		    "hw_init_getnodeinfo_cmd_fail");
1372 		return (DDI_FAILURE);
1373 	}
1374 
1375 	/*
1376 	 * If the NodeGUID value was set in OBP properties, then we use that
1377 	 * value.  But we still print a message if the value we queried from
1378 	 * firmware does not match this value.
1379 	 *
1380 	 * Otherwise if OBP value is not set then we use the value from
1381 	 * firmware unconditionally.
1382 	 */
1383 	if (state->ts_cfg_profile->cp_nodeguid) {
1384 		state->ts_nodeguid   = state->ts_cfg_profile->cp_nodeguid;
1385 	} else {
1386 		state->ts_nodeguid = nodeinfo.NodeGUID;
1387 	}
1388 
1389 	if (state->ts_nodeguid != nodeinfo.NodeGUID) {
1390 		cmn_err(CE_NOTE, "!NodeGUID value queried from firmware "
1391 		    "does not match value set by device property");
1392 	}
1393 
1394 	/*
1395 	 * If the SystemImageGUID value was set in OBP properties, then we use
1396 	 * that value.  But we still print a message if the value we queried
1397 	 * from firmware does not match this value.
1398 	 *
1399 	 * Otherwise if OBP value is not set then we use the value from
1400 	 * firmware unconditionally.
1401 	 */
1402 	if (state->ts_cfg_profile->cp_sysimgguid) {
1403 		state->ts_sysimgguid = state->ts_cfg_profile->cp_sysimgguid;
1404 	} else {
1405 		state->ts_sysimgguid = nodeinfo.SystemImageGUID;
1406 	}
1407 
1408 	if (state->ts_sysimgguid != nodeinfo.SystemImageGUID) {
1409 		cmn_err(CE_NOTE, "!SystemImageGUID value queried from firmware "
1410 		    "does not match value set by device property");
1411 	}
1412 
1413 	/* Get NodeDescription */
1414 	status = tavor_getnodedesc_cmd_post(state, TAVOR_CMD_NOSLEEP_SPIN,
1415 	    (sm_nodedesc_t *)&state->ts_nodedesc);
1416 	if (status != TAVOR_CMD_SUCCESS) {
1417 		cmn_err(CE_CONT, "Tavor: GetNodeDesc command failed: %08x\n",
1418 		    status);
1419 		tavor_hw_fini(state, cleanup);
1420 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1421 		    "hw_init_getnodedesc_cmd_fail");
1422 		return (DDI_FAILURE);
1423 	}
1424 
1425 	return (DDI_SUCCESS);
1426 }
1427 
1428 
1429 /*
1430  * tavor_hw_fini()
1431  *    Context: Only called from attach() and/or detach() path contexts
1432  */
1433 static void
tavor_hw_fini(tavor_state_t * state,tavor_drv_cleanup_level_t cleanup)1434 tavor_hw_fini(tavor_state_t *state, tavor_drv_cleanup_level_t cleanup)
1435 {
1436 	uint_t		num_ports;
1437 	int		status;
1438 
1439 	switch (cleanup) {
1440 	/*
1441 	 * If we add more driver initialization steps that should be cleaned
1442 	 * up here, we need to ensure that TAVOR_DRV_CLEANUP_ALL is still the
1443 	 * first entry (i.e. corresponds to the last init step).
1444 	 */
1445 	case TAVOR_DRV_CLEANUP_ALL:
1446 		/* Shutdown the Tavor IB port(s) */
1447 		num_ports = state->ts_cfg_profile->cp_num_ports;
1448 		(void) tavor_hca_ports_shutdown(state, num_ports);
1449 		/* FALLTHROUGH */
1450 
1451 	case TAVOR_DRV_CLEANUP_LEVEL14:
1452 		/* Teardown resources used for multicast group handling */
1453 		tavor_mcg_fini(state);
1454 		/* FALLTHROUGH */
1455 
1456 	case TAVOR_DRV_CLEANUP_LEVEL13:
1457 		/* Unreserve the special QP contexts */
1458 		tavor_special_qp_contexts_unreserve(state);
1459 		/* FALLTHROUGH */
1460 
1461 	case TAVOR_DRV_CLEANUP_LEVEL12:
1462 		/*
1463 		 * Attempt to teardown all event queues (EQ).  If we fail
1464 		 * here then print a warning message and return.  Something
1465 		 * (either in HW or SW) has gone seriously wrong.
1466 		 */
1467 		status = tavor_eq_fini_all(state);
1468 		if (status != DDI_SUCCESS) {
1469 			TAVOR_WARNING(state, "failed to teardown EQs");
1470 			return;
1471 		}
1472 		/* FALLTHROUGH */
1473 
1474 	case TAVOR_DRV_CLEANUP_LEVEL11:
1475 		status = tavor_intr_or_msi_fini(state);
1476 		if (status != DDI_SUCCESS) {
1477 			TAVOR_WARNING(state, "failed to free intr/MSI");
1478 			return;
1479 		}
1480 		/* FALLTHROUGH */
1481 
1482 	case TAVOR_DRV_CLEANUP_LEVEL10:
1483 		/* Free the resources for the Tavor internal UAR pages */
1484 		tavor_internal_uarpgs_fini(state);
1485 		/* FALLTHROUGH */
1486 
1487 	case TAVOR_DRV_CLEANUP_LEVEL9:
1488 		/*
1489 		 * Free the PD that was used internally by Tavor software.  If
1490 		 * we fail here then print a warning and return.  Something
1491 		 * (probably software-related, but perhaps HW) has gone wrong.
1492 		 */
1493 		status = tavor_pd_free(state, &state->ts_pdhdl_internal);
1494 		if (status != DDI_SUCCESS) {
1495 			TAVOR_WARNING(state, "failed to free internal PD");
1496 			return;
1497 		}
1498 		/* FALLTHROUGH */
1499 
1500 	case TAVOR_DRV_CLEANUP_LEVEL8:
1501 		/*
1502 		 * Post the CLOSE_HCA command to Tavor firmware.  If we fail
1503 		 * here then print a warning and return.  Something (either in
1504 		 * HW or SW) has gone seriously wrong.
1505 		 */
1506 		status = tavor_close_hca_cmd_post(state,
1507 		    TAVOR_CMD_NOSLEEP_SPIN);
1508 		if (status != TAVOR_CMD_SUCCESS) {
1509 			TAVOR_WARNING(state, "failed to shutdown HCA");
1510 			return;
1511 		}
1512 		/* FALLTHROUGH */
1513 
1514 	case TAVOR_DRV_CLEANUP_LEVEL7:
1515 		/* Cleanup all the phase2 resources first */
1516 		tavor_rsrc_fini(state, TAVOR_RSRC_CLEANUP_ALL);
1517 		/* FALLTHROUGH */
1518 
1519 	case TAVOR_DRV_CLEANUP_LEVEL6:
1520 		/* Then cleanup the phase1 resources */
1521 		tavor_rsrc_fini(state, TAVOR_RSRC_CLEANUP_PHASE1_COMPLETE);
1522 		/* FALLTHROUGH */
1523 
1524 	case TAVOR_DRV_CLEANUP_LEVEL5:
1525 		/*
1526 		 * Post the SYS_DIS command to Tavor firmware to shut
1527 		 * everything down again.  If we fail here then print a
1528 		 * warning and return.  Something (probably in HW, but maybe
1529 		 * in SW) has gone seriously wrong.
1530 		 */
1531 		status = tavor_sys_dis_cmd_post(state, TAVOR_CMD_NOSLEEP_SPIN);
1532 		if (status != TAVOR_CMD_SUCCESS) {
1533 			TAVOR_WARNING(state, "failed to shutdown hardware");
1534 			return;
1535 		}
1536 		/* FALLTHROUGH */
1537 
1538 	case TAVOR_DRV_CLEANUP_LEVEL4:
1539 		/* Teardown any resources allocated for the config profile */
1540 		tavor_cfg_profile_fini(state);
1541 		/* FALLTHROUGH */
1542 
1543 	case TAVOR_DRV_CLEANUP_LEVEL3:
1544 		ddi_regs_map_free(&state->ts_reg_ddrhdl);
1545 		/* FALLTHROUGH */
1546 
1547 	case TAVOR_DRV_CLEANUP_LEVEL2:
1548 		ddi_regs_map_free(&state->ts_reg_uarhdl);
1549 		/* FALLTHROUGH */
1550 
1551 	case TAVOR_DRV_CLEANUP_LEVEL1:
1552 	case TAVOR_DRV_CLEANUP_LEVEL0:
1553 		/*
1554 		 * LEVEL1 and LEVEL0 resources are freed in
1555 		 * tavor_drv_fini2().
1556 		 */
1557 		break;
1558 
1559 	default:
1560 		TAVOR_WARNING(state, "unexpected driver cleanup level");
1561 		return;
1562 	}
1563 }
1564 
1565 
1566 /*
1567  * tavor_soft_state_init()
1568  *    Context: Only called from attach() path context
1569  */
1570 static int
tavor_soft_state_init(tavor_state_t * state)1571 tavor_soft_state_init(tavor_state_t *state)
1572 {
1573 	ibt_hca_attr_t		*hca_attr;
1574 	uint64_t		maxval, val;
1575 	ibt_hca_flags_t		caps = IBT_HCA_NO_FLAGS;
1576 	int			status;
1577 
1578 	/*
1579 	 * The ibc_hca_info_t struct is passed to the IBTF.  This is the
1580 	 * routine where we initialize it.  Many of the init values come from
1581 	 * either configuration variables or successful queries of the Tavor
1582 	 * hardware abilities
1583 	 */
1584 	state->ts_ibtfinfo.hca_ci_vers	= IBCI_V4;
1585 	state->ts_ibtfinfo.hca_handle	= (ibc_hca_hdl_t)state;
1586 	state->ts_ibtfinfo.hca_ops	= &tavor_ibc_ops;
1587 
1588 	hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP);
1589 	state->ts_ibtfinfo.hca_attr = hca_attr;
1590 
1591 	hca_attr->hca_dip = state->ts_dip;
1592 	hca_attr->hca_fw_major_version = state->ts_fw.fw_rev_major;
1593 	hca_attr->hca_fw_minor_version = state->ts_fw.fw_rev_minor;
1594 	hca_attr->hca_fw_micro_version = state->ts_fw.fw_rev_subminor;
1595 
1596 	/*
1597 	 * Determine HCA capabilities:
1598 	 * No default support for IBT_HCA_RD, IBT_HCA_RAW_MULTICAST,
1599 	 *    IBT_HCA_ATOMICS_GLOBAL, IBT_HCA_RESIZE_CHAN, IBT_HCA_INIT_TYPE,
1600 	 *    or IBT_HCA_SHUTDOWN_PORT
1601 	 * But IBT_HCA_AH_PORT_CHECK, IBT_HCA_SQD_RTS_PORT, IBT_HCA_SI_GUID,
1602 	 *    IBT_HCA_RNR_NAK, and IBT_HCA_CURRENT_QP_STATE are always
1603 	 *    supported
1604 	 * All other features are conditionally supported, depending on the
1605 	 *    status return by the Tavor HCA (in QUERY_DEV_LIM)
1606 	 */
1607 	if (state->ts_devlim.ud_multi) {
1608 		caps |= IBT_HCA_UD_MULTICAST;
1609 	}
1610 	if (state->ts_devlim.atomic) {
1611 		caps |= IBT_HCA_ATOMICS_HCA;
1612 	}
1613 	if (state->ts_devlim.apm) {
1614 		caps |= IBT_HCA_AUTO_PATH_MIG;
1615 	}
1616 	if (state->ts_devlim.pkey_v) {
1617 		caps |= IBT_HCA_PKEY_CNTR;
1618 	}
1619 	if (state->ts_devlim.qkey_v) {
1620 		caps |= IBT_HCA_QKEY_CNTR;
1621 	}
1622 	if (state->ts_cfg_profile->cp_srq_enable) {
1623 		caps |= IBT_HCA_SRQ | IBT_HCA_RESIZE_SRQ;
1624 	}
1625 	caps |= (IBT_HCA_AH_PORT_CHECK | IBT_HCA_SQD_SQD_PORT |
1626 	    IBT_HCA_SI_GUID | IBT_HCA_RNR_NAK | IBT_HCA_CURRENT_QP_STATE |
1627 	    IBT_HCA_PORT_UP | IBT_HCA_SQD_STATE);
1628 	hca_attr->hca_flags = caps;
1629 	hca_attr->hca_flags2 = IBT_HCA2_DMA_MR;
1630 
1631 	/* Determine VendorID, DeviceID, and revision ID */
1632 	hca_attr->hca_vendor_id	 = state->ts_adapter.vendor_id;
1633 	hca_attr->hca_device_id	 = state->ts_adapter.device_id;
1634 	hca_attr->hca_version_id = state->ts_adapter.rev_id;
1635 
1636 	/*
1637 	 * Determine number of available QPs and max QP size.  Number of
1638 	 * available QPs is determined by subtracting the number of
1639 	 * "reserved QPs" (i.e. reserved for firmware use) from the
1640 	 * total number configured.
1641 	 */
1642 	val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_qp);
1643 	hca_attr->hca_max_qp = val - ((uint64_t)1 <<
1644 	    state->ts_devlim.log_rsvd_qp);
1645 	maxval	= ((uint64_t)1 << state->ts_devlim.log_max_qp_sz);
1646 	val	= ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_qp_sz);
1647 	if (val > maxval) {
1648 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1649 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1650 		    "soft_state_init_maxqpsz_toobig_fail");
1651 		return (DDI_FAILURE);
1652 	}
1653 	hca_attr->hca_max_qp_sz = val;
1654 
1655 	/* Determine max scatter-gather size in WQEs */
1656 	maxval	= state->ts_devlim.max_sg;
1657 	val	= state->ts_cfg_profile->cp_wqe_max_sgl;
1658 	if (val > maxval) {
1659 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1660 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1661 		    "soft_state_init_toomanysgl_fail");
1662 		return (DDI_FAILURE);
1663 	}
1664 	/* If the rounded value for max SGL is too large, cap it */
1665 	if (state->ts_cfg_profile->cp_wqe_real_max_sgl > maxval) {
1666 		state->ts_cfg_profile->cp_wqe_real_max_sgl = maxval;
1667 		val = maxval;
1668 	} else {
1669 		val = state->ts_cfg_profile->cp_wqe_real_max_sgl;
1670 	}
1671 
1672 	hca_attr->hca_max_sgl	 = val;
1673 	hca_attr->hca_max_rd_sgl = 0;	/* zero because RD is unsupported */
1674 
1675 	/*
1676 	 * Determine number of available CQs and max CQ size. Number of
1677 	 * available CQs is determined by subtracting the number of
1678 	 * "reserved CQs" (i.e. reserved for firmware use) from the
1679 	 * total number configured.
1680 	 */
1681 	val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_cq);
1682 	hca_attr->hca_max_cq = val - ((uint64_t)1 <<
1683 	    state->ts_devlim.log_rsvd_cq);
1684 	maxval	= ((uint64_t)1 << state->ts_devlim.log_max_cq_sz);
1685 	val	= ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_cq_sz) - 1;
1686 	if (val > maxval) {
1687 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1688 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1689 		    "soft_state_init_maxcqsz_toobig_fail");
1690 		return (DDI_FAILURE);
1691 	}
1692 	hca_attr->hca_max_cq_sz = val;
1693 
1694 	/*
1695 	 * Determine number of available SRQs and max SRQ size. Number of
1696 	 * available SRQs is determined by subtracting the number of
1697 	 * "reserved SRQs" (i.e. reserved for firmware use) from the
1698 	 * total number configured.
1699 	 */
1700 	val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_srq);
1701 	hca_attr->hca_max_srqs = val - ((uint64_t)1 <<
1702 	    state->ts_devlim.log_rsvd_srq);
1703 	maxval  = ((uint64_t)1 << state->ts_devlim.log_max_srq_sz);
1704 	val	= ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_srq_sz);
1705 
1706 	if (val > maxval) {
1707 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1708 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1709 		    "soft_state_init_maxsrqsz_toobig_fail");
1710 		return (DDI_FAILURE);
1711 	}
1712 	hca_attr->hca_max_srqs_sz = val;
1713 
1714 	val    = state->ts_cfg_profile->cp_srq_max_sgl;
1715 	maxval	= state->ts_devlim.max_sg;
1716 	if (val > maxval) {
1717 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1718 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1719 		    "soft_state_init_toomanysrqsgl_fail");
1720 		return (DDI_FAILURE);
1721 	}
1722 	hca_attr->hca_max_srq_sgl = val;
1723 
1724 	/*
1725 	 * Determine supported HCA page sizes
1726 	 * XXX
1727 	 * For now we simply return the system pagesize as the only supported
1728 	 * pagesize
1729 	 */
1730 	hca_attr->hca_page_sz = ((PAGESIZE == (1 << 13)) ? IBT_PAGE_8K :
1731 	    IBT_PAGE_4K);
1732 
1733 	/*
1734 	 * Determine number of available MemReg, MemWin, and their max size.
1735 	 * Number of available MRs and MWs is determined by subtracting
1736 	 * the number of "reserved MPTs" (i.e. reserved for firmware use)
1737 	 * from the total number configured for each.
1738 	 */
1739 	val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_mpt);
1740 	hca_attr->hca_max_memr	  = val - ((uint64_t)1 <<
1741 	    state->ts_devlim.log_rsvd_mpt);
1742 	hca_attr->hca_max_mem_win = val - ((uint64_t)1 <<
1743 	    state->ts_devlim.log_rsvd_mpt);
1744 	maxval	= state->ts_devlim.log_max_mrw_sz;
1745 	val	= state->ts_cfg_profile->cp_log_max_mrw_sz;
1746 	if (val > maxval) {
1747 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1748 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1749 		    "soft_state_init_maxmrwsz_toobig_fail");
1750 		return (DDI_FAILURE);
1751 	}
1752 	hca_attr->hca_max_memr_len = ((uint64_t)1 << val);
1753 
1754 	/* Determine RDMA/Atomic properties */
1755 	val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_rdb);
1756 	hca_attr->hca_max_rsc = val;
1757 	val = state->ts_cfg_profile->cp_hca_max_rdma_in_qp;
1758 	hca_attr->hca_max_rdma_in_qp  = val;
1759 	val = state->ts_cfg_profile->cp_hca_max_rdma_out_qp;
1760 	hca_attr->hca_max_rdma_out_qp = val;
1761 	hca_attr->hca_max_rdma_in_ee  = 0;
1762 	hca_attr->hca_max_rdma_out_ee = 0;
1763 
1764 	/*
1765 	 * Determine maximum number of raw IPv6 and Ether QPs.  Set to 0
1766 	 * because neither type of raw QP is supported
1767 	 */
1768 	hca_attr->hca_max_ipv6_qp  = 0;
1769 	hca_attr->hca_max_ether_qp = 0;
1770 
1771 	/* Determine max number of MCGs and max QP-per-MCG */
1772 	val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_qp);
1773 	hca_attr->hca_max_mcg_qps   = val;
1774 	val = ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_mcg);
1775 	hca_attr->hca_max_mcg	    = val;
1776 	val = state->ts_cfg_profile->cp_num_qp_per_mcg;
1777 	hca_attr->hca_max_qp_per_mcg = val;
1778 
1779 	/* Determine max number partitions (i.e. PKeys) */
1780 	maxval	= ((uint64_t)1 << state->ts_devlim.log_max_pkey);
1781 	val	= ((uint64_t)state->ts_cfg_profile->cp_num_ports <<
1782 	    state->ts_cfg_profile->cp_log_max_pkeytbl);
1783 
1784 	if (val > maxval) {
1785 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1786 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1787 		    "soft_state_init_toomanypkey_fail");
1788 		return (DDI_FAILURE);
1789 	}
1790 	hca_attr->hca_max_partitions = val;
1791 
1792 	/* Determine number of ports */
1793 	maxval = state->ts_devlim.num_ports;
1794 	val = state->ts_cfg_profile->cp_num_ports;
1795 	if ((val > maxval) || (val == 0)) {
1796 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1797 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1798 		    "soft_state_init_toomanyports_fail");
1799 		return (DDI_FAILURE);
1800 	}
1801 	hca_attr->hca_nports = val;
1802 
1803 	/* Copy NodeGUID and SystemImageGUID from softstate */
1804 	hca_attr->hca_node_guid = state->ts_nodeguid;
1805 	hca_attr->hca_si_guid	= state->ts_sysimgguid;
1806 
1807 	/*
1808 	 * Determine local ACK delay.  Use the value suggested by the Tavor
1809 	 * hardware (from the QUERY_DEV_LIM command)
1810 	 */
1811 	hca_attr->hca_local_ack_delay = state->ts_devlim.ca_ack_delay;
1812 
1813 	/* Determine max SGID table and PKey table sizes */
1814 	val	= ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_gidtbl);
1815 	hca_attr->hca_max_port_sgid_tbl_sz = val;
1816 	val	= ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_pkeytbl);
1817 	hca_attr->hca_max_port_pkey_tbl_sz = val;
1818 
1819 	/* Determine max number of PDs */
1820 	maxval	= ((uint64_t)1 << state->ts_devlim.log_max_pd);
1821 	val	= ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_pd);
1822 	if (val > maxval) {
1823 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1824 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1825 		    "soft_state_init_toomanypd_fail");
1826 		return (DDI_FAILURE);
1827 	}
1828 	hca_attr->hca_max_pd = val;
1829 
1830 	/* Determine max number of Address Handles */
1831 	maxval	= ((uint64_t)1 << state->ts_devlim.log_max_av);
1832 	val	= ((uint64_t)1 << state->ts_cfg_profile->cp_log_num_ah);
1833 	if (val > maxval) {
1834 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1835 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1836 		    "soft_state_init_toomanyah_fail");
1837 		return (DDI_FAILURE);
1838 	}
1839 	hca_attr->hca_max_ah = val;
1840 
1841 	/* No RDDs or EECs (since Reliable Datagram is not supported) */
1842 	hca_attr->hca_max_rdd = 0;
1843 	hca_attr->hca_max_eec = 0;
1844 
1845 	/* Initialize lock for reserved UAR page access */
1846 	mutex_init(&state->ts_uar_lock, NULL, MUTEX_DRIVER,
1847 	    DDI_INTR_PRI(state->ts_intrmsi_pri));
1848 
1849 	/* Initialize the flash fields */
1850 	state->ts_fw_flashstarted = 0;
1851 	mutex_init(&state->ts_fw_flashlock, NULL, MUTEX_DRIVER,
1852 	    DDI_INTR_PRI(state->ts_intrmsi_pri));
1853 
1854 	/* Initialize the lock for the info ioctl */
1855 	mutex_init(&state->ts_info_lock, NULL, MUTEX_DRIVER,
1856 	    DDI_INTR_PRI(state->ts_intrmsi_pri));
1857 
1858 	/* Initialize the AVL tree for QP number support */
1859 	tavor_qpn_avl_init(state);
1860 
1861 	/* Initialize the kstat info structure */
1862 	status = tavor_kstat_init(state);
1863 	if (status != DDI_SUCCESS) {
1864 		tavor_qpn_avl_fini(state);
1865 		mutex_destroy(&state->ts_info_lock);
1866 		mutex_destroy(&state->ts_fw_flashlock);
1867 		mutex_destroy(&state->ts_uar_lock);
1868 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
1869 		TAVOR_ATTACH_MSG(state->ts_attach_buf,
1870 		    "soft_state_init_kstatinit_fail");
1871 		return (DDI_FAILURE);
1872 	}
1873 
1874 	return (DDI_SUCCESS);
1875 }
1876 
1877 
1878 /*
1879  * tavor_soft_state_fini()
1880  *    Context: Called only from detach() path context
1881  */
1882 static void
tavor_soft_state_fini(tavor_state_t * state)1883 tavor_soft_state_fini(tavor_state_t *state)
1884 {
1885 	/* Teardown the kstat info */
1886 	tavor_kstat_fini(state);
1887 
1888 	/* Teardown the AVL tree for QP number support */
1889 	tavor_qpn_avl_fini(state);
1890 
1891 	/* Free up info ioctl mutex */
1892 	mutex_destroy(&state->ts_info_lock);
1893 
1894 	/* Free up flash mutex */
1895 	mutex_destroy(&state->ts_fw_flashlock);
1896 
1897 	/* Free up the UAR page access mutex */
1898 	mutex_destroy(&state->ts_uar_lock);
1899 
1900 	/* Free up the hca_attr struct */
1901 	kmem_free(state->ts_ibtfinfo.hca_attr, sizeof (ibt_hca_attr_t));
1902 }
1903 
1904 
1905 /*
1906  * tavor_hca_config_setup()
1907  *    Context: Only called from attach() path context
1908  */
1909 static void
tavor_hca_config_setup(tavor_state_t * state,tavor_hw_initqueryhca_t * inithca)1910 tavor_hca_config_setup(tavor_state_t *state,
1911     tavor_hw_initqueryhca_t *inithca)
1912 {
1913 	tavor_rsrc_pool_info_t	*rsrc_pool;
1914 	uint64_t		ddr_baseaddr, ddr_base_map_addr;
1915 	uint64_t		offset, addr;
1916 	uint_t			mcg_size;
1917 
1918 	/* Set "host endianness".  Default is big endian */
1919 #ifdef	_LITTLE_ENDIAN
1920 	inithca->big_endian	= 0;
1921 #else
1922 	inithca->big_endian	= 1;
1923 #endif
1924 	/* No Address Vector Protection, but Port Checking on by default */
1925 	inithca->udav_chk	= TAVOR_UDAV_PROTECT_DISABLED;
1926 	inithca->udav_port_chk	= TAVOR_UDAV_PORTCHK_ENABLED;
1927 
1928 	ddr_baseaddr	  = (uint64_t)(uintptr_t)state->ts_reg_ddr_baseaddr;
1929 	ddr_base_map_addr = (uint64_t)state->ts_ddr.ddr_baseaddr;
1930 
1931 	/* Setup QPC table */
1932 	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_QPC];
1933 	offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1934 	addr = ddr_base_map_addr + offset;
1935 	inithca->context.qpc_baseaddr_h = (addr >> 32);
1936 	inithca->context.qpc_baseaddr_l = (addr & 0xFFFFFFFF) >> 7;
1937 	inithca->context.log_num_qp	= state->ts_cfg_profile->cp_log_num_qp;
1938 
1939 	/* Setup EEC table (initialize to zero - RD unsupported) */
1940 	inithca->context.eec_baseaddr_h	= 0;
1941 	inithca->context.eec_baseaddr_l	= 0;
1942 	inithca->context.log_num_ee	= 0;
1943 
1944 	/* Setup CQC table */
1945 	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_CQC];
1946 	offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1947 	addr = ddr_base_map_addr + offset;
1948 	inithca->context.cqc_baseaddr_h = (addr >> 32);
1949 	inithca->context.cqc_baseaddr_l = (addr & 0xFFFFFFFF) >> 6;
1950 	inithca->context.log_num_cq	= state->ts_cfg_profile->cp_log_num_cq;
1951 
1952 	/* Setup SRQC table */
1953 	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_SRQC];
1954 	offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1955 	addr = ddr_base_map_addr + offset;
1956 	inithca->context.srqc_baseaddr_h = (addr >> 32);
1957 	inithca->context.srqc_baseaddr_l = (addr & 0xFFFFFFFF) >> 6;
1958 	inithca->context.log_num_srq	 =
1959 	    state->ts_cfg_profile->cp_log_num_srq;
1960 
1961 	/* Setup EQPC table */
1962 	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_EQPC];
1963 	offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1964 	addr = ddr_base_map_addr + offset;
1965 	inithca->context.eqpc_baseaddr	= addr;
1966 
1967 	/* Setup EEEC table (initialize to zero - RD unsupported) */
1968 	inithca->context.eeec_baseaddr	= 0;
1969 
1970 	/* Setup EQC table */
1971 	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_EQC];
1972 	offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1973 	addr = ddr_base_map_addr + offset;
1974 	inithca->context.eqc_baseaddr_h = (addr >> 32);
1975 	inithca->context.eqc_baseaddr_l = (addr & 0xFFFFFFFF) >> 6;
1976 	inithca->context.log_num_eq	= TAVOR_NUM_EQ_SHIFT;
1977 
1978 	/* Setup RDB table */
1979 	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_RDB];
1980 	offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1981 	addr = ddr_base_map_addr + offset;
1982 	inithca->context.rdb_baseaddr_h	= (addr >> 32);
1983 	inithca->context.rdb_baseaddr_l = 0;
1984 
1985 	/* Setup Multicast */
1986 	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MCG];
1987 	offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
1988 	addr = ddr_base_map_addr + offset;
1989 	inithca->multi.mc_baseaddr	= addr;
1990 	mcg_size = TAVOR_MCGMEM_SZ(state);
1991 	inithca->multi.log_mc_tbl_ent	= highbit(mcg_size) - 1;
1992 	inithca->multi.mc_tbl_hash_sz	=
1993 	    (1 << state->ts_cfg_profile->cp_log_num_mcg_hash);
1994 	inithca->multi.mc_hash_fn	= TAVOR_MCG_DEFAULT_HASH_FN;
1995 	inithca->multi.log_mc_tbl_sz	= state->ts_cfg_profile->cp_log_num_mcg;
1996 
1997 
1998 	/* Setup TPT */
1999 	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MPT];
2000 	offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
2001 	addr = ddr_base_map_addr + offset;
2002 	inithca->tpt.mpt_baseaddr	= addr;
2003 	inithca->tpt.mttseg_sz		= TAVOR_MTTSEG_SIZE_SHIFT;
2004 	inithca->tpt.log_mpt_sz		= state->ts_cfg_profile->cp_log_num_mpt;
2005 	inithca->tpt.mtt_version	= TAVOR_MTT_PG_WALK_VER;
2006 
2007 	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
2008 	offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
2009 	addr = ddr_base_map_addr + offset;
2010 	inithca->tpt.mtt_baseaddr	= addr;
2011 
2012 	/* Setup UAR */
2013 	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_UAR_SCR];
2014 	offset = (uint64_t)(uintptr_t)rsrc_pool->rsrc_start - ddr_baseaddr;
2015 	addr = ddr_base_map_addr + offset;
2016 	inithca->uar.uarscr_baseaddr	= addr;
2017 
2018 	inithca->uar.uar_pg_sz = PAGESHIFT - 0xC;
2019 }
2020 
2021 
2022 /*
2023  * tavor_hca_port_init()
2024  *    Context: Only called from attach() path context
2025  */
2026 static int
tavor_hca_port_init(tavor_state_t * state)2027 tavor_hca_port_init(tavor_state_t *state)
2028 {
2029 	tavor_hw_initib_t	*portinits, *initib;
2030 	tavor_cfg_profile_t	*cfgprof;
2031 	uint_t			num_ports;
2032 	int			i, status;
2033 	uint64_t		maxval, val;
2034 	uint64_t		sysimgguid, nodeguid, portguid;
2035 
2036 	cfgprof = state->ts_cfg_profile;
2037 
2038 	/* Get number of HCA ports */
2039 	num_ports = cfgprof->cp_num_ports;
2040 
2041 	/* Allocate space for Tavor port init struct(s) */
2042 	portinits = (tavor_hw_initib_t *)kmem_zalloc(num_ports *
2043 	    sizeof (tavor_hw_initib_t), KM_SLEEP);
2044 
2045 	/* Post command to initialize Tavor HCA port */
2046 	for (i = 0; i < num_ports; i++) {
2047 		initib = &portinits[i];
2048 
2049 		/*
2050 		 * Determine whether we need to override the firmware's
2051 		 * default SystemImageGUID setting.
2052 		 */
2053 		sysimgguid = cfgprof->cp_sysimgguid;
2054 		if (sysimgguid != 0) {
2055 			initib->set_sysimg_guid	= 1;
2056 			initib->sysimg_guid	= sysimgguid;
2057 		}
2058 
2059 		/*
2060 		 * Determine whether we need to override the firmware's
2061 		 * default NodeGUID setting.
2062 		 */
2063 		nodeguid = cfgprof->cp_nodeguid;
2064 		if (nodeguid != 0) {
2065 			initib->set_node_guid	= 1;
2066 			initib->node_guid	= nodeguid;
2067 		}
2068 
2069 		/*
2070 		 * Determine whether we need to override the firmware's
2071 		 * default PortGUID setting.
2072 		 */
2073 		portguid = cfgprof->cp_portguid[i];
2074 		if (portguid != 0) {
2075 			initib->set_port_guid0	= 1;
2076 			initib->guid0		= portguid;
2077 		}
2078 
2079 		/* Validate max MTU size */
2080 		maxval  = state->ts_devlim.max_mtu;
2081 		val	= cfgprof->cp_max_mtu;
2082 		if (val > maxval) {
2083 			goto init_ports_fail;
2084 		}
2085 		initib->mtu_cap = val;
2086 
2087 		/* Validate the max port width */
2088 		maxval  = state->ts_devlim.max_port_width;
2089 		val	= cfgprof->cp_max_port_width;
2090 		if (val > maxval) {
2091 			goto init_ports_fail;
2092 		}
2093 		initib->port_width_cap = val;
2094 
2095 		/* Validate max VL cap size */
2096 		maxval  = state->ts_devlim.max_vl;
2097 		val	= cfgprof->cp_max_vlcap;
2098 		if (val > maxval) {
2099 			goto init_ports_fail;
2100 		}
2101 		initib->vl_cap = val;
2102 
2103 		/* Validate max GID table size */
2104 		maxval  = ((uint64_t)1 << state->ts_devlim.log_max_gid);
2105 		val	= ((uint64_t)1 << cfgprof->cp_log_max_gidtbl);
2106 		if (val > maxval) {
2107 			goto init_ports_fail;
2108 		}
2109 		initib->max_gid = val;
2110 
2111 		/* Validate max PKey table size */
2112 		maxval	= ((uint64_t)1 << state->ts_devlim.log_max_pkey);
2113 		val	= ((uint64_t)1 << cfgprof->cp_log_max_pkeytbl);
2114 		if (val > maxval) {
2115 			goto init_ports_fail;
2116 		}
2117 		initib->max_pkey = val;
2118 
2119 		/*
2120 		 * Post the INIT_IB command to Tavor firmware.  When this
2121 		 * command completes, the corresponding Tavor port will be
2122 		 * physically "Up" and initialized.
2123 		 */
2124 		status = tavor_init_ib_cmd_post(state, initib, i + 1,
2125 		    TAVOR_CMD_NOSLEEP_SPIN);
2126 		if (status != TAVOR_CMD_SUCCESS) {
2127 			cmn_err(CE_CONT, "Tavor: INIT_IB (port %02d) command "
2128 			    "failed: %08x\n", i + 1, status);
2129 			goto init_ports_fail;
2130 		}
2131 	}
2132 
2133 	/* Free up the memory for Tavor port init struct(s), return success */
2134 	kmem_free(portinits, num_ports * sizeof (tavor_hw_initib_t));
2135 	return (DDI_SUCCESS);
2136 
2137 init_ports_fail:
2138 	/*
2139 	 * Free up the memory for Tavor port init struct(s), shutdown any
2140 	 * successfully initialized ports, and return failure
2141 	 */
2142 	kmem_free(portinits, num_ports * sizeof (tavor_hw_initib_t));
2143 	(void) tavor_hca_ports_shutdown(state, i);
2144 
2145 	return (DDI_FAILURE);
2146 }
2147 
2148 
2149 /*
2150  * tavor_hca_ports_shutdown()
2151  *    Context: Only called from attach() and/or detach() path contexts
2152  */
2153 static int
tavor_hca_ports_shutdown(tavor_state_t * state,uint_t num_init)2154 tavor_hca_ports_shutdown(tavor_state_t *state, uint_t num_init)
2155 {
2156 	int	i, status;
2157 
2158 	/*
2159 	 * Post commands to shutdown all init'd Tavor HCA ports.  Note: if
2160 	 * any of these commands fail for any reason, it would be entirely
2161 	 * unexpected and probably indicative a serious problem (HW or SW).
2162 	 * Although we do return void from this function, this type of failure
2163 	 * should not go unreported.
2164 	 */
2165 	for (i = 0; i < num_init; i++) {
2166 		status = tavor_close_ib_cmd_post(state, i + 1,
2167 		    TAVOR_CMD_NOSLEEP_SPIN);
2168 		if (status != TAVOR_CMD_SUCCESS) {
2169 			TAVOR_WARNING(state, "failed to shutdown HCA port");
2170 			return (status);
2171 		}
2172 	}
2173 
2174 	return (TAVOR_CMD_SUCCESS);
2175 }
2176 
2177 
2178 /*
2179  * tavor_internal_uarpgs_init
2180  *    Context: Only called from attach() path context
2181  */
2182 static int
tavor_internal_uarpgs_init(tavor_state_t * state)2183 tavor_internal_uarpgs_init(tavor_state_t *state)
2184 {
2185 	int	status;
2186 
2187 	/*
2188 	 * Save away reserved Tavor UAR page #0.  This UAR page is not to
2189 	 * be used by software.
2190 	 */
2191 	status = tavor_rsrc_alloc(state, TAVOR_UARPG, 1, TAVOR_SLEEP,
2192 	    &state->ts_uarpg0_rsrc_rsrvd);
2193 	if (status != DDI_SUCCESS) {
2194 		return (DDI_FAILURE);
2195 	}
2196 
2197 	/*
2198 	 * Save away Tavor UAR page #1 (for internal use).  This UAR page is
2199 	 * the privileged UAR page through which all kernel generated
2200 	 * doorbells will be rung.
2201 	 */
2202 	status = tavor_rsrc_alloc(state, TAVOR_UARPG, 1, TAVOR_SLEEP,
2203 	    &state->ts_uarpg1_rsrc);
2204 	if (status != DDI_SUCCESS) {
2205 		tavor_rsrc_free(state, &state->ts_uarpg0_rsrc_rsrvd);
2206 		return (DDI_FAILURE);
2207 	}
2208 
2209 	/* Setup pointer to UAR page #1 doorbells */
2210 	state->ts_uar = (tavor_hw_uar_t *)state->ts_uarpg1_rsrc->tr_addr;
2211 
2212 	return (DDI_SUCCESS);
2213 }
2214 
2215 
2216 /*
2217  * tavor_internal_uarpgs_fini
2218  *    Context: Only called from attach() and/or detach() path contexts
2219  */
2220 static void
tavor_internal_uarpgs_fini(tavor_state_t * state)2221 tavor_internal_uarpgs_fini(tavor_state_t *state)
2222 {
2223 	/* Free up Tavor UAR page #1 (kernel driver doorbells) */
2224 	tavor_rsrc_free(state, &state->ts_uarpg1_rsrc);
2225 
2226 	/* Free up Tavor UAR page #0 (reserved) */
2227 	tavor_rsrc_free(state, &state->ts_uarpg0_rsrc_rsrvd);
2228 }
2229 
2230 
2231 /*
2232  * tavor_special_qp_contexts_reserve()
2233  *    Context: Only called from attach() path context
2234  */
2235 static int
tavor_special_qp_contexts_reserve(tavor_state_t * state)2236 tavor_special_qp_contexts_reserve(tavor_state_t *state)
2237 {
2238 	tavor_rsrc_t	*qp0_rsrc, *qp1_rsrc;
2239 	int		status;
2240 
2241 	/* Initialize the lock used for special QP rsrc management */
2242 	mutex_init(&state->ts_spec_qplock, NULL, MUTEX_DRIVER,
2243 	    DDI_INTR_PRI(state->ts_intrmsi_pri));
2244 
2245 	/*
2246 	 * Reserve contexts for QP0.  These QP contexts will be setup to
2247 	 * act as aliases for the real QP0.  Note: We are required to grab
2248 	 * two QPs (one per port) even if we are operating in single-port
2249 	 * mode.
2250 	 */
2251 	status = tavor_rsrc_alloc(state, TAVOR_QPC, 2, TAVOR_SLEEP, &qp0_rsrc);
2252 	if (status != DDI_SUCCESS) {
2253 		mutex_destroy(&state->ts_spec_qplock);
2254 		return (DDI_FAILURE);
2255 	}
2256 	state->ts_spec_qp0 = qp0_rsrc;
2257 
2258 	/*
2259 	 * Reserve contexts for QP1.  These QP contexts will be setup to
2260 	 * act as aliases for the real QP1.  Note: We are required to grab
2261 	 * two QPs (one per port) even if we are operating in single-port
2262 	 * mode.
2263 	 */
2264 	status = tavor_rsrc_alloc(state, TAVOR_QPC, 2, TAVOR_SLEEP, &qp1_rsrc);
2265 	if (status != DDI_SUCCESS) {
2266 		tavor_rsrc_free(state, &qp0_rsrc);
2267 		mutex_destroy(&state->ts_spec_qplock);
2268 		return (DDI_FAILURE);
2269 	}
2270 	state->ts_spec_qp1 = qp1_rsrc;
2271 
2272 	return (DDI_SUCCESS);
2273 }
2274 
2275 
2276 /*
2277  * tavor_special_qp_contexts_unreserve()
2278  *    Context: Only called from attach() and/or detach() path contexts
2279  */
2280 static void
tavor_special_qp_contexts_unreserve(tavor_state_t * state)2281 tavor_special_qp_contexts_unreserve(tavor_state_t *state)
2282 {
2283 	/* Unreserve contexts for QP1 */
2284 	tavor_rsrc_free(state, &state->ts_spec_qp1);
2285 
2286 	/* Unreserve contexts for QP0 */
2287 	tavor_rsrc_free(state, &state->ts_spec_qp0);
2288 
2289 	/* Destroy the lock used for special QP rsrc management */
2290 	mutex_destroy(&state->ts_spec_qplock);
2291 }
2292 
2293 
/*
 * tavor_sw_reset()
 *    Context: Currently called only from attach() path context
 *
 *    Performs a software reset of the Tavor device:
 *      1. saves the PCI config registers of the HCA (and of the parent,
 *         when TAVOR_PARENT_IS_BRIDGE() says the parent is a bridge),
 *      2. writes TAVOR_SW_RESET_START to the "sw_reset" command register,
 *      3. waits "cp_sw_reset_delay" microseconds, then polls config
 *         offset 0 until it no longer reads TAVOR_SW_RESET_NOTDONE,
 *      4. writes the saved config registers back.
 *
 *    Returns DDI_SUCCESS immediately (without resetting anything) when
 *    the configured reset delay is zero, DDI_FAILURE if the config space
 *    of the parent bridge cannot be set up, and DDI_SUCCESS otherwise.
 */
static int
tavor_sw_reset(tavor_state_t *state)
{
	dev_info_t		*dip, *pdip;
	ddi_acc_handle_t	hdl = state->ts_pci_cfghdl, phdl;
	uint32_t		reset_delay;
	int			status, i;

	/*
	 * If the configured software reset delay is set to zero, then we
	 * will not attempt a software reset of the Tavor device.
	 */
	reset_delay = state->ts_cfg_profile->cp_sw_reset_delay;
	if (reset_delay == 0) {
		return (DDI_SUCCESS);
	}

	/*
	 * Get dip for HCA device _and_ parent device as well.  Parent access
	 * is necessary here because software reset of the Tavor hardware
	 * will reinitialize both the config registers of the PCI bridge
	 * (parent, if it exists) and the IB HCA (self)
	 */
	dip  = state->ts_dip;
	pdip = ddi_get_parent(dip);

	/* Query the PCI capabilities of the HCA device */
	tavor_pci_capability_list(state, hdl);

	/*
	 * Read all PCI config info (reg0...reg63).  Note: According to the
	 * Tavor software reset application note, we should not read or
	 * restore the values in reg22 and reg23.  Register "i" lives at
	 * config space byte offset "i << 2".
	 */
	for (i = 0; i < TAVOR_SW_RESET_NUMREGS; i++) {
		if ((i != TAVOR_SW_RESET_REG22_RSVD) &&
		    (i != TAVOR_SW_RESET_REG23_RSVD)) {
			state->ts_cfg_data[i]  = pci_config_get32(hdl, i << 2);
		}
	}

	if (TAVOR_PARENT_IS_BRIDGE(pdip)) {
		/*
		 * Setup for PCI config read/write of bridge device
		 */
		status = pci_config_setup(pdip, &phdl);
		if (status != DDI_SUCCESS) {
			return (DDI_FAILURE);
		}

		/*
		 * Read all PCI config info (reg0...reg63).  Note: According to
		 * the Tavor software reset application note, we should not
		 * read or restore the values in reg22 and reg23.
		 */
		for (i = 0; i < TAVOR_SW_RESET_NUMREGS; i++) {
			if ((i != TAVOR_SW_RESET_REG22_RSVD) &&
			    (i != TAVOR_SW_RESET_REG23_RSVD)) {
				state->ts_cfg_pdata[i] =
				    pci_config_get32(phdl, i << 2);
			}
		}
	}

	/*
	 * Perform the software reset (by writing 1 at offset 0xF0010)
	 */
	ddi_put32(state->ts_reg_cmdhdl, state->ts_cmd_regs.sw_reset,
	    TAVOR_SW_RESET_START);

	/* Give the hardware the configured amount of time to reset */
	drv_usecwait(reset_delay);

	if (TAVOR_PARENT_IS_BRIDGE(pdip)) {
		/*
		 * Bridge exists, so wait for the bridge to become ready.
		 *
		 * The above delay is necessary to avoid system panic from
		 * Master Abort.  If the device is accessed before this delay,
		 * device will not respond to config cycles and they will be
		 * terminated with a Master Abort which will panic the system.
		 * Below is the loop we use to poll status from the device to
		 * determine if it is OK to proceed.
		 *
		 * NOTE(review): this poll has no iteration limit, and the
		 * "i = 0" below is never read before "i" is reassigned by
		 * the next for loop - confirm whether a bounded retry count
		 * was intended here.
		 */
		i = 0;
		while (pci_config_get32(phdl, 0) == TAVOR_SW_RESET_NOTDONE) {
			drv_usecwait(TAVOR_SW_RESET_POLL_DELAY);
		}

		/*
		 * Write all the PCI config registers back into each device
		 * (except for reg22 and reg23 - see above)
		 */
		for (i = 0; i < TAVOR_SW_RESET_NUMREGS; i++) {
			if ((i != TAVOR_SW_RESET_REG22_RSVD) &&
			    (i != TAVOR_SW_RESET_REG23_RSVD)) {
				pci_config_put32(phdl, i << 2,
				    state->ts_cfg_pdata[i]);
			}
		}

		/*
		 * Tear down the config setup (for bridge device)
		 */
		pci_config_teardown(&phdl);

	/* No Bridge Device */
	} else {
		/*
		 * Bridge does not exist, so instead wait for the device itself
		 * to become ready.
		 *
		 * The above delay is necessary to avoid system panic from
		 * Master Abort.  If the device is accessed before this delay,
		 * device will not respond to config cycles and they will be
		 * terminated with a Master Abort which will panic the system.
		 * Below is the loop we use to poll status from the device to
		 * determine if it is OK to proceed.
		 *
		 * NOTE(review): as above, this poll is unbounded and the
		 * "i = 0" store is not used.
		 */
		i = 0;
		while (pci_config_get32(hdl, 0) == TAVOR_SW_RESET_NOTDONE) {
			drv_usecwait(TAVOR_SW_RESET_POLL_DELAY);
		}
	}

	/*
	 * Finally, write the saved PCI config registers back into the HCA
	 * itself (again skipping reg22 and reg23).
	 */
	for (i = 0; i < TAVOR_SW_RESET_NUMREGS; i++) {
		if ((i != TAVOR_SW_RESET_REG22_RSVD) &&
		    (i != TAVOR_SW_RESET_REG23_RSVD)) {
			pci_config_put32(hdl, i << 2, state->ts_cfg_data[i]);
		}
	}

	return (DDI_SUCCESS);
}
2431 
2432 
2433 /*
2434  * tavor_mcg_init()
2435  *    Context: Only called from attach() path context
2436  */
2437 static int
tavor_mcg_init(tavor_state_t * state)2438 tavor_mcg_init(tavor_state_t *state)
2439 {
2440 	uint_t		mcg_tmp_sz;
2441 
2442 	/*
2443 	 * Allocate space for the MCG temporary copy buffer.  This is
2444 	 * used by the Attach/Detach Multicast Group code
2445 	 */
2446 	mcg_tmp_sz = TAVOR_MCGMEM_SZ(state);
2447 	state->ts_mcgtmp = kmem_zalloc(mcg_tmp_sz, KM_SLEEP);
2448 
2449 	/*
2450 	 * Initialize the multicast group mutex.  This ensures atomic
2451 	 * access to add, modify, and remove entries in the multicast
2452 	 * group hash lists.
2453 	 */
2454 	mutex_init(&state->ts_mcglock, NULL, MUTEX_DRIVER,
2455 	    DDI_INTR_PRI(state->ts_intrmsi_pri));
2456 
2457 	return (DDI_SUCCESS);
2458 }
2459 
2460 
2461 /*
2462  * tavor_mcg_fini()
2463  *    Context: Only called from attach() and/or detach() path contexts
2464  */
2465 static void
tavor_mcg_fini(tavor_state_t * state)2466 tavor_mcg_fini(tavor_state_t *state)
2467 {
2468 	uint_t		mcg_tmp_sz;
2469 
2470 	/* Free up the space used for the MCG temporary copy buffer */
2471 	mcg_tmp_sz = TAVOR_MCGMEM_SZ(state);
2472 	kmem_free(state->ts_mcgtmp, mcg_tmp_sz);
2473 
2474 	/* Destroy the multicast group mutex */
2475 	mutex_destroy(&state->ts_mcglock);
2476 }
2477 
2478 
2479 /*
2480  * tavor_fw_version_check()
2481  *    Context: Only called from attach() path context
2482  */
2483 static int
tavor_fw_version_check(tavor_state_t * state)2484 tavor_fw_version_check(tavor_state_t *state)
2485 {
2486 	uint_t	tavor_fw_ver_major;
2487 	uint_t	tavor_fw_ver_minor;
2488 	uint_t	tavor_fw_ver_subminor;
2489 
2490 	/*
2491 	 * Depending on which version of driver we have attached, the firmware
2492 	 * version checks will be different.  We set up the comparison values
2493 	 * for both HCA Mode (Tavor hardware) or COMPAT Mode (Arbel hardware
2494 	 * running in tavor mode).
2495 	 */
2496 	switch (state->ts_operational_mode) {
2497 	case TAVOR_HCA_MODE:
2498 		tavor_fw_ver_major = TAVOR_FW_VER_MAJOR;
2499 		tavor_fw_ver_minor = TAVOR_FW_VER_MINOR;
2500 		tavor_fw_ver_subminor = TAVOR_FW_VER_SUBMINOR;
2501 		break;
2502 
2503 	case TAVOR_COMPAT_MODE:
2504 		tavor_fw_ver_major = TAVOR_COMPAT_FW_VER_MAJOR;
2505 		tavor_fw_ver_minor = TAVOR_COMPAT_FW_VER_MINOR;
2506 		tavor_fw_ver_subminor = TAVOR_COMPAT_FW_VER_SUBMINOR;
2507 		break;
2508 
2509 	default:
2510 		return (DDI_FAILURE);
2511 	}
2512 
2513 	/*
2514 	 * If FW revision major number is less than acceptable,
2515 	 * return failure, else if greater return success.  If
2516 	 * the major numbers are equal than check the minor number
2517 	 */
2518 	if (state->ts_fw.fw_rev_major < tavor_fw_ver_major) {
2519 		return (DDI_FAILURE);
2520 	} else if (state->ts_fw.fw_rev_major > tavor_fw_ver_major) {
2521 		return (DDI_SUCCESS);
2522 	}
2523 	/*
2524 	 * Do the same check as above, except for minor revision numbers
2525 	 * If the minor numbers are equal than check the subminor number
2526 	 */
2527 	if (state->ts_fw.fw_rev_minor < tavor_fw_ver_minor) {
2528 		return (DDI_FAILURE);
2529 	} else if (state->ts_fw.fw_rev_minor > tavor_fw_ver_minor) {
2530 		return (DDI_SUCCESS);
2531 	}
2532 
2533 	/*
2534 	 * Once again we do the same check as above, except for the subminor
2535 	 * revision number.  If the subminor numbers are equal here, then
2536 	 * these are the same firmware version, return success
2537 	 */
2538 	if (state->ts_fw.fw_rev_subminor < tavor_fw_ver_subminor) {
2539 		return (DDI_FAILURE);
2540 	} else if (state->ts_fw.fw_rev_subminor > tavor_fw_ver_subminor) {
2541 		return (DDI_SUCCESS);
2542 	}
2543 
2544 	return (DDI_SUCCESS);
2545 }
2546 
2547 
2548 /*
2549  * tavor_device_info_report()
2550  *    Context: Only called from attach() path context
2551  */
2552 static void
tavor_device_info_report(tavor_state_t * state)2553 tavor_device_info_report(tavor_state_t *state)
2554 {
2555 	cmn_err(CE_CONT, "?tavor%d: FW ver: %04d.%04d.%04d, "
2556 	    "HW rev: %02x\n", state->ts_instance, state->ts_fw.fw_rev_major,
2557 	    state->ts_fw.fw_rev_minor, state->ts_fw.fw_rev_subminor,
2558 	    state->ts_adapter.rev_id);
2559 	cmn_err(CE_CONT, "?tavor%d: %64s (0x%016" PRIx64 ")\n",
2560 	    state->ts_instance, state->ts_nodedesc, state->ts_nodeguid);
2561 }
2562 
2563 
2564 /*
2565  * tavor_pci_capability_list()
2566  *    Context: Only called from attach() path context
2567  */
2568 static void
tavor_pci_capability_list(tavor_state_t * state,ddi_acc_handle_t hdl)2569 tavor_pci_capability_list(tavor_state_t *state, ddi_acc_handle_t hdl)
2570 {
2571 	uint_t	offset, data;
2572 
2573 	/*
2574 	 * Check for the "PCI Capabilities" bit in the "Status Register".
2575 	 * Bit 4 in this register indicates the presence of a "PCI
2576 	 * Capabilities" list.
2577 	 */
2578 	data = pci_config_get16(hdl, 0x6);
2579 	if ((data & 0x10) == 0) {
2580 		return;
2581 	}
2582 
2583 	/*
2584 	 * Starting from offset 0x34 in PCI config space, find the
2585 	 * head of "PCI capabilities" list, and walk the list.  If
2586 	 * capabilities of a known type are encountered (e.g.
2587 	 * "PCI-X Capability"), then call the appropriate handler
2588 	 * function.
2589 	 */
2590 	offset = pci_config_get8(hdl, 0x34);
2591 	while (offset != 0x0) {
2592 		data = pci_config_get8(hdl, offset);
2593 
2594 		/*
2595 		 * Check for known capability types.  Tavor has the
2596 		 * following:
2597 		 *    o VPD Capability   (0x03)
2598 		 *    o PCI-X Capability (0x07)
2599 		 *    o MSI Capability   (0x05)
2600 		 *    o MSIX Capability  (0x11)
2601 		 */
2602 		switch (data) {
2603 		case 0x03:
2604 			tavor_pci_capability_vpd(state, hdl, offset);
2605 			break;
2606 		case 0x07:
2607 			tavor_pci_capability_pcix(state, hdl, offset);
2608 			break;
2609 		case 0x05:
2610 			break;
2611 		default:
2612 			break;
2613 		}
2614 
2615 		/* Get offset of next entry in list */
2616 		offset = pci_config_get8(hdl, offset + 1);
2617 	}
2618 }
2619 
2620 /*
2621  * tavor_pci_read_vpd()
2622  *    Context: Only called from attach() path context
2623  *    utility routine for tavor_pci_capability_vpd()
2624  */
2625 static int
tavor_pci_read_vpd(ddi_acc_handle_t hdl,uint_t offset,uint32_t addr,uint32_t * data)2626 tavor_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset, uint32_t addr,
2627     uint32_t *data)
2628 {
2629 	int		retry = 4;  /* retry counter for EEPROM poll */
2630 	uint32_t	val;
2631 	int		vpd_addr = offset + 2;
2632 	int		vpd_data = offset + 4;
2633 
2634 	/*
2635 	 * In order to read a 32-bit value from VPD, we are to write down
2636 	 * the address (offset in the VPD itself) to the address register.
2637 	 * To signal the read, we also clear bit 31.  We then poll on bit 31
2638 	 * and when it is set, we can then read our 4 bytes from the data
2639 	 * register.
2640 	 */
2641 	(void) pci_config_put32(hdl, offset, addr << 16);
2642 	do {
2643 		drv_usecwait(1000);
2644 		val = pci_config_get16(hdl, vpd_addr);
2645 		if ((val >> 15) & 0x01) {
2646 			*data = pci_config_get32(hdl, vpd_data);
2647 			return (DDI_SUCCESS);
2648 		}
2649 	} while (--retry);
2650 
2651 	return (DDI_FAILURE);
2652 }
2653 
2654 
2655 /*
2656  * tavor_pci_capability_vpd()
2657  *    Context: Only called from attach() path context
2658  */
2659 static void
tavor_pci_capability_vpd(tavor_state_t * state,ddi_acc_handle_t hdl,uint_t offset)2660 tavor_pci_capability_vpd(tavor_state_t *state, ddi_acc_handle_t hdl,
2661     uint_t offset)
2662 {
2663 	uint8_t			name_length;
2664 	uint8_t			pn_length;
2665 	int			i, err = 0;
2666 	int			vpd_str_id = 0;
2667 	int			vpd_ro_desc;
2668 	int			vpd_ro_pn_desc;
2669 #ifndef _LITTLE_ENDIAN
2670 	uint32_t		data32;
2671 #endif /* _LITTLE_ENDIAN */
2672 	union {
2673 		uint32_t	vpd_int[TAVOR_VPD_HDR_DWSIZE];
2674 		uchar_t		vpd_char[TAVOR_VPD_HDR_BSIZE];
2675 	} vpd;
2676 
2677 	/*
2678 	 * Read Vital Product Data (VPD) from PCI-X capability.
2679 	 */
2680 	for (i = 0; i < TAVOR_VPD_HDR_DWSIZE; i++) {
2681 		err = tavor_pci_read_vpd(hdl, offset, i << 2, &vpd.vpd_int[i]);
2682 		if (err != DDI_SUCCESS) {
2683 			cmn_err(CE_NOTE, "!VPD read failed\n");
2684 			goto out;
2685 		}
2686 	}
2687 
2688 #ifndef _LITTLE_ENDIAN
2689 	/*
2690 	 * Need to swap bytes for big endian.
2691 	 */
2692 	for (i = 0; i < TAVOR_VPD_HDR_DWSIZE; i++) {
2693 		data32 = vpd.vpd_int[i];
2694 		vpd.vpd_char[(i << 2) + 3] =
2695 		    (uchar_t)((data32 & 0xFF000000) >> 24);
2696 		vpd.vpd_char[(i << 2) + 2] =
2697 		    (uchar_t)((data32 & 0x00FF0000) >> 16);
2698 		vpd.vpd_char[(i << 2) + 1] =
2699 		    (uchar_t)((data32 & 0x0000FF00) >> 8);
2700 		vpd.vpd_char[i << 2] = (uchar_t)(data32 & 0x000000FF);
2701 	}
2702 #endif	/* _LITTLE_ENDIAN */
2703 
2704 	/* Check for VPD String ID Tag */
2705 	if (vpd.vpd_char[vpd_str_id] == 0x82) {
2706 		/* get the product name */
2707 		name_length = (uint8_t)vpd.vpd_char[vpd_str_id + 1];
2708 		if (name_length > sizeof (state->ts_hca_name)) {
2709 			cmn_err(CE_NOTE, "!VPD name too large (0x%x)\n",
2710 			    name_length);
2711 			goto out;
2712 		}
2713 		(void) memcpy(state->ts_hca_name, &vpd.vpd_char[vpd_str_id + 3],
2714 		    name_length);
2715 		state->ts_hca_name[name_length] = 0;
2716 
2717 		/* get the part number */
2718 		vpd_ro_desc = name_length + 3; /* read-only tag location */
2719 		vpd_ro_pn_desc = vpd_ro_desc + 3; /* P/N keyword location */
2720 		/*
2721 		 * Verify read-only tag and Part Number keyword.
2722 		 */
2723 		if (vpd.vpd_char[vpd_ro_desc] != 0x90 ||
2724 		    (vpd.vpd_char[vpd_ro_pn_desc] != 'P' &&
2725 		    vpd.vpd_char[vpd_ro_pn_desc + 1] != 'N')) {
2726 			cmn_err(CE_NOTE, "!VPD Part Number not found\n");
2727 			goto out;
2728 		}
2729 
2730 		pn_length = (uint8_t)vpd.vpd_char[vpd_ro_pn_desc + 2];
2731 		if (pn_length > sizeof (state->ts_hca_pn)) {
2732 			cmn_err(CE_NOTE, "!VPD part number too large (0x%x)\n",
2733 			    name_length);
2734 			goto out;
2735 		}
2736 		(void) memcpy(state->ts_hca_pn,
2737 		    &vpd.vpd_char[vpd_ro_pn_desc + 3],
2738 		    pn_length);
2739 		state->ts_hca_pn[pn_length] = 0;
2740 		state->ts_hca_pn_len = pn_length;
2741 	} else {
2742 		/* Wrong VPD String ID Tag */
2743 		cmn_err(CE_NOTE, "!VPD String ID Tag not found, tag: %02x\n",
2744 		    vpd.vpd_char[0]);
2745 		goto out;
2746 	}
2747 	return;
2748 out:
2749 	state->ts_hca_pn_len = 0;
2750 }
2751 
2752 /*
2753  * tavor_pci_capability_pcix()
2754  *    Context: Only called from attach() path context
2755  */
2756 static void
tavor_pci_capability_pcix(tavor_state_t * state,ddi_acc_handle_t hdl,uint_t offset)2757 tavor_pci_capability_pcix(tavor_state_t *state, ddi_acc_handle_t hdl,
2758     uint_t offset)
2759 {
2760 	uint_t	command, status;
2761 	int	max_out_splt_trans, max_mem_rd_byte_cnt;
2762 	int	designed_max_out_splt_trans, designed_max_mem_rd_byte_cnt;
2763 
2764 	/*
2765 	 * Query the current values for the PCI-X Command Register and
2766 	 * the PCI-X Status Register.
2767 	 */
2768 	command = pci_config_get16(hdl, offset + 2);
2769 	status  = pci_config_get32(hdl, offset + 4);
2770 
2771 	/*
2772 	 * Check for config property specifying "maximum outstanding
2773 	 * split transactions".  If the property is defined and valid
2774 	 * (i.e. no larger than the so-called "designed maximum"),
2775 	 * then use the specified value to update the PCI-X Command Register.
2776 	 * Otherwise, extract the value from the Tavor config profile.
2777 	 */
2778 	designed_max_out_splt_trans = ((status >> 23) & 7);
2779 	max_out_splt_trans = ddi_prop_get_int(DDI_DEV_T_ANY, state->ts_dip,
2780 	    DDI_PROP_DONTPASS, "pcix-max-outstanding-split-trans", -1);
2781 	if ((max_out_splt_trans != -1) &&
2782 	    ((max_out_splt_trans < 0) ||
2783 	    (max_out_splt_trans > designed_max_out_splt_trans))) {
2784 		cmn_err(CE_NOTE, "!tavor%d: property \"pcix-max-outstanding-"
2785 		    "split-trans\" (%d) invalid or exceeds device maximum"
2786 		    " (%d), using default value (%d)\n", state->ts_instance,
2787 		    max_out_splt_trans, designed_max_out_splt_trans,
2788 		    state->ts_cfg_profile->cp_max_out_splt_trans);
2789 		max_out_splt_trans =
2790 		    state->ts_cfg_profile->cp_max_out_splt_trans;
2791 	} else if (max_out_splt_trans == -1) {
2792 		max_out_splt_trans =
2793 		    state->ts_cfg_profile->cp_max_out_splt_trans;
2794 	}
2795 
2796 	/*
2797 	 * The config profile setting for max_out_splt_trans is determined
2798 	 * based on arch.  Check tavor_cfg.c for more information.  A value of
2799 	 * '-1' in the patchable variable means "do not change".  A value of
2800 	 * '0' means 1 outstanding splt trans and other values as defined by
2801 	 * PCI.  So we do one more check here, that if 'max_out_splt_trans' is
2802 	 * -1 (ie: < 0) we do not set the PCI command and leave it at the
2803 	 * default.
2804 	 */
2805 	if (max_out_splt_trans >= 0) {
2806 		command = ((command & 0xFF8F) | max_out_splt_trans << 4);
2807 	}
2808 
2809 	/*
2810 	 * Check for config property specifying "maximum memory read
2811 	 * byte count.  If the property is defined and valid
2812 	 * (i.e. no larger than the so-called "designed maximum"),
2813 	 * then use the specified value to update the PCI-X Command Register.
2814 	 * Otherwise, extract the value from the Tavor config profile.
2815 	 */
2816 	designed_max_mem_rd_byte_cnt = ((status >> 21) & 3);
2817 	max_mem_rd_byte_cnt = ddi_prop_get_int(DDI_DEV_T_ANY, state->ts_dip,
2818 	    DDI_PROP_DONTPASS, "pcix-max-read-byte-count", -1);
2819 	if ((max_mem_rd_byte_cnt != -1) &&
2820 	    ((max_mem_rd_byte_cnt < 0) ||
2821 	    (max_mem_rd_byte_cnt > designed_max_mem_rd_byte_cnt))) {
2822 		cmn_err(CE_NOTE, "!tavor%d: property \"pcix-max-read-byte-"
2823 		    "count\" (%d) invalid or exceeds device maximum"
2824 		    " (%d), using default value (%d)\n", state->ts_instance,
2825 		    max_mem_rd_byte_cnt, designed_max_mem_rd_byte_cnt,
2826 		    state->ts_cfg_profile->cp_max_mem_rd_byte_cnt);
2827 		max_mem_rd_byte_cnt =
2828 		    state->ts_cfg_profile->cp_max_mem_rd_byte_cnt;
2829 	} else if (max_mem_rd_byte_cnt == -1) {
2830 		max_mem_rd_byte_cnt =
2831 		    state->ts_cfg_profile->cp_max_mem_rd_byte_cnt;
2832 	}
2833 
2834 	/*
2835 	 * The config profile setting for max_mem_rd_byte_cnt is determined
2836 	 * based on arch.  Check tavor_cfg.c for more information.  A value of
2837 	 * '-1' in the patchable variable means "do not change".  A value of
2838 	 * '0' means minimum (512B) read, and other values as defined by
2839 	 * PCI.  So we do one more check here, that if 'max_mem_rd_byte_cnt' is
2840 	 * -1 (ie: < 0) we do not set the PCI command and leave it at the
2841 	 * default.
2842 	 */
2843 	if (max_mem_rd_byte_cnt >= 0) {
2844 		command = ((command & 0xFFF3) | max_mem_rd_byte_cnt << 2);
2845 	}
2846 
2847 	/*
2848 	 * Update the PCI-X Command Register with the newly configured
2849 	 * values.
2850 	 */
2851 	pci_config_put16(hdl, offset + 2, command);
2852 }
2853 
2854 
2855 /*
2856  * tavor_intr_or_msi_init()
2857  *    Context: Only called from attach() path context
2858  */
2859 static int
tavor_intr_or_msi_init(tavor_state_t * state)2860 tavor_intr_or_msi_init(tavor_state_t *state)
2861 {
2862 	int	status;
2863 
2864 	/* Query for the list of supported interrupt event types */
2865 	status = ddi_intr_get_supported_types(state->ts_dip,
2866 	    &state->ts_intr_types_avail);
2867 	if (status != DDI_SUCCESS) {
2868 		return (DDI_FAILURE);
2869 	}
2870 
2871 	/*
2872 	 * If Tavor/Arbel supports MSI in this system (and, if it
2873 	 * hasn't been overridden by a configuration variable), then
2874 	 * the default behavior is to use a single MSI.  Otherwise,
2875 	 * fallback to using legacy interrupts.  Also, if MSI allocatis chosen,
2876 	 * but fails for whatever reasons, then fallback to using legacy
2877 	 * interrupts.
2878 	 */
2879 	if ((state->ts_cfg_profile->cp_use_msi_if_avail != 0) &&
2880 	    (state->ts_intr_types_avail & DDI_INTR_TYPE_MSI)) {
2881 		status = tavor_add_intrs(state, DDI_INTR_TYPE_MSI);
2882 		if (status == DDI_SUCCESS) {
2883 			state->ts_intr_type_chosen = DDI_INTR_TYPE_MSI;
2884 			return (DDI_SUCCESS);
2885 		}
2886 	}
2887 
2888 	/*
2889 	 * MSI interrupt allocation failed, or was not available.  Fallback to
2890 	 * legacy interrupt support.
2891 	 */
2892 	if (state->ts_intr_types_avail & DDI_INTR_TYPE_FIXED) {
2893 		status = tavor_add_intrs(state, DDI_INTR_TYPE_FIXED);
2894 		if (status == DDI_SUCCESS) {
2895 			state->ts_intr_type_chosen = DDI_INTR_TYPE_FIXED;
2896 			return (DDI_SUCCESS);
2897 		}
2898 	}
2899 
2900 	/*
2901 	 * Neither MSI or legacy interrupts were successful.  return failure.
2902 	 */
2903 	return (DDI_FAILURE);
2904 }
2905 
2906 /*
2907  * tavor_add_intrs()
2908  *    Context: Only called from attach() patch context
2909  */
2910 static int
tavor_add_intrs(tavor_state_t * state,int intr_type)2911 tavor_add_intrs(tavor_state_t *state, int intr_type)
2912 {
2913 	int status;
2914 
2915 	/* Get number of interrupts/MSI supported */
2916 	status = ddi_intr_get_nintrs(state->ts_dip, intr_type,
2917 	    &state->ts_intrmsi_count);
2918 	if (status != DDI_SUCCESS) {
2919 		return (DDI_FAILURE);
2920 	}
2921 
2922 	/* Get number of available interrupts/MSI */
2923 	status = ddi_intr_get_navail(state->ts_dip, intr_type,
2924 	    &state->ts_intrmsi_avail);
2925 	if (status != DDI_SUCCESS) {
2926 		return (DDI_FAILURE);
2927 	}
2928 
2929 	/* Ensure that we have at least one (1) usable MSI or interrupt */
2930 	if ((state->ts_intrmsi_avail < 1) || (state->ts_intrmsi_count < 1)) {
2931 		return (DDI_FAILURE);
2932 	}
2933 
2934 	/* Attempt to allocate a single interrupt/MSI handle */
2935 	status = ddi_intr_alloc(state->ts_dip, &state->ts_intrmsi_hdl,
2936 	    intr_type, 0, 1, &state->ts_intrmsi_allocd,
2937 	    DDI_INTR_ALLOC_STRICT);
2938 	if (status != DDI_SUCCESS) {
2939 		return (DDI_FAILURE);
2940 	}
2941 
2942 	/* Ensure that we have allocated at least one (1) MSI or interrupt */
2943 	if (state->ts_intrmsi_allocd < 1) {
2944 		return (DDI_FAILURE);
2945 	}
2946 
2947 	/*
2948 	 * Extract the priority for the allocated interrupt/MSI.  This
2949 	 * will be used later when initializing certain mutexes.
2950 	 */
2951 	status = ddi_intr_get_pri(state->ts_intrmsi_hdl,
2952 	    &state->ts_intrmsi_pri);
2953 	if (status != DDI_SUCCESS) {
2954 		/* Free the allocated interrupt/MSI handle */
2955 		(void) ddi_intr_free(state->ts_intrmsi_hdl);
2956 
2957 		return (DDI_FAILURE);
2958 	}
2959 
2960 	/* Make sure the interrupt/MSI priority is below 'high level' */
2961 	if (state->ts_intrmsi_pri >= ddi_intr_get_hilevel_pri()) {
2962 		/* Free the allocated interrupt/MSI handle */
2963 		(void) ddi_intr_free(state->ts_intrmsi_hdl);
2964 
2965 		return (DDI_FAILURE);
2966 	}
2967 
2968 	/* Get add'l capability information regarding interrupt/MSI */
2969 	status = ddi_intr_get_cap(state->ts_intrmsi_hdl,
2970 	    &state->ts_intrmsi_cap);
2971 	if (status != DDI_SUCCESS) {
2972 		/* Free the allocated interrupt/MSI handle */
2973 		(void) ddi_intr_free(state->ts_intrmsi_hdl);
2974 
2975 		return (DDI_FAILURE);
2976 	}
2977 
2978 	return (DDI_SUCCESS);
2979 }
2980 
2981 
2982 /*
2983  * tavor_intr_or_msi_fini()
2984  *    Context: Only called from attach() and/or detach() path contexts
2985  */
2986 static int
tavor_intr_or_msi_fini(tavor_state_t * state)2987 tavor_intr_or_msi_fini(tavor_state_t *state)
2988 {
2989 	int	status;
2990 
2991 	/* Free the allocated interrupt/MSI handle */
2992 	status = ddi_intr_free(state->ts_intrmsi_hdl);
2993 	if (status != DDI_SUCCESS) {
2994 		return (DDI_FAILURE);
2995 	}
2996 
2997 	return (DDI_SUCCESS);
2998 }
2999 
3000 
3001 /* Disable Tavor interrupts */
3002 static int
tavor_intr_disable(tavor_state_t * state)3003 tavor_intr_disable(tavor_state_t *state)
3004 {
3005 	ushort_t msi_ctrl = 0, caps_ctrl = 0;
3006 	ddi_acc_handle_t pci_cfg_hdl = state->ts_pci_cfghdl;
3007 	ASSERT(pci_cfg_hdl != NULL);
3008 	ASSERT(state->ts_intr_types_avail &
3009 	    (DDI_INTR_TYPE_FIXED | DDI_INTR_TYPE_MSI));
3010 
3011 	/*
3012 	 * Check if MSI interrupts are used. If so, disable MSI interupts.
3013 	 * If not, since Tavor doesn't support MSI-X interrupts, assuming the
3014 	 * legacy interrupt is used instead, disable the legacy interrupt.
3015 	 */
3016 	if ((state->ts_cfg_profile->cp_use_msi_if_avail != 0) &&
3017 	    (state->ts_intr_types_avail & DDI_INTR_TYPE_MSI)) {
3018 
3019 		if ((PCI_CAP_LOCATE(pci_cfg_hdl, PCI_CAP_ID_MSI,
3020 		    &caps_ctrl) == DDI_SUCCESS)) {
3021 			if ((msi_ctrl = PCI_CAP_GET16(pci_cfg_hdl, 0,
3022 			    caps_ctrl, PCI_MSI_CTRL)) == PCI_CAP_EINVAL16)
3023 				return (DDI_FAILURE);
3024 		}
3025 		ASSERT(msi_ctrl != 0);
3026 
3027 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
3028 			return (DDI_SUCCESS);
3029 
3030 		if (msi_ctrl &  PCI_MSI_PVM_MASK) {
3031 			int offset = (msi_ctrl &  PCI_MSI_64BIT_MASK) ?
3032 			    PCI_MSI_64BIT_MASKBITS : PCI_MSI_32BIT_MASK;
3033 
3034 			/* Clear all inums in MSI */
3035 			PCI_CAP_PUT32(pci_cfg_hdl, 0, caps_ctrl, offset, 0);
3036 		}
3037 
3038 		/* Disable MSI interrupts */
3039 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;
3040 		PCI_CAP_PUT16(pci_cfg_hdl, 0, caps_ctrl, PCI_MSI_CTRL,
3041 		    msi_ctrl);
3042 
3043 	} else {
3044 		uint16_t cmdreg = pci_config_get16(pci_cfg_hdl, PCI_CONF_COMM);
3045 		ASSERT(state->ts_intr_types_avail & DDI_INTR_TYPE_FIXED);
3046 
3047 		/* Disable the legacy interrupts */
3048 		cmdreg |= PCI_COMM_INTX_DISABLE;
3049 		pci_config_put16(pci_cfg_hdl, PCI_CONF_COMM, cmdreg);
3050 	}
3051 
3052 	return (DDI_SUCCESS);
3053 }
3054 
3055 /* Tavor quiesce(9F) entry */
3056 static int
tavor_quiesce(dev_info_t * dip)3057 tavor_quiesce(dev_info_t *dip)
3058 {
3059 	tavor_state_t *state = ddi_get_soft_state(tavor_statep,
3060 	    DEVI(dip)->devi_instance);
3061 	ASSERT(state != NULL);
3062 
3063 	/* start fastreboot */
3064 	state->ts_quiescing = B_TRUE;
3065 
3066 	/* If it's in maintenance mode, do nothing but return with SUCCESS */
3067 	if (!TAVOR_IS_OPERATIONAL(state->ts_operational_mode)) {
3068 		return (DDI_SUCCESS);
3069 	}
3070 
3071 	/* Shutdown HCA ports */
3072 	if (tavor_hca_ports_shutdown(state,
3073 	    state->ts_cfg_profile->cp_num_ports) != TAVOR_CMD_SUCCESS) {
3074 		state->ts_quiescing = B_FALSE;
3075 		return (DDI_FAILURE);
3076 	}
3077 
3078 	/* Close HCA */
3079 	if (tavor_close_hca_cmd_post(state, TAVOR_CMD_NOSLEEP_SPIN) !=
3080 	    TAVOR_CMD_SUCCESS) {
3081 		state->ts_quiescing = B_FALSE;
3082 		return (DDI_FAILURE);
3083 	}
3084 
3085 	/* Shutdown FW */
3086 	if (tavor_sys_dis_cmd_post(state, TAVOR_CMD_NOSLEEP_SPIN) !=
3087 	    TAVOR_CMD_SUCCESS) {
3088 		state->ts_quiescing = B_FALSE;
3089 		return (DDI_FAILURE);
3090 	}
3091 
3092 	/* Disable interrupts */
3093 	if (tavor_intr_disable(state) != DDI_SUCCESS) {
3094 		state->ts_quiescing = B_FALSE;
3095 		return (DDI_FAILURE);
3096 	}
3097 
3098 	/* SW-reset */
3099 	if (tavor_sw_reset(state) != DDI_SUCCESS) {
3100 		state->ts_quiescing = B_FALSE;
3101 		return (DDI_FAILURE);
3102 	}
3103 
3104 	return (DDI_SUCCESS);
3105 }
3106