/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Racktop Systems, Inc.
 */

/*
 * This driver targets the LSI/Broadcom/AVAGO MegaRAID SAS controllers
 * of the 3rd generation, in particular the models Aero and Ventura.
 *
 * This file contains the interfaces to DDI.
 *
 * Driver attach:
 * --------------
 *
 * For each HBA, the driver will attach three instances. The first will be for
 * the controller, carrying out hardware and driver initialization, while the
 * remaining two are SCSA instances for the RAID (LD) and physical (PD) iports.
 *
 * Controller Initialization:
 * --------------------------
 *
 * The initialization of the controller hardware is split across multiple
 * functions which are called during lmrc_ctrl_attach():
 * 1. As soon as the device registers are mapped, lmrc_adapter_init() will
 *    be called. This will attempt to bring the firmware to a ready state,
 *    after which control registers are read to fetch basic hardware properties
 *    and calculate the sizes of various data structures used by the driver.
 * 2. After setting up interrupts and initializing mutexes, the expected number
 *    of MFI and MPT commands will be pre-allocated. Then, the I/O controller
 *    will be initialized by sending an IOC INIT command.
 * 3. At this point the driver is able to send commands to the controller and
 *    receive replies. This will first be used to retrieve controller firmware
 *    properties to finish driver setup based on the information received.
 * 4. As part of the remaining firmware configuration, we'll post a set of long-
 *    running commands to keep us informed about RAID map and PD map changes.
 *    These commands will complete asynchronously and will be rescheduled every
 *    time they have completed.
 *
 * While it's not really part of the controller initialization, it is worthwhile
 * to mention here that we send a CTRL SHUTDOWN command to the controller during
 * our quiesce(9e).
 *
 *
 * SCSA HBA Setup:
 * ---------------
 *
 * The driver is written to conform to SCSAv3.
 *
 * The driver will attach two iport(9) instances, one for physical devices that
 * are directly exposed by the HBA to the host, and another for logical devices.
 * The latter category not only includes RAID volumes but also physical disks
 * when the controller is in JBOD mode.
 *
 * The attach function for either iport will enumerate the physical and logical
 * devices, respectively, and populate a tgtmap(9). The driver itself maintains
 * target state in lmrc_tgt_t. It will attempt to get the SAS WWN of the
 * target and use it as a device address, falling back to the target ID as used
 * by the controller hardware.
 *
 * The array of target states is initialized once during controller attach. The
 * initial portion of each target state contains a back link to the controller
 * soft state and a mutex, neither of which need changing when a new target is
 * discovered or a target disappears. The array of target states is indexed by
 * the target ID as used by the controller hardware. Unused targets will have
 * their target ID set to LMRC_DEVHDL_INVALID.
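 *
 * As an illustration of this scheme, a read-locked lookup by hardware target
 * ID could look like the following sketch; the function name is made up, only
 * the data structures are the driver's:
 *
 *	lmrc_tgt_t *
 *	example_tgt_lookup(lmrc_t *lmrc, uint16_t dev_id)
 *	{
 *		lmrc_tgt_t *tgt = &lmrc->l_targets[dev_id];
 *
 *		rw_enter(&tgt->tgt_lock, RW_READER);
 *		if (tgt->tgt_dev_id == LMRC_DEVHDL_INVALID) {
 *			rw_exit(&tgt->tgt_lock);
 *			return (NULL);
 *		}
 *
 *		return (tgt);
 *	}
 *
 * A caller would be handed the target read-locked and must rw_exit() the
 * tgt_lock when done with it.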
 *
 *
 * MPT I/O request sending and reply processing:
 * ---------------------------------------------
 *
 * The hardware expects to have access to two large areas of DMA memory that the
 * driver will use to send I/O requests and receive replies. The sizes of these
 * DMA buffers are based on the fixed size of I/O requests and the number of
 * such requests that the controller may accept, and the size of the replies,
 * the queue depth supported by the hardware, and the number of interrupt
 * vectors available for this driver.
 *
 * Based on these numbers, the driver will pre-allocate enough MPT and MFI
 * commands to match the size of the I/O request buffer. In addition, each
 * MPT command will have an SGL chain frame and a sense buffer pre-allocated.
 * A set of functions is available to get an initialized command structure to
 * send a request, and to return it to the command list after use.
 *
 * Sending an MPT I/O request to the controller is done by filling out the I/O
 * frame with all the parameters needed for the request and creating a request
 * descriptor, filling in the SMID of the I/O frame used and the queue number
 * where the reply should be posted. The request descriptor is then written
 * into the appropriate device registers.
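 *
 * As a rough sketch, posting a request might look like the following. The
 * descriptor types are those from the MPI2 headers, and
 * example_write_reqdesc() is a made-up name standing in for the actual
 * register write:
 *
 *	Mpi2RequestDescriptorUnion_t desc;
 *
 *	desc.SCSIIO.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO;
 *	desc.SCSIIO.SMID = mpt->mpt_smid;
 *	desc.SCSIIO.MSIxIndex = queue;
 *	example_write_reqdesc(lmrc, desc.Words);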
 *
 * On completion, an interrupt may or may not be posted, depending on the I/O
 * request flags and the overall system state, such as whether interrupts are
 * enabled at all. If an interrupt is received, any new replies posted into the
 * queue associated with the interrupt vector are processed and their callbacks,
 * if any, will be called. The hardware will be informed about the last reply
 * index processed by writing the appropriate register.
 *
 * Polled I/O is facilitated by repeatedly checking for the presence of a reply,
 * waiting a short time in between, up to a pre-defined timeout.
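 *
 * In outline, the polling loop has the usual shape; example_reply_pending()
 * and the two constants are invented for this sketch:
 *
 *	uint32_t wait_us = EXAMPLE_POLL_TIMEOUT_US;
 *
 *	while (!example_reply_pending(lmrc, queue)) {
 *		if (wait_us < EXAMPLE_POLL_INTERVAL_US)
 *			return (DDI_FAILURE);
 *		drv_usecwait(EXAMPLE_POLL_INTERVAL_US);
 *		wait_us -= EXAMPLE_POLL_INTERVAL_US;
 *	}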
 *
 *
 * MFI (MegaRAID Firmware Interface) commands:
 * -------------------------------------------
 *
 * MFI commands are used internally by the driver or by user space via the ioctl
 * interface. Except for the initial IOC INIT command, all MFI commands will be
 * sent using MPT MFI passthru commands. As the driver uses only a small number
 * of MFI commands, each MFI command has an MPT command preallocated.
 *
 * MFI commands can be sent synchronously in "blocked" or "polled" mode, which
 * differ only in the way the driver waits for completion. When sending a
 * "blocked" command, the driver will set a callback and wait for the hardware
 * to return the command through the normal interrupt driven code path. In
 * "polled" mode, the command has a flag set to indicate to the hardware it
 * should not be posted to a reply queue, and the driver repeatedly checks its
 * status until it changes to indicate completion.
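 *
 * The "blocked" case is the usual mutex/cv pattern, roughly as below. Apart
 * from mfi_lock, the field and helper names here are invented; the callback
 * is assumed to cv_signal() the waiter once the interrupt path has completed
 * the command:
 *
 *	mutex_enter(&mfi->mfi_lock);
 *	mfi->mfi_callback = example_mfi_wakeup;
 *	example_issue_mfi(lmrc, mfi);
 *	while (the command status still indicates it is outstanding)
 *		cv_wait(&mfi->mfi_cv, &mfi->mfi_lock);
 *	mutex_exit(&mfi->mfi_lock);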
 *
 * MFI commands can also be sent asynchronously, in which case they are always
 * completed through the interrupt code path and have a callback. This is used
 * for RAID and PD map updates and Asynchronous Event Notifications (AENs). In
 * all these cases, the commands are usually sent to the hardware again after
 * having been completed, avoiding unnecessary reallocation.
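 *
 * The re-issue pattern amounts to the completion callback processing the
 * result and then posting the very same command again, sketched here with
 * the same invented helper as above:
 *
 *	static void
 *	example_aen_done(lmrc_t *lmrc, lmrc_mfi_cmd_t *mfi)
 *	{
 *		(process the event delivered through mfi)
 *
 *		if (the driver isn't shutting down)
 *			example_issue_mfi(lmrc, mfi);
 *	}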
 *
 * As asynchronous commands can still be outstanding during detach, they can and
 * will be aborted by sending an MFI ABORT command when the driver is shutting
 * down.
 *
 * Asynchronous Event Notifications:
 * ---------------------------------
 *
 * The driver will always have one AEN request outstanding to receive events
 * from the controller. These events aren't very well documented, but it is
 * known that they include a "locale" describing to which aspect of the HBA
 * they apply, which is either the controller itself, physical devices, or
 * logical devices.
 *
 * Most events will be logged but otherwise ignored by the driver, but some
 * inform us about changes to the physical or logical drives connected to the
 * HBA, in which case we update the respective target map.
 *
 *
 * DMA considerations:
 * -------------------
 *
 * Most of the MPT structures can hold a 64bit physical address for DMA, but
 * some don't. Additionally, the hardware may indicate that it doesn't handle
 * 64bit DMA, even though the structures could hold an address this wide.
 *
 * Consequently, the driver keeps two sets of DMA attributes in its soft state,
 * one strictly for 32bit DMA and another one for all other uses which could
 * potentially support 64bit DMA. The latter will be modified to fit what the
 * hardware actually supports.
 *
 *
 * Interrupt considerations:
 * -------------------------
 *
 * Unless we're in the unlikely situation that the hardware claims to not
 * actually support it, the driver will prefer to get MSI-X interrupts. If that
 * fails it'll make do with MSI interrupts, falling back to FIXED interrupts if
 * that fails as well.
 *
 * The number of queues supported is set to the minimum of what the hardware
 * claims to support, and the number of interrupt vectors we can allocate. It is
 * expected that the hardware will support many more queues and interrupt
 * vectors than what the OS gives us by default.
 *
 *
 * Locking considerations:
 * -----------------------
 *
 * The driver uses several mutexes, rwlocks, and one semaphore to serialize
 * accesses to various parts of its internal state.
 *
 * The semaphore lmrc->l_ioctl_sema is used to limit the number of MFI commands
 * concurrently in use by user space. This semaphore needs to be decremented by
 * the ioctl code path before any other locks may be acquired.
 *
 * The PD and RAID maps are each protected by a rwlock, lmrc->l_pdmap_lock and
 * lmrc->l_raidmap_lock. Either map is write-locked only when we receive an
 * updated map from the firmware and copy it over our map, which happens only
 * in the context of the MFI command completion for the respective MAP GET INFO
 * command, with the respective MFI command mutex being held. Read-locking of
 * either map does not require any specific lock ordering.
 *
 * Each lmrc_tgt_t has its own rwlock, tgt->tgt_lock, which is write-locked only
 * during lmrc_tgt_clear(), lmrc_tgt_init(), and lmrc_raid_get_wwn(), all of
 * which run to update our internal target state as the hardware notifies us
 * about a target change. No other locks are held during target state changes.
 * During lmrc_tran_start() and lmrc_task_mgmt(), all other required command and
 * map locks are acquired and released as necessary with the addressed target
 * being read-locked, preventing target state updates while I/O is being done.
 *
 * Each MPT and MFI command has an associated mutex (mpt_lock and mfi_lock,
 * respectively) and condition variable used for synchronization and completion
 * signalling. In general, the mutex should be held while the command is set up
 * until it has been sent to the hardware. The interrupt handler acquires the
 * mutex of each completed command before signalling completion. In case of
 * command abortion, the mutex of a command to be aborted is held to block
 * completion until the ABORT or TASK MGMT command is sent to the hardware to
 * avoid races.
 *
 * To simplify MPT command handling, the function lmrc_get_mpt() used to get an
 * MPT command from the free list always returns the command locked. Mirroring
 * that, lmrc_put_mpt() expects the MPT command to be locked when it is put back
 * on the free list, unlocking it only once it has been linked onto that list.
 *
 * Additionally, each lmrc_tgt_t has an active command list to keep track of all
 * MPT I/O commands sent to a target, protected by its own mutex. When iterating
 * the active command list of a target, the mutex protecting this list must be
 * held while the command mutexes are entered and exited. When adding a command
 * to an active command list, the mutex protecting the list is acquired while
 * the command mutex is held. Care must be taken to avoid a deadlock against the
 * iterating functions when removing a command from an active command list: The
 * command mutex must not be held when the mutex protecting the list is entered.
 * Using the functions for active command list management ensures lock ordering.
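 *
 * To make the removal rule concrete: a deadlock-free removal drops the
 * command mutex before taking the list mutex, as in this sketch (assuming
 * mpt_lock is held on entry):
 *
 *	mutex_exit(&mpt->mpt_lock);
 *	mutex_enter(&tgt->tgt_mpt_active_lock);
 *	mutex_enter(&mpt->mpt_lock);
 *	list_remove(&tgt->tgt_mpt_active, mpt);
 *	mutex_exit(&tgt->tgt_mpt_active_lock);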
 */

#include <sys/class.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/ddi.h>
#include <sys/dditypes.h>
#include <sys/modctl.h>
#include <sys/debug.h>
#include <sys/pci.h>
#include <sys/policy.h>
#include <sys/scsi/scsi.h>

#include <sys/ddifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/io/ddi.h>

#include "lmrc.h"
#include "lmrc_reg.h"
#include "lmrc_ioctl.h"
#include "lmrc_phys.h"

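/*
 * Derive the minor number for a controller instance's lsirdctl ioctl node;
 * see the ddi_create_minor_node() call in lmrc_ctrl_attach().
 */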
#define	INST2LSIRDCTL(x)	((x) << INST_MINOR_SHIFT)

void *lmrc_state;

/*
 * Since the max sgl length can vary, we create a per-instance copy of
 * lmrc_dma_attr and fill in .dma_attr_sgllen with the correct value
 * during attach.
 */
static const ddi_dma_attr_t lmrc_dma_attr = {
	.dma_attr_version =		DMA_ATTR_V0,
	.dma_attr_addr_lo =		0x00000000,
	.dma_attr_addr_hi =		0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max =		0xFFFFFFFF,
	.dma_attr_align =		8,
	.dma_attr_burstsizes =		0x7,
	.dma_attr_minxfer =		1,
	.dma_attr_maxxfer =		0xFFFFFFFF,
	.dma_attr_seg =			0xFFFFFFFF,
	.dma_attr_sgllen =		0,
	.dma_attr_granular =		512,
	.dma_attr_flags =		0,
};

static struct ddi_device_acc_attr lmrc_acc_attr = {
	.devacc_attr_version =		DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags =	DDI_STRUCTURE_LE_ACC,
	.devacc_attr_dataorder =	DDI_STRICTORDER_ACC,
	.devacc_attr_access =		DDI_DEFAULT_ACC,
};

static int lmrc_attach(dev_info_t *, ddi_attach_cmd_t);
static int lmrc_detach(dev_info_t *, ddi_detach_cmd_t);
static int lmrc_ctrl_attach(dev_info_t *);
static int lmrc_ctrl_detach(dev_info_t *);
static int lmrc_cleanup(lmrc_t *, boolean_t);
static lmrc_adapter_class_t lmrc_get_class(lmrc_t *);
static int lmrc_regs_init(lmrc_t *);
static uint_t lmrc_isr(caddr_t, caddr_t);
static int lmrc_add_intrs(lmrc_t *, int);
static int lmrc_intr_init(lmrc_t *);
static void lmrc_intr_fini(lmrc_t *);
static int lmrc_fm_error_cb(dev_info_t *, ddi_fm_error_t *, const void *);
static void lmrc_fm_init(lmrc_t *);
static void lmrc_fm_fini(lmrc_t *);
static int lmrc_alloc_mpt_cmds(lmrc_t *, const size_t);
static void lmrc_free_mpt_cmds(lmrc_t *, const size_t);
static int lmrc_alloc_mfi_cmds(lmrc_t *, const size_t);
static void lmrc_free_mfi_cmds(lmrc_t *, const size_t);

static int
lmrc_ctrl_attach(dev_info_t *dip)
{
	char name[64]; /* large enough for the taskq name */
	lmrc_t *lmrc;
	uint32_t instance;
	int ret;
	int i;

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(lmrc_state, instance) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not allocate soft state");
		return (DDI_FAILURE);
	}

	lmrc = ddi_get_soft_state(lmrc_state, instance);
	lmrc->l_dip = dip;

	lmrc->l_ctrl_info = kmem_zalloc(sizeof (lmrc_ctrl_info_t), KM_SLEEP);
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_BASIC);

	lmrc->l_class = lmrc_get_class(lmrc);

	if (lmrc->l_class == LMRC_ACLASS_OTHER) {
		dev_err(dip, CE_WARN, "unknown controller class");
		goto fail;
	}

	lmrc->l_acc_attr = lmrc_acc_attr;
	lmrc->l_dma_attr = lmrc_dma_attr;
	lmrc->l_dma_attr_32 = lmrc_dma_attr;

	lmrc_fm_init(lmrc);
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_FM);

	if (lmrc_regs_init(lmrc) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_REGS);

	if (lmrc_adapter_init(lmrc) != DDI_SUCCESS)
		goto fail;

	lmrc->l_dma_attr_32.dma_attr_addr_hi = 0xFFFFFFFF;

	/* Restrict all DMA to the lower 32bit address space if necessary. */
	if (!lmrc->l_64bit_dma_support)
		lmrc->l_dma_attr.dma_attr_addr_hi = 0xFFFFFFFF;

	if (lmrc_intr_init(lmrc) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_INTR);

	mutex_init(&lmrc->l_mpt_cmd_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));
	list_create(&lmrc->l_mpt_cmd_list, sizeof (lmrc_mpt_cmd_t),
	    offsetof(lmrc_mpt_cmd_t, mpt_node));

	mutex_init(&lmrc->l_mfi_cmd_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));
	list_create(&lmrc->l_mfi_cmd_list, sizeof (lmrc_mfi_cmd_t),
	    offsetof(lmrc_mfi_cmd_t, mfi_node));

	mutex_init(&lmrc->l_reg_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));

	rw_init(&lmrc->l_raidmap_lock, NULL, RW_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));
	rw_init(&lmrc->l_pdmap_lock, NULL, RW_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));

	sema_init(&lmrc->l_ioctl_sema, LMRC_MAX_IOCTL_CMDS, NULL, SEMA_DRIVER,
	    NULL);

	mutex_init(&lmrc->l_thread_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(lmrc->l_intr_pri));
	cv_init(&lmrc->l_thread_cv, NULL, CV_DRIVER, NULL);

	for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) {
		lmrc_tgt_t *tgt = &lmrc->l_targets[i];

		rw_init(&tgt->tgt_lock, NULL, RW_DRIVER,
		    DDI_INTR_PRI(lmrc->l_intr_pri));
		mutex_init(&tgt->tgt_mpt_active_lock, NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(lmrc->l_intr_pri));
		list_create(&tgt->tgt_mpt_active, sizeof (lmrc_mpt_cmd_t),
		    offsetof(lmrc_mpt_cmd_t, mpt_node));
		tgt->tgt_lmrc = lmrc;
		tgt->tgt_dev_id = LMRC_DEVHDL_INVALID;
	}

	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_SYNC);

	if (lmrc_alloc_mpt_cmds(lmrc, lmrc->l_max_fw_cmds) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_MPTCMDS);

	if (lmrc_alloc_mfi_cmds(lmrc, LMRC_MAX_MFI_CMDS) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_MFICMDS);

	lmrc->l_thread = thread_create(NULL, 0, lmrc_thread, lmrc, 0, &p0,
	    TS_RUN, minclsyspri);
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_THREAD);

	if (lmrc_ioc_init(lmrc) != DDI_SUCCESS)
		goto fail;

	lmrc_enable_intr(lmrc);

	if (lmrc_fw_init(lmrc) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_FW);

	if (lmrc_hba_attach(lmrc) != DDI_SUCCESS)
		goto fail;
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_HBA);

	(void) snprintf(lmrc->l_iocname, sizeof (lmrc->l_iocname),
	    "%d:lsirdctl", instance);
	if (ddi_create_minor_node(dip, lmrc->l_iocname, S_IFCHR,
	    INST2LSIRDCTL(instance), DDI_PSEUDO, 0) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to create ioctl node.");
		goto fail;
	}
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_NODE);

	(void) snprintf(name, sizeof (name), "%s%d_taskq",
	    ddi_driver_name(dip), ddi_get_instance(dip));

	lmrc->l_taskq = taskq_create(name, lmrc->l_max_reply_queues,
	    minclsyspri, 64, INT_MAX, TASKQ_PREPOPULATE);
	if (lmrc->l_taskq == NULL) {
		dev_err(dip, CE_WARN, "failed to create taskq.");
		goto fail;
	}
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_TASKQ);

	if (lmrc_start_aen(lmrc) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to initiate AEN.");
		goto fail;
	}
	INITLEVEL_SET(lmrc, LMRC_INITLEVEL_AEN);

	ddi_report_dev(dip);

	if (lmrc_check_acc_handle(lmrc->l_reghandle) != DDI_SUCCESS) {
		lmrc_fm_ereport(lmrc, DDI_FM_DEVICE_NO_RESPONSE);
		ddi_fm_service_impact(lmrc->l_dip, DDI_SERVICE_LOST);
	}

	return (DDI_SUCCESS);

fail:
	ret = lmrc_cleanup(lmrc, B_TRUE);
	VERIFY3U(ret, ==, DDI_SUCCESS);

	return (DDI_FAILURE);
}

static int
lmrc_ctrl_detach(dev_info_t *dip)
{
	lmrc_t *lmrc = ddi_get_soft_state(lmrc_state, ddi_get_instance(dip));
	VERIFY(lmrc != NULL);

	return (lmrc_cleanup(lmrc, B_FALSE));
}

static int
lmrc_cleanup(lmrc_t *lmrc, boolean_t failed)
{
	int i, ret;

	if (lmrc->l_raid_dip != NULL || lmrc->l_phys_dip != NULL)
		return (DDI_FAILURE);

	/*
	 * Before doing anything else, abort any outstanding commands.
	 * The first commands are issued during FW initialisation, so check
	 * that we're past this point.
	 */
	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_FW)) {
		ret = lmrc_abort_outstanding_mfi(lmrc, LMRC_MAX_MFI_CMDS);
		lmrc_disable_intr(lmrc);
		if (ret != DDI_SUCCESS)
			return (ret);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_AEN)) {
		/* The AEN command was aborted above already. */
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_AEN);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_TASKQ)) {
		taskq_destroy(lmrc->l_taskq);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_TASKQ);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_NODE)) {
		ddi_remove_minor_node(lmrc->l_dip, lmrc->l_iocname);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_NODE);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_HBA)) {
		(void) lmrc_hba_detach(lmrc);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_HBA);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_FW)) {
		lmrc_free_pdmap(lmrc);
		lmrc_free_raidmap(lmrc);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_FW);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_THREAD)) {
		mutex_enter(&lmrc->l_thread_lock);
		lmrc->l_thread_stop = B_TRUE;
		cv_signal(&lmrc->l_thread_cv);
		mutex_exit(&lmrc->l_thread_lock);
		thread_join(lmrc->l_thread->t_did);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_THREAD);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_MFICMDS)) {
		lmrc_free_mfi_cmds(lmrc, LMRC_MAX_MFI_CMDS);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_MFICMDS);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_MPTCMDS)) {
		lmrc_free_mpt_cmds(lmrc, lmrc->l_max_fw_cmds);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_MPTCMDS);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_SYNC)) {
		for (i = 0; i < ARRAY_SIZE(lmrc->l_targets); i++) {
			lmrc_tgt_t *tgt = &lmrc->l_targets[i];

			list_destroy(&tgt->tgt_mpt_active);
			mutex_destroy(&tgt->tgt_mpt_active_lock);
			rw_destroy(&tgt->tgt_lock);
		}

		mutex_destroy(&lmrc->l_thread_lock);
		cv_destroy(&lmrc->l_thread_cv);

		sema_destroy(&lmrc->l_ioctl_sema);

		mutex_destroy(&lmrc->l_mfi_cmd_lock);
		list_destroy(&lmrc->l_mfi_cmd_list);

		mutex_destroy(&lmrc->l_mpt_cmd_lock);
		list_destroy(&lmrc->l_mpt_cmd_list);

		rw_destroy(&lmrc->l_pdmap_lock);
		rw_destroy(&lmrc->l_raidmap_lock);
		mutex_destroy(&lmrc->l_reg_lock);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_SYNC);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_INTR)) {
		lmrc_intr_fini(lmrc);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_INTR);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_REGS)) {
		ddi_regs_map_free(&lmrc->l_reghandle);
		lmrc->l_regmap = NULL;
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_REGS);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_FM)) {
		lmrc_fm_fini(lmrc);
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_FM);
	}

	if (INITLEVEL_ACTIVE(lmrc, LMRC_INITLEVEL_BASIC)) {
		kmem_free(lmrc->l_ctrl_info, sizeof (lmrc_ctrl_info_t));
		INITLEVEL_CLEAR(lmrc, LMRC_INITLEVEL_BASIC);
	}

	VERIFY0(lmrc->l_init_level);
	ddi_soft_state_free(lmrc_state, ddi_get_instance(lmrc->l_dip));

	return (DDI_SUCCESS);
}

static int
lmrc_regs_init(lmrc_t *lmrc)
{
	uint_t regno;
	off_t regsize;

	switch (lmrc->l_class) {
	case LMRC_ACLASS_VENTURA:
	case LMRC_ACLASS_AERO:
		regno = 1;
		break;
	default:
		regno = 2;
		break;
	}

	if (ddi_dev_regsize(lmrc->l_dip, regno, &regsize) != DDI_SUCCESS)
		return (DDI_FAILURE);

	if (regsize < LMRC_MFI_MIN_MEM) {
		dev_err(lmrc->l_dip, CE_WARN, "reg %d size (%ld) is too small",
		    regno, regsize);
		return (DDI_FAILURE);
	}

	if (ddi_regs_map_setup(lmrc->l_dip, regno, &lmrc->l_regmap, 0, 0,
	    &lmrc->l_acc_attr, &lmrc->l_reghandle) != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "unable to map control registers");
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

static uint_t
lmrc_isr(caddr_t arg1, caddr_t arg2)
{
	lmrc_t *lmrc = (lmrc_t *)arg1;
	int queue = (int)(uintptr_t)arg2;
	uint_t ret = DDI_INTR_UNCLAIMED;

	if (lmrc->l_intr_type == DDI_INTR_TYPE_FIXED) {
		ret = lmrc_intr_ack(lmrc);
		if (ret != DDI_INTR_CLAIMED)
			return (ret);
	}

	ret = lmrc_process_replies(lmrc, queue);
	return (ret);
}

static int
lmrc_add_intrs(lmrc_t *lmrc, int intr_type)
{
	int navail, nintrs, count;
	int ret;
	int i;

	if (lmrc->l_intr_types == 0) {
		ret = ddi_intr_get_supported_types(lmrc->l_dip,
		    &lmrc->l_intr_types);
		if (ret != DDI_SUCCESS) {
			dev_err(lmrc->l_dip, CE_WARN,
			    "!%s: ddi_intr_get_supported_types failed",
			    __func__);
			return (ret);
		}
	}

	if ((lmrc->l_intr_types & intr_type) == 0)
		return (DDI_FAILURE);

	/* Don't use MSI-X if the firmware doesn't support it. */
	if (intr_type == DDI_INTR_TYPE_MSIX && !lmrc->l_fw_msix_enabled)
		return (DDI_FAILURE);

	ret = ddi_intr_get_nintrs(lmrc->l_dip, intr_type, &nintrs);
	if (ret != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "!%s: ddi_intr_get_nintrs failed", __func__);
		return (ret);
	}

	ret = ddi_intr_get_navail(lmrc->l_dip, intr_type, &navail);
	if (ret != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "!%s: ddi_intr_get_navail failed", __func__);
		return (ret);
	}

	/*
	 * There's no point in having more interrupts than queues supported by
	 * the hardware.
	 */
	if (navail > lmrc->l_max_reply_queues)
		navail = lmrc->l_max_reply_queues;

	lmrc->l_intr_htable_size = navail * sizeof (ddi_intr_handle_t);
	lmrc->l_intr_htable = kmem_zalloc(lmrc->l_intr_htable_size, KM_SLEEP);

	ret = ddi_intr_alloc(lmrc->l_dip, lmrc->l_intr_htable, intr_type, 0,
	    navail, &count, DDI_INTR_ALLOC_NORMAL);
	if (ret != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN, "!%s: ddi_intr_alloc failed",
		    __func__);
		goto fail;
	}

	if (count < navail) {
		dev_err(lmrc->l_dip, CE_CONT,
		    "?requested %d interrupts, received %d\n", navail, count);
	}

	lmrc->l_intr_count = count;

	ret = ddi_intr_get_pri(lmrc->l_intr_htable[0], &lmrc->l_intr_pri);
	if (ret != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN, "!%s: ddi_intr_get_pri failed",
		    __func__);
		goto fail;
	}

	if (lmrc->l_intr_pri >= ddi_intr_get_hilevel_pri()) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "high level interrupts not supported");
		goto fail;
	}

	for (i = 0; i < lmrc->l_intr_count; i++) {
		ret = ddi_intr_add_handler(lmrc->l_intr_htable[i], lmrc_isr,
		    (caddr_t)lmrc, (caddr_t)(uintptr_t)i);
		if (ret != DDI_SUCCESS) {
			dev_err(lmrc->l_dip, CE_WARN,
			    "!%s: ddi_intr_add_handler failed", __func__);
			goto fail;
		}
	}

	ret = ddi_intr_get_cap(lmrc->l_intr_htable[0], &lmrc->l_intr_cap);
	if (ret != DDI_SUCCESS) {
		dev_err(lmrc->l_dip, CE_WARN,
		    "!%s: ddi_intr_get_cap failed", __func__);
		goto fail;
	}

	if ((lmrc->l_intr_cap & DDI_INTR_FLAG_BLOCK) != 0) {
		ret = ddi_intr_block_enable(lmrc->l_intr_htable, count);
		if (ret != DDI_SUCCESS) {
			dev_err(lmrc->l_dip, CE_WARN,
			    "!%s: ddi_intr_block_enable failed", __func__);
			goto fail;
		}
	} else {
		for (i = 0; i < lmrc->l_intr_count; i++) {
			ret = ddi_intr_enable(lmrc->l_intr_htable[i]);
			if (ret != DDI_SUCCESS) {
				dev_err(lmrc->l_dip, CE_WARN,
				    "!%s: ddi_intr_enable failed", __func__);
				goto fail;
			}
		}
	}

	lmrc->l_intr_type = intr_type;
	return (DDI_SUCCESS);

fail:
	lmrc_intr_fini(lmrc);
	return (ret);
}

static int
lmrc_intr_init(lmrc_t *lmrc)
{
	int ret;

	lmrc_disable_intr(lmrc);

	if ((lmrc_add_intrs(lmrc, DDI_INTR_TYPE_MSIX) != DDI_SUCCESS) &&
	    (lmrc_add_intrs(lmrc, DDI_INTR_TYPE_MSI) != DDI_SUCCESS) &&
	    (lmrc_add_intrs(lmrc, DDI_INTR_TYPE_FIXED) != DDI_SUCCESS)) {
		dev_err(lmrc->l_dip, CE_WARN, "failed to set up interrupts");
		return (DDI_FAILURE);
	}

	dev_err(lmrc->l_dip, CE_NOTE, "!got %d %s interrupts",
	    lmrc->l_intr_count,
	    lmrc->l_intr_type == DDI_INTR_TYPE_MSIX ? "MSI-X" :
	    lmrc->l_intr_type == DDI_INTR_TYPE_MSI ? "MSI" : "FIXED");

	/* Don't use more queues than we got interrupts for. */
	if (lmrc->l_max_reply_queues > lmrc->l_intr_count)
		lmrc->l_max_reply_queues = lmrc->l_intr_count;

	lmrc->l_last_reply_idx =
	    kmem_zalloc(sizeof (uint16_t) * lmrc->l_max_reply_queues, KM_SLEEP);

	/*
	 * While here, allocate the reply descriptor DMA memory and the array
	 * keeping the last reply index for each queue. Each queue will have
	 * space for reply_q_depth MPI2 descriptors (reply_alloc_sz).
	 */
	ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr, &lmrc->l_reply_dma,
	    lmrc->l_reply_alloc_sz * lmrc->l_max_reply_queues, 16,
	    DDI_DMA_CONSISTENT);
	if (ret != DDI_SUCCESS) {
		lmrc_intr_fini(lmrc);
		return (ret);
	}
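
	/*
	 * Fill the reply queue memory with all ones: an all-ones reply
	 * descriptor is how MPI2 denotes an unused reply queue entry.
	 */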
	memset(lmrc->l_reply_dma.ld_buf, -1, lmrc->l_reply_dma.ld_len);

	return (DDI_SUCCESS);
}

static void
lmrc_intr_fini(lmrc_t *lmrc)
{
	uint_t i;

	if (lmrc->l_intr_htable[0] == NULL)
		return;

	if ((lmrc->l_intr_cap & DDI_INTR_FLAG_BLOCK) != 0) {
		(void) ddi_intr_block_disable(lmrc->l_intr_htable,
		    lmrc->l_intr_count);
	}

	for (i = 0; i < lmrc->l_intr_count; i++) {
		if (lmrc->l_intr_htable[i] == NULL)
			break;

		if ((lmrc->l_intr_cap & DDI_INTR_FLAG_BLOCK) == 0)
			(void) ddi_intr_disable(lmrc->l_intr_htable[i]);
		(void) ddi_intr_remove_handler(lmrc->l_intr_htable[i]);
		(void) ddi_intr_free(lmrc->l_intr_htable[i]);
	}

	if (lmrc->l_intr_htable != NULL)
		kmem_free(lmrc->l_intr_htable, lmrc->l_intr_htable_size);

	lmrc->l_intr_htable = NULL;
	lmrc->l_intr_htable_size = 0;

	if (lmrc->l_last_reply_idx != NULL)
		kmem_free(lmrc->l_last_reply_idx,
		    sizeof (uint16_t) * lmrc->l_max_reply_queues);

	lmrc_dma_free(&lmrc->l_reply_dma);
}

static int
lmrc_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err_status,
    const void *arg)
{
	pci_ereport_post(dip, err_status, NULL);
	return (err_status->fme_status);
}

static void
lmrc_fm_init(lmrc_t *lmrc)
{
	ddi_iblock_cookie_t fm_ibc;

	lmrc->l_fm_capabilities = ddi_prop_get_int(DDI_DEV_T_ANY,
	    lmrc->l_dip, DDI_PROP_DONTPASS, "fm-capable",
	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);

	if (lmrc->l_fm_capabilities == 0)
		return;

	lmrc->l_dma_attr.dma_attr_flags = DDI_DMA_FLAGERR;
	lmrc->l_dma_attr_32.dma_attr_flags = DDI_DMA_FLAGERR;
	lmrc->l_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;

	ddi_fm_init(lmrc->l_dip, &lmrc->l_fm_capabilities, &fm_ibc);

	if (DDI_FM_EREPORT_CAP(lmrc->l_fm_capabilities) ||
	    DDI_FM_ERRCB_CAP(lmrc->l_fm_capabilities)) {
		pci_ereport_setup(lmrc->l_dip);
	}

	if (DDI_FM_ERRCB_CAP(lmrc->l_fm_capabilities)) {
		ddi_fm_handler_register(lmrc->l_dip, lmrc_fm_error_cb,
		    lmrc);
	}
}

static void
lmrc_fm_fini(lmrc_t *lmrc)
{
	if (lmrc->l_fm_capabilities == 0)
		return;

	if (DDI_FM_ERRCB_CAP(lmrc->l_fm_capabilities))
		ddi_fm_handler_unregister(lmrc->l_dip);

	if (DDI_FM_EREPORT_CAP(lmrc->l_fm_capabilities) ||
	    DDI_FM_ERRCB_CAP(lmrc->l_fm_capabilities)) {
		pci_ereport_teardown(lmrc->l_dip);
	}

	ddi_fm_fini(lmrc->l_dip);
}

void
lmrc_fm_ereport(lmrc_t *lmrc, const char *detail)
{
	uint64_t ena;
	char buf[FM_MAX_CLASS];

	(void) snprintf(buf, sizeof (buf), "%s.%s", DDI_FM_DEVICE, detail);
	ena = fm_ena_generate(0, FM_ENA_FMT1);
	if (DDI_FM_EREPORT_CAP(lmrc->l_fm_capabilities)) {
		ddi_fm_ereport_post(lmrc->l_dip, buf, ena, DDI_NOSLEEP,
		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERSION, NULL);
	}
}

int
lmrc_check_acc_handle(ddi_acc_handle_t h)
{
	ddi_fm_error_t de;

	if (h == NULL)
		return (DDI_FAILURE);

	ddi_fm_acc_err_get(h, &de, DDI_FME_VERSION);
	return (de.fme_status);
}

int
lmrc_check_dma_handle(ddi_dma_handle_t h)
{
	ddi_fm_error_t de;

	if (h == NULL)
		return (DDI_FAILURE);

	ddi_fm_dma_err_get(h, &de, DDI_FME_VERSION);
	return (de.fme_status);
}
static int
lmrc_alloc_mpt_cmds(lmrc_t *lmrc, const size_t ncmd)
{
	lmrc_mpt_cmd_t **cmds;
	lmrc_mpt_cmd_t *cmd;
	uint32_t i;
	int ret;

	/*
	 * The hardware expects to find MPI I/O request frames in a big chunk
	 * of DMA memory, indexed by the MPT cmd SMID.
	 */
	ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr, &lmrc->l_ioreq_dma,
	    lmrc->l_io_frames_alloc_sz, 256, DDI_DMA_CONSISTENT);
	if (ret != DDI_SUCCESS)
		return (ret);

	cmds = kmem_zalloc(ncmd * sizeof (lmrc_mpt_cmd_t *), KM_SLEEP);
	for (i = 0; i < ncmd; i++) {
		cmd = kmem_zalloc(sizeof (lmrc_mpt_cmd_t), KM_SLEEP);

		/* XXX: allocate on demand in tran_start / build_sgl? */
		ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr,
		    &cmd->mpt_chain_dma, lmrc->l_max_chain_frame_sz, 4,
		    DDI_DMA_CONSISTENT);
		if (ret != DDI_SUCCESS)
			goto fail;

		cmd->mpt_chain = cmd->mpt_chain_dma.ld_buf;

		/*
		 * We request a few bytes more for sense so that we can fit our
		 * arq struct before the actual sense data. We must make sure to
		 * put sts_sensedata at a 64 byte aligned address.
		 */
		ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr_32,
		    &cmd->mpt_sense_dma, LMRC_SENSE_LEN + P2ROUNDUP(
		    offsetof(struct scsi_arq_status, sts_sensedata), 64), 64,
		    DDI_DMA_CONSISTENT);
		if (ret != DDI_SUCCESS)
			goto fail;

		/*
		 * Now that we have a sufficiently sized and 64 byte aligned DMA
		 * buffer for sense, calculate mpt_sense so that it points at a
		 * struct scsi_arq_status somewhere within the first 64 bytes in
		 * the DMA buffer, making sure its sts_sensedata is aligned at
		 * 64 bytes as well.
		 */
		cmd->mpt_sense = cmd->mpt_sense_dma.ld_buf + 64 -
		    offsetof(struct scsi_arq_status, sts_sensedata);
		VERIFY(IS_P2ALIGNED(&(((struct scsi_arq_status *)cmd->mpt_sense)
		    ->sts_sensedata), 64));

		cmd->mpt_smid = i + 1;

		/*
		 * Calculate the address of this command's I/O frame within the
		 * DMA memory allocated earlier.
		 */
		cmd->mpt_io_frame =
		    LMRC_MPI2_RAID_DEFAULT_IO_FRAME_SIZE * cmd->mpt_smid +
		    lmrc->l_ioreq_dma.ld_buf;

		cmd->mpt_lmrc = lmrc;

		mutex_init(&cmd->mpt_lock, NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(lmrc->l_intr_pri));

		cmds[i] = cmd;
		list_insert_tail(&lmrc->l_mpt_cmd_list, cmd);
	}

	lmrc->l_mpt_cmds = cmds;
	return (DDI_SUCCESS);

fail:
	if (cmd->mpt_chain != NULL)
		lmrc_dma_free(&cmd->mpt_chain_dma);
	kmem_free(cmd, sizeof (lmrc_mpt_cmd_t));

	lmrc_free_mpt_cmds(lmrc, ncmd);

	return (ret);
}

static void
lmrc_free_mpt_cmds(lmrc_t *lmrc, const size_t ncmd)
{
	lmrc_mpt_cmd_t *cmd;
	size_t count = 0;

	for (cmd = list_remove_head(&lmrc->l_mpt_cmd_list);
	    cmd != NULL;
	    cmd = list_remove_head(&lmrc->l_mpt_cmd_list)) {
		lmrc_dma_free(&cmd->mpt_chain_dma);
		lmrc_dma_free(&cmd->mpt_sense_dma);
		mutex_destroy(&cmd->mpt_lock);
		kmem_free(cmd, sizeof (lmrc_mpt_cmd_t));
		count++;
	}
	VERIFY3U(count, ==, ncmd);
	VERIFY(list_is_empty(&lmrc->l_mpt_cmd_list));

	kmem_free(lmrc->l_mpt_cmds, ncmd * sizeof (lmrc_mpt_cmd_t *));

	lmrc_dma_free(&lmrc->l_ioreq_dma);
}

static int
lmrc_alloc_mfi_cmds(lmrc_t *lmrc, const size_t ncmd)
{
	int ret = DDI_SUCCESS;
	lmrc_mfi_cmd_t **cmds;
	lmrc_mfi_cmd_t *mfi;
	uint32_t i;

	cmds = kmem_zalloc(ncmd * sizeof (lmrc_mfi_cmd_t *), KM_SLEEP);
	for (i = 0; i < ncmd; i++) {
		mfi = kmem_zalloc(sizeof (lmrc_mfi_cmd_t), KM_SLEEP);
		ret = lmrc_dma_alloc(lmrc, lmrc->l_dma_attr,
		    &mfi->mfi_frame_dma, sizeof (lmrc_mfi_frame_t), 256,
		    DDI_DMA_CONSISTENT);
		if (ret != DDI_SUCCESS)
			goto fail;

		mfi->mfi_lmrc = lmrc;
		mfi->mfi_frame = mfi->mfi_frame_dma.ld_buf;
		mfi->mfi_idx = i;

		if (lmrc_build_mptmfi_passthru(lmrc, mfi) != DDI_SUCCESS) {
			lmrc_dma_free(&mfi->mfi_frame_dma);
			goto fail;
		}

		mutex_init(&mfi->mfi_lock, NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(lmrc->l_intr_pri));

		cmds[i] = mfi;
		list_insert_tail(&lmrc->l_mfi_cmd_list, mfi);
	}

	lmrc->l_mfi_cmds = cmds;
	return (DDI_SUCCESS);

fail:
	kmem_free(mfi, sizeof (lmrc_mfi_cmd_t));
	lmrc_free_mfi_cmds(lmrc, ncmd);

	return (ret);
}

static void
lmrc_free_mfi_cmds(lmrc_t *lmrc, const size_t ncmd)
{
	lmrc_mfi_cmd_t *mfi;
	size_t count = 0;

	for (mfi = list_remove_head(&lmrc->l_mfi_cmd_list);
	    mfi != NULL;
	    mfi = list_remove_head(&lmrc->l_mfi_cmd_list)) {
		ASSERT(lmrc->l_mfi_cmds[mfi->mfi_idx] == mfi);
		lmrc->l_mfi_cmds[mfi->mfi_idx] = NULL;

		/*
		 * lmrc_put_mpt() requires the command to be locked, unlocking
		 * after it has been put back on the free list.
		 */
		mutex_enter(&mfi->mfi_mpt->mpt_lock);
		lmrc_put_mpt(mfi->mfi_mpt);

		lmrc_dma_free(&mfi->mfi_frame_dma);
		mutex_destroy(&mfi->mfi_lock);
		kmem_free(mfi, sizeof (lmrc_mfi_cmd_t));
		count++;
	}
	VERIFY3U(count, ==, ncmd);
	VERIFY(list_is_empty(&lmrc->l_mfi_cmd_list));

	kmem_free(lmrc->l_mfi_cmds, ncmd * sizeof (lmrc_mfi_cmd_t *));
}

void
lmrc_dma_build_sgl(lmrc_t *lmrc, lmrc_mpt_cmd_t *mpt,
    const ddi_dma_cookie_t *cookie, uint_t ncookies)
{
	Mpi25SCSIIORequest_t *io_req = mpt->mpt_io_frame;
	Mpi25IeeeSgeChain64_t *sgl_ptr = &io_req->SGL.IeeeChain;
	uint_t nsge, max_sge;
	uint_t i;

	ASSERT(ncookies > 0);

	/* Start with the 8 SGEs in the I/O frame. */
	max_sge = lmrc->l_max_sge_in_main_msg;

	for (;;) {
		nsge = min(ncookies, max_sge);

		for (i = 0; i < nsge; i++, cookie++) {
			*(uint64_t *)&sgl_ptr[i].Address =
			    cookie->dmac_laddress;
			sgl_ptr[i].Length = cookie->dmac_size;
			sgl_ptr[i].Flags = 0;
		}

		ncookies -= nsge;

		if (ncookies == 0)
			break;

		/*
		 * There's more. Roll back to the last cookie processed,
		 * setup SGE chain and repeat.
		 */
		cookie--;
		ncookies++;

		if ((io_req->IoFlags &
		    MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) == 0)
			/* XXX: Why? And why only if not fast path? */
			io_req->ChainOffset = lmrc->l_chain_offset_io_request;
		else
			io_req->ChainOffset = 0;

		sgl_ptr[i - 1].Flags = MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT;
		sgl_ptr[i - 1].Length = sizeof (Mpi25SGEIOUnion_t) * ncookies;
		lmrc_dma_set_addr64(&mpt->mpt_chain_dma,
		    (uint64_t *)&sgl_ptr[i - 1].Address);
		sgl_ptr = mpt->mpt_chain;

		nsge = ncookies;
		max_sge = lmrc->l_max_sge_in_chain;

		VERIFY3U(nsge, <=, max_sge);
	}

	sgl_ptr[i - 1].Flags = MPI25_IEEE_SGE_FLAGS_END_OF_LIST;

	(void) ddi_dma_sync(mpt->mpt_chain_dma.ld_hdl, 0,
	    mpt->mpt_chain_dma.ld_len, DDI_DMA_SYNC_FORDEV);
}

size_t
lmrc_dma_get_size(lmrc_dma_t *dmap)
{
	const ddi_dma_cookie_t *cookie = ddi_dma_cookie_one(dmap->ld_hdl);

	return (cookie->dmac_size);
}

void
lmrc_dma_set_addr64(lmrc_dma_t *dmap, uint64_t *addr)
{
	const ddi_dma_cookie_t *cookie = ddi_dma_cookie_one(dmap->ld_hdl);

	*addr = cookie->dmac_laddress;
}

void
lmrc_dma_set_addr32(lmrc_dma_t *dmap, uint32_t *addr)
{
	const ddi_dma_cookie_t *cookie = ddi_dma_cookie_one(dmap->ld_hdl);

	*addr = cookie->dmac_address;
}

int
lmrc_dma_alloc(lmrc_t *lmrc, ddi_dma_attr_t attr, lmrc_dma_t *dmap, size_t len,
    uint64_t align, uint_t flags)
{
	int ret;

	VERIFY3U(len, >, 0);
	VERIFY3U(align, >=, 1);

	bzero(dmap, sizeof (*dmap));

	attr.dma_attr_align = align;
	attr.dma_attr_sgllen = 1;
	attr.dma_attr_granular = 1;

	ret = ddi_dma_alloc_handle(lmrc->l_dip, &attr, DDI_DMA_SLEEP, NULL,
	    &dmap->ld_hdl);
	if (ret != DDI_SUCCESS) {
		/*
		 * Due to DDI_DMA_SLEEP this can't be DDI_DMA_NORESOURCES, and
		 * the only other possible error is DDI_DMA_BADATTR which
		 * indicates a driver bug which should cause a panic.
		 */
		dev_err(lmrc->l_dip, CE_PANIC,
		    "failed to allocate DMA handle, check DMA attributes");
		return (ret);
	}

	ret = ddi_dma_mem_alloc(dmap->ld_hdl, len, &lmrc->l_acc_attr,
	    flags, DDI_DMA_SLEEP, NULL, (caddr_t *)&dmap->ld_buf,
	    &dmap->ld_len, &dmap->ld_acc);
	if (ret != DDI_SUCCESS) {
		/*
		 * As DDI_DMA_SLEEP is specified, ddi_dma_mem_alloc() can
		 * only fail if the flags are conflicting, which indicates a
		 * driver bug and should cause a panic.
		 */
		dev_err(lmrc->l_dip, CE_PANIC,
		    "failed to allocate DMA memory, check DMA flags (%x)",
		    flags);
		return (ret);
	}

	ret = ddi_dma_addr_bind_handle(dmap->ld_hdl, NULL, dmap->ld_buf,
	    dmap->ld_len, DDI_DMA_RDWR | flags, DDI_DMA_SLEEP, NULL, NULL,
	    NULL);
	if (ret != DDI_DMA_MAPPED) {
		ddi_dma_mem_free(&dmap->ld_acc);
		ddi_dma_free_handle(&dmap->ld_hdl);
		return (ret);
	}

	bzero(dmap->ld_buf, dmap->ld_len);
	return (DDI_SUCCESS);
}

void
lmrc_dma_free(lmrc_dma_t *dmap)
{
	if (dmap->ld_hdl != NULL)
		(void) ddi_dma_unbind_handle(dmap->ld_hdl);
	if (dmap->ld_acc != NULL)
		ddi_dma_mem_free(&dmap->ld_acc);
	if (dmap->ld_hdl != NULL)
		ddi_dma_free_handle(&dmap->ld_hdl);
	bzero(dmap, sizeof (lmrc_dma_t));
}

static lmrc_adapter_class_t
lmrc_get_class(lmrc_t *lmrc)
{
	int device_id = ddi_prop_get_int(DDI_DEV_T_ANY, lmrc->l_dip,
	    DDI_PROP_DONTPASS, "device-id", 0);

	switch (device_id) {
	case LMRC_VENTURA:
	case LMRC_CRUSADER:
	case LMRC_HARPOON:
	case LMRC_TOMCAT:
	case LMRC_VENTURA_4PORT:
	case LMRC_CRUSADER_4PORT:
		return (LMRC_ACLASS_VENTURA);

	case LMRC_AERO_10E1:
	case LMRC_AERO_10E5:
		dev_err(lmrc->l_dip, CE_CONT,
		    "?Adapter is in configurable secure mode\n");
		/*FALLTHRU*/
	case LMRC_AERO_10E2:
	case LMRC_AERO_10E6:
		return (LMRC_ACLASS_AERO);

	case LMRC_AERO_10E0:
	case LMRC_AERO_10E3:
	case LMRC_AERO_10E4:
	case LMRC_AERO_10E7:
		dev_err(lmrc->l_dip, CE_CONT,
		    "?Adapter is in non-secure mode\n");
	}

	return (LMRC_ACLASS_OTHER);
}

static int
lmrc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	const char *addr = scsi_hba_iport_unit_address(dip);

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (addr == NULL)
		return (lmrc_ctrl_attach(dip));

	if (strcmp(addr, LMRC_IPORT_RAID) == 0)
		return (lmrc_raid_attach(dip));

	if (strcmp(addr, LMRC_IPORT_PHYS) == 0)
		return (lmrc_phys_attach(dip));

	return (DDI_FAILURE);
}

static int
lmrc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	const char *addr = scsi_hba_iport_unit_address(dip);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	if (addr == NULL)
		return (lmrc_ctrl_detach(dip));

	if (strcmp(addr, LMRC_IPORT_RAID) == 0)
		return (lmrc_raid_detach(dip));

	if (strcmp(addr, LMRC_IPORT_PHYS) == 0)
		return (lmrc_phys_detach(dip));

	return (DDI_FAILURE);
}

static int
lmrc_quiesce(dev_info_t *dip)
{
	lmrc_t *lmrc = ddi_get_soft_state(lmrc_state, ddi_get_instance(dip));

	if (lmrc == NULL)
		return (DDI_SUCCESS);

	return (lmrc_ctrl_shutdown(lmrc));
}

static struct cb_ops lmrc_cb_ops = {
	.cb_rev =		CB_REV,
	.cb_flag =		D_NEW | D_MP,

	.cb_open =		scsi_hba_open,
	.cb_close =		scsi_hba_close,

	.cb_ioctl =		lmrc_ioctl,

	.cb_strategy =		nodev,
	.cb_print =		nodev,
	.cb_dump =		nodev,
	.cb_read =		nodev,
	.cb_write =		nodev,
	.cb_devmap =		nodev,
	.cb_mmap =		nodev,
	.cb_segmap =		nodev,
	.cb_chpoll =		nochpoll,
	.cb_prop_op =		ddi_prop_op,
	.cb_str =		NULL,
	.cb_aread =		nodev,
	.cb_awrite =		nodev,
};

static struct dev_ops lmrc_dev_ops = {
	.devo_rev =		DEVO_REV,
	.devo_refcnt =		0,

	.devo_attach =		lmrc_attach,
	.devo_detach =		lmrc_detach,

	.devo_cb_ops =		&lmrc_cb_ops,

	.devo_getinfo =		ddi_no_info,
	.devo_identify =	nulldev,
	.devo_probe =		nulldev,
	.devo_reset =		nodev,
	.devo_bus_ops =		NULL,
	.devo_power =		nodev,
	.devo_quiesce =		lmrc_quiesce,
};

static struct modldrv lmrc_modldrv = {
	.drv_modops =		&mod_driverops,
	.drv_linkinfo =		"Broadcom MegaRAID 12G SAS RAID",
	.drv_dev_ops =		&lmrc_dev_ops,
};

static struct modlinkage lmrc_modlinkage = {
	.ml_rev =		MODREV_1,
	.ml_linkage =		{ &lmrc_modldrv, NULL },
};

int
_init(void)
{
	int ret;

	ret = ddi_soft_state_init(&lmrc_state, sizeof (lmrc_t), 1);
	if (ret != DDI_SUCCESS)
		return (ret);

	ret = scsi_hba_init(&lmrc_modlinkage);
	if (ret != 0) {
		ddi_soft_state_fini(&lmrc_state);
		return (ret);
	}

	ret = mod_install(&lmrc_modlinkage);
	if (ret != DDI_SUCCESS) {
		scsi_hba_fini(&lmrc_modlinkage);
		ddi_soft_state_fini(&lmrc_state);
		return (ret);
	}

	return (DDI_SUCCESS);
}

int
_fini(void)
{
	int ret;

	ret = mod_remove(&lmrc_modlinkage);
	if (ret == DDI_SUCCESS) {
		scsi_hba_fini(&lmrc_modlinkage);
		ddi_soft_state_fini(&lmrc_state);
	}

	return (ret);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&lmrc_modlinkage, modinfop));
}