1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2019 Joyent, Inc.
26 * Copyright 2014 OmniTI Computer Consulting, Inc. All rights reserved.
27 * Copyright (c) 2014, Tegile Systems Inc. All rights reserved.
28 */
29
30/*
31 * Copyright (c) 2000 to 2010, LSI Corporation.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms of all code within
35 * this file that is exclusively owned by LSI, with or without
36 * modification, is permitted provided that, in addition to the CDDL 1.0
37 * License requirements, the following conditions are met:
38 *
39 *    Neither the name of the author nor the names of its contributors may be
40 *    used to endorse or promote products derived from this software without
41 *    specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
44 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
45 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
46 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
47 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
49 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
50 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
51 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
52 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
53 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
54 * DAMAGE.
55 */
56
57/*
58 * mptsas - This is a driver based on LSI Logic's MPT2.0/2.5 interface.
59 *
60 */
61
62#if defined(lint) || defined(DEBUG)
63#define	MPTSAS_DEBUG
64#endif
65
66/*
67 * standard header files.
68 */
69#include <sys/note.h>
70#include <sys/scsi/scsi.h>
71#include <sys/pci.h>
72#include <sys/file.h>
73#include <sys/policy.h>
74#include <sys/model.h>
75#include <sys/sysevent.h>
76#include <sys/sysevent/eventdefs.h>
77#include <sys/sysevent/dr.h>
78#include <sys/sata/sata_defs.h>
79#include <sys/sata/sata_hba.h>
80#include <sys/scsi/generic/sas.h>
81#include <sys/scsi/impl/scsi_sas.h>
82
83#pragma pack(1)
84#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_type.h>
85#include <sys/scsi/adapters/mpt_sas/mpi/mpi2.h>
86#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_cnfg.h>
87#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_init.h>
88#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_ioc.h>
89#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_sas.h>
90#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_tool.h>
91#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_raid.h>
92#pragma pack()
93
94/*
95 * private header files.
96 *
97 */
98#include <sys/scsi/impl/scsi_reset_notify.h>
99#include <sys/scsi/adapters/mpt_sas/mptsas_var.h>
100#include <sys/scsi/adapters/mpt_sas/mptsas_ioctl.h>
101#include <sys/scsi/adapters/mpt_sas/mptsas_smhba.h>
102#include <sys/scsi/adapters/mpt_sas/mptsas_hash.h>
103#include <sys/raidioctl.h>
104
105#include <sys/fs/dv_node.h>	/* devfs_clean */
106
107/*
108 * FMA header files
109 */
110#include <sys/ddifm.h>
111#include <sys/fm/protocol.h>
112#include <sys/fm/util.h>
113#include <sys/fm/io/ddi.h>
114
115/*
116 * autoconfiguration data and routines.
117 */
118static int mptsas_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
119static int mptsas_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
120static int mptsas_power(dev_info_t *dip, int component, int level);
121
122/*
123 * cb_ops function
124 */
125static int mptsas_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
126	cred_t *credp, int *rval);
127#ifdef __sparc
128static int mptsas_reset(dev_info_t *devi, ddi_reset_cmd_t cmd);
129#else  /* __sparc */
130static int mptsas_quiesce(dev_info_t *devi);
131#endif	/* __sparc */
132
133/*
134 * ddi_ufm_ops
135 */
136static int mptsas_ufm_fill_image(ddi_ufm_handle_t *ufmh, void *arg,
137    uint_t imgno, ddi_ufm_image_t *img);
138static int mptsas_ufm_fill_slot(ddi_ufm_handle_t *ufmh, void *arg,
139    uint_t imgno, uint_t slotno, ddi_ufm_slot_t *slot);
140static int mptsas_ufm_getcaps(ddi_ufm_handle_t *ufmh, void *arg,
141    ddi_ufm_cap_t *caps);
142
143/*
144 * Resource initialization for hardware
145 */
146static void mptsas_setup_cmd_reg(mptsas_t *mpt);
147static void mptsas_disable_bus_master(mptsas_t *mpt);
148static void mptsas_hba_fini(mptsas_t *mpt);
149static void mptsas_cfg_fini(mptsas_t *mptsas_blkp);
150static int mptsas_hba_setup(mptsas_t *mpt);
151static void mptsas_hba_teardown(mptsas_t *mpt);
152static int mptsas_config_space_init(mptsas_t *mpt);
153static void mptsas_config_space_fini(mptsas_t *mpt);
154static void mptsas_iport_register(mptsas_t *mpt);
155static int mptsas_smp_setup(mptsas_t *mpt);
156static void mptsas_smp_teardown(mptsas_t *mpt);
157static int mptsas_enc_setup(mptsas_t *mpt);
158static void mptsas_enc_teardown(mptsas_t *mpt);
159static int mptsas_cache_create(mptsas_t *mpt);
160static void mptsas_cache_destroy(mptsas_t *mpt);
161static int mptsas_alloc_request_frames(mptsas_t *mpt);
162static int mptsas_alloc_sense_bufs(mptsas_t *mpt);
163static int mptsas_alloc_reply_frames(mptsas_t *mpt);
164static int mptsas_alloc_free_queue(mptsas_t *mpt);
165static int mptsas_alloc_post_queue(mptsas_t *mpt);
166static void mptsas_alloc_reply_args(mptsas_t *mpt);
167static int mptsas_alloc_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd);
168static void mptsas_free_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd);
169static int mptsas_init_chip(mptsas_t *mpt, int first_time);
170static void mptsas_update_hashtab(mptsas_t *mpt);
171
172/*
173 * SCSA function prototypes
174 */
175static int mptsas_scsi_start(struct scsi_address *ap, struct scsi_pkt *pkt);
176static int mptsas_scsi_reset(struct scsi_address *ap, int level);
177static int mptsas_scsi_abort(struct scsi_address *ap, struct scsi_pkt *pkt);
178static int mptsas_scsi_getcap(struct scsi_address *ap, char *cap, int tgtonly);
179static int mptsas_scsi_setcap(struct scsi_address *ap, char *cap, int value,
180    int tgtonly);
181static void mptsas_scsi_dmafree(struct scsi_address *ap, struct scsi_pkt *pkt);
182static struct scsi_pkt *mptsas_scsi_init_pkt(struct scsi_address *ap,
183    struct scsi_pkt *pkt, struct buf *bp, int cmdlen, int statuslen,
184	int tgtlen, int flags, int (*callback)(), caddr_t arg);
185static void mptsas_scsi_sync_pkt(struct scsi_address *ap, struct scsi_pkt *pkt);
186static void mptsas_scsi_destroy_pkt(struct scsi_address *ap,
187    struct scsi_pkt *pkt);
188static int mptsas_scsi_tgt_init(dev_info_t *hba_dip, dev_info_t *tgt_dip,
189    scsi_hba_tran_t *hba_tran, struct scsi_device *sd);
190static void mptsas_scsi_tgt_free(dev_info_t *hba_dip, dev_info_t *tgt_dip,
191    scsi_hba_tran_t *hba_tran, struct scsi_device *sd);
192static int mptsas_scsi_reset_notify(struct scsi_address *ap, int flag,
193    void (*callback)(caddr_t), caddr_t arg);
194static int mptsas_get_name(struct scsi_device *sd, char *name, int len);
195static int mptsas_get_bus_addr(struct scsi_device *sd, char *name, int len);
196static int mptsas_scsi_quiesce(dev_info_t *dip);
197static int mptsas_scsi_unquiesce(dev_info_t *dip);
198static int mptsas_bus_config(dev_info_t *pdip, uint_t flags,
199    ddi_bus_config_op_t op, void *arg, dev_info_t **childp);
200
201/*
202 * SMP functions
203 */
204static int mptsas_smp_start(struct smp_pkt *smp_pkt);
205
206/*
207 * internal function prototypes.
208 */
209static void mptsas_list_add(mptsas_t *mpt);
210static void mptsas_list_del(mptsas_t *mpt);
211
212static int mptsas_quiesce_bus(mptsas_t *mpt);
213static int mptsas_unquiesce_bus(mptsas_t *mpt);
214
215static int mptsas_alloc_handshake_msg(mptsas_t *mpt, size_t alloc_size);
216static void mptsas_free_handshake_msg(mptsas_t *mpt);
217
218static void mptsas_ncmds_checkdrain(void *arg);
219
220static int mptsas_prepare_pkt(mptsas_cmd_t *cmd);
221static int mptsas_accept_pkt(mptsas_t *mpt, mptsas_cmd_t *sp);
222static int mptsas_accept_txwq_and_pkt(mptsas_t *mpt, mptsas_cmd_t *sp);
223static void mptsas_accept_tx_waitq(mptsas_t *mpt);
224
225static int mptsas_do_detach(dev_info_t *dev);
226static int mptsas_do_scsi_reset(mptsas_t *mpt, uint16_t devhdl);
227static int mptsas_do_scsi_abort(mptsas_t *mpt, int target, int lun,
228    struct scsi_pkt *pkt);
229static int mptsas_scsi_capchk(char *cap, int tgtonly, int *cidxp);
230
231static void mptsas_handle_qfull(mptsas_t *mpt, mptsas_cmd_t *cmd);
232static void mptsas_handle_event(void *args);
233static int mptsas_handle_event_sync(void *args);
234static void mptsas_handle_dr(void *args);
235static void mptsas_handle_topo_change(mptsas_topo_change_list_t *topo_node,
236    dev_info_t *pdip);
237
238static void mptsas_restart_cmd(void *);
239
240static void mptsas_flush_hba(mptsas_t *mpt);
241static void mptsas_flush_target(mptsas_t *mpt, ushort_t target, int lun,
242	uint8_t tasktype);
243static void mptsas_set_pkt_reason(mptsas_t *mpt, mptsas_cmd_t *cmd,
244    uchar_t reason, uint_t stat);
245
246static uint_t mptsas_intr(caddr_t arg1, caddr_t arg2);
247static void mptsas_process_intr(mptsas_t *mpt,
248    pMpi2ReplyDescriptorsUnion_t reply_desc_union);
249static void mptsas_handle_scsi_io_success(mptsas_t *mpt,
250    pMpi2ReplyDescriptorsUnion_t reply_desc);
251static void mptsas_handle_address_reply(mptsas_t *mpt,
252    pMpi2ReplyDescriptorsUnion_t reply_desc);
253static int mptsas_wait_intr(mptsas_t *mpt, int polltime);
254static void mptsas_sge_setup(mptsas_t *mpt, mptsas_cmd_t *cmd,
255    uint32_t *control, pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl);
256
257static void mptsas_watch(void *arg);
258static void mptsas_watchsubr(mptsas_t *mpt);
259static void mptsas_cmd_timeout(mptsas_t *mpt, mptsas_target_t *ptgt);
260
261static void mptsas_start_passthru(mptsas_t *mpt, mptsas_cmd_t *cmd);
262static int mptsas_do_passthru(mptsas_t *mpt, uint8_t *request, uint8_t *reply,
263    uint8_t *data, uint32_t request_size, uint32_t reply_size,
264    uint32_t data_size, uint32_t direction, uint8_t *dataout,
265    uint32_t dataout_size, short timeout, int mode);
266static int mptsas_free_devhdl(mptsas_t *mpt, uint16_t devhdl);
267
268static uint8_t mptsas_get_fw_diag_buffer_number(mptsas_t *mpt,
269    uint32_t unique_id);
270static void mptsas_start_diag(mptsas_t *mpt, mptsas_cmd_t *cmd);
271static int mptsas_post_fw_diag_buffer(mptsas_t *mpt,
272    mptsas_fw_diagnostic_buffer_t *pBuffer, uint32_t *return_code);
273static int mptsas_release_fw_diag_buffer(mptsas_t *mpt,
274    mptsas_fw_diagnostic_buffer_t *pBuffer, uint32_t *return_code,
275    uint32_t diag_type);
276static int mptsas_diag_register(mptsas_t *mpt,
277    mptsas_fw_diag_register_t *diag_register, uint32_t *return_code);
278static int mptsas_diag_unregister(mptsas_t *mpt,
279    mptsas_fw_diag_unregister_t *diag_unregister, uint32_t *return_code);
280static int mptsas_diag_query(mptsas_t *mpt, mptsas_fw_diag_query_t *diag_query,
281    uint32_t *return_code);
282static int mptsas_diag_read_buffer(mptsas_t *mpt,
283    mptsas_diag_read_buffer_t *diag_read_buffer, uint8_t *ioctl_buf,
284    uint32_t *return_code, int ioctl_mode);
285static int mptsas_diag_release(mptsas_t *mpt,
286    mptsas_fw_diag_release_t *diag_release, uint32_t *return_code);
287static int mptsas_do_diag_action(mptsas_t *mpt, uint32_t action,
288    uint8_t *diag_action, uint32_t length, uint32_t *return_code,
289    int ioctl_mode);
290static int mptsas_diag_action(mptsas_t *mpt, mptsas_diag_action_t *data,
291    int mode);
292
293static int mptsas_pkt_alloc_extern(mptsas_t *mpt, mptsas_cmd_t *cmd,
294    int cmdlen, int tgtlen, int statuslen, int kf);
295static void mptsas_pkt_destroy_extern(mptsas_t *mpt, mptsas_cmd_t *cmd);
296
297static int mptsas_kmem_cache_constructor(void *buf, void *cdrarg, int kmflags);
298static void mptsas_kmem_cache_destructor(void *buf, void *cdrarg);
299
300static int mptsas_cache_frames_constructor(void *buf, void *cdrarg,
301    int kmflags);
302static void mptsas_cache_frames_destructor(void *buf, void *cdrarg);
303
304static void mptsas_check_scsi_io_error(mptsas_t *mpt, pMpi2SCSIIOReply_t reply,
305    mptsas_cmd_t *cmd);
306static void mptsas_check_task_mgt(mptsas_t *mpt,
307    pMpi2SCSIManagementReply_t reply, mptsas_cmd_t *cmd);
308static int mptsas_send_scsi_cmd(mptsas_t *mpt, struct scsi_address *ap,
309    mptsas_target_t *ptgt, uchar_t *cdb, int cdblen, struct buf *data_bp,
310    int *resid);
311
312static int mptsas_alloc_active_slots(mptsas_t *mpt, int flag);
313static void mptsas_free_active_slots(mptsas_t *mpt);
314static int mptsas_start_cmd(mptsas_t *mpt, mptsas_cmd_t *cmd);
315
316static void mptsas_restart_hba(mptsas_t *mpt);
317static void mptsas_restart_waitq(mptsas_t *mpt);
318
319static void mptsas_deliver_doneq_thread(mptsas_t *mpt);
320static void mptsas_doneq_add(mptsas_t *mpt, mptsas_cmd_t *cmd);
321static void mptsas_doneq_mv(mptsas_t *mpt, uint64_t t);
322
323static mptsas_cmd_t *mptsas_doneq_thread_rm(mptsas_t *mpt, uint64_t t);
324static void mptsas_doneq_empty(mptsas_t *mpt);
325static void mptsas_doneq_thread(mptsas_doneq_thread_arg_t *arg);
326
327static mptsas_cmd_t *mptsas_waitq_rm(mptsas_t *mpt);
328static void mptsas_waitq_delete(mptsas_t *mpt, mptsas_cmd_t *cmd);
329static mptsas_cmd_t *mptsas_tx_waitq_rm(mptsas_t *mpt);
330static void mptsas_tx_waitq_delete(mptsas_t *mpt, mptsas_cmd_t *cmd);
331
332
333static void mptsas_start_watch_reset_delay();
334static void mptsas_setup_bus_reset_delay(mptsas_t *mpt);
335static void mptsas_watch_reset_delay(void *arg);
336static int mptsas_watch_reset_delay_subr(mptsas_t *mpt);
337
338/*
339 * helper functions
340 */
341static void mptsas_dump_cmd(mptsas_t *mpt, mptsas_cmd_t *cmd);
342
343static dev_info_t *mptsas_find_child(dev_info_t *pdip, char *name);
344static dev_info_t *mptsas_find_child_phy(dev_info_t *pdip, uint8_t phy);
345static dev_info_t *mptsas_find_child_addr(dev_info_t *pdip, uint64_t sasaddr,
346    int lun);
347static mdi_pathinfo_t *mptsas_find_path_addr(dev_info_t *pdip, uint64_t sasaddr,
348    int lun);
349static mdi_pathinfo_t *mptsas_find_path_phy(dev_info_t *pdip, uint8_t phy);
350static dev_info_t *mptsas_find_smp_child(dev_info_t *pdip, char *str_wwn);
351
352static int mptsas_parse_address(char *name, uint64_t *wwid, uint8_t *phy,
353    int *lun);
354static int mptsas_parse_smp_name(char *name, uint64_t *wwn);
355
356static mptsas_target_t *mptsas_phy_to_tgt(mptsas_t *mpt,
357    mptsas_phymask_t phymask, uint8_t phy);
358static mptsas_target_t *mptsas_wwid_to_ptgt(mptsas_t *mpt,
359    mptsas_phymask_t phymask, uint64_t wwid);
360static mptsas_smp_t *mptsas_wwid_to_psmp(mptsas_t *mpt,
361    mptsas_phymask_t phymask, uint64_t wwid);
362
363static int mptsas_inquiry(mptsas_t *mpt, mptsas_target_t *ptgt, int lun,
364    uchar_t page, unsigned char *buf, int len, int *rlen, uchar_t evpd);
365
366static int mptsas_get_target_device_info(mptsas_t *mpt, uint32_t page_address,
367    uint16_t *handle, mptsas_target_t **pptgt);
368static void mptsas_update_phymask(mptsas_t *mpt);
369
370static int mptsas_flush_led_status(mptsas_t *mpt, mptsas_enclosure_t *mep,
371    uint16_t idx);
372static int mptsas_send_sep(mptsas_t *mpt, mptsas_enclosure_t *mep, uint16_t idx,
373    uint32_t *status, uint8_t cmd);
374static dev_info_t *mptsas_get_dip_from_dev(dev_t dev,
375    mptsas_phymask_t *phymask);
376static mptsas_target_t *mptsas_addr_to_ptgt(mptsas_t *mpt, char *addr,
377    mptsas_phymask_t phymask);
378
379
380/*
381 * Enumeration / DR functions
382 */
383static void mptsas_config_all(dev_info_t *pdip);
384static int mptsas_config_one_addr(dev_info_t *pdip, uint64_t sasaddr, int lun,
385    dev_info_t **lundip);
386static int mptsas_config_one_phy(dev_info_t *pdip, uint8_t phy, int lun,
387    dev_info_t **lundip);
388
389static int mptsas_config_target(dev_info_t *pdip, mptsas_target_t *ptgt);
390static int mptsas_offline_target(dev_info_t *pdip, char *name);
391
392static int mptsas_config_raid(dev_info_t *pdip, uint16_t target,
393    dev_info_t **dip);
394
395static int mptsas_config_luns(dev_info_t *pdip, mptsas_target_t *ptgt);
396static int mptsas_probe_lun(dev_info_t *pdip, int lun,
397    dev_info_t **dip, mptsas_target_t *ptgt);
398
399static int mptsas_create_lun(dev_info_t *pdip, struct scsi_inquiry *sd_inq,
400    dev_info_t **dip, mptsas_target_t *ptgt, int lun);
401
402static int mptsas_create_phys_lun(dev_info_t *pdip, struct scsi_inquiry *sd,
403    char *guid, dev_info_t **dip, mptsas_target_t *ptgt, int lun);
404static int mptsas_create_virt_lun(dev_info_t *pdip, struct scsi_inquiry *sd,
405    char *guid, dev_info_t **dip, mdi_pathinfo_t **pip, mptsas_target_t *ptgt,
406    int lun);
407
408static void mptsas_offline_missed_luns(dev_info_t *pdip,
409    uint16_t *repluns, int lun_cnt, mptsas_target_t *ptgt);
410static int mptsas_offline_lun(dev_info_t *pdip, dev_info_t *rdip,
411    mdi_pathinfo_t *rpip, uint_t flags);
412
413static int mptsas_config_smp(dev_info_t *pdip, uint64_t sas_wwn,
414    dev_info_t **smp_dip);
415static int mptsas_offline_smp(dev_info_t *pdip, mptsas_smp_t *smp_node,
416    uint_t flags);
417
418static int mptsas_event_query(mptsas_t *mpt, mptsas_event_query_t *data,
419    int mode, int *rval);
420static int mptsas_event_enable(mptsas_t *mpt, mptsas_event_enable_t *data,
421    int mode, int *rval);
422static int mptsas_event_report(mptsas_t *mpt, mptsas_event_report_t *data,
423    int mode, int *rval);
424static void mptsas_record_event(void *args);
425static int mptsas_reg_access(mptsas_t *mpt, mptsas_reg_access_t *data,
426    int mode);
427
428mptsas_target_t *mptsas_tgt_alloc(refhash_t *, uint16_t, uint64_t,
429    uint32_t, mptsas_phymask_t, uint8_t);
430static mptsas_smp_t *mptsas_smp_alloc(mptsas_t *, mptsas_smp_t *);
431static int mptsas_online_smp(dev_info_t *pdip, mptsas_smp_t *smp_node,
432    dev_info_t **smp_dip);
433
434/*
435 * Power management functions
436 */
437static int mptsas_get_pci_cap(mptsas_t *mpt);
438static int mptsas_init_pm(mptsas_t *mpt);
439
440/*
441 * MPT MSI tunable:
442 *
443 * By default MSI is enabled on all supported platforms.
444 */
445boolean_t mptsas_enable_msi = B_TRUE;
446boolean_t mptsas_physical_bind_failed_page_83 = B_FALSE;
447
448/*
449 * Global switch for use of MPI2.5 FAST PATH.
450 * We don't really know what FAST PATH actually does, so if it is suspected
451 * to cause problems it can be turned off by setting this variable to B_FALSE.
452 */
453boolean_t mptsas_use_fastpath = B_TRUE;
454
455static int mptsas_register_intrs(mptsas_t *);
456static void mptsas_unregister_intrs(mptsas_t *);
457static int mptsas_add_intrs(mptsas_t *, int);
458static void mptsas_rem_intrs(mptsas_t *);
459
460/*
461 * FMA Prototypes
462 */
463static void mptsas_fm_init(mptsas_t *mpt);
464static void mptsas_fm_fini(mptsas_t *mpt);
465static int mptsas_fm_error_cb(dev_info_t *, ddi_fm_error_t *, const void *);
466
467extern pri_t minclsyspri, maxclsyspri;
468
469/*
470 * This device is created by the SCSI pseudo nexus driver (SCSI vHCI).  It is
471 * under this device that the paths to a physical device are created when
472 * MPxIO is used.
473 */
474extern dev_info_t	*scsi_vhci_dip;
475
476/*
477 * Tunable timeout value for Inquiry VPD page 0x83
478 * By default the value is 30 seconds.
479 */
480int mptsas_inq83_retry_timeout = 30;
481
482/*
483 * This is used to allocate memory for message frame storage, not for
484 * data I/O DMA. All message frames must be stored in the first 4G of
485 * physical memory.
486 */
487ddi_dma_attr_t mptsas_dma_attrs = {
488	DMA_ATTR_V0,	/* attribute layout version		*/
489	0x0ull,		/* address low - should be 0 (longlong)	*/
490	0xffffffffull,	/* address high - 32-bit max range	*/
491	0x00ffffffull,	/* count max - max DMA object size	*/
492	4,		/* allocation alignment requirements	*/
493	0x78,		/* burstsizes - binary encoded values	*/
494	1,		/* minxfer - gran. of DMA engine	*/
495	0x00ffffffull,	/* maxxfer - gran. of DMA engine	*/
496	0xffffffffull,	/* max segment size (DMA boundary)	*/
497	MPTSAS_MAX_DMA_SEGS, /* scatter/gather list length	*/
498	512,		/* granularity - device transfer size	*/
499	0		/* flags, set to 0			*/
500};
501
502/*
503 * This is used for data I/O DMA memory allocation. (full 64-bit DMA
504 * physical addresses are supported.)
505 */
506ddi_dma_attr_t mptsas_dma_attrs64 = {
507	DMA_ATTR_V0,	/* attribute layout version		*/
508	0x0ull,		/* address low - should be 0 (longlong)	*/
509	0xffffffffffffffffull,	/* address high - 64-bit max	*/
510	0x00ffffffull,	/* count max - max DMA object size	*/
511	4,		/* allocation alignment requirements	*/
512	0x78,		/* burstsizes - binary encoded values	*/
513	1,		/* minxfer - gran. of DMA engine	*/
514	0x00ffffffull,	/* maxxfer - gran. of DMA engine	*/
515	0xffffffffull,	/* max segment size (DMA boundary)	*/
516	MPTSAS_MAX_DMA_SEGS, /* scatter/gather list length	*/
517	512,		/* granularity - device transfer size	*/
518	0		/* flags, set to 0 */
519};
520
521ddi_device_acc_attr_t mptsas_dev_attr = {
522	DDI_DEVICE_ATTR_V1,
523	DDI_STRUCTURE_LE_ACC,
524	DDI_STRICTORDER_ACC,
525	DDI_DEFAULT_ACC
526};
527
528static struct cb_ops mptsas_cb_ops = {
529	scsi_hba_open,		/* open */
530	scsi_hba_close,		/* close */
531	nodev,			/* strategy */
532	nodev,			/* print */
533	nodev,			/* dump */
534	nodev,			/* read */
535	nodev,			/* write */
536	mptsas_ioctl,		/* ioctl */
537	nodev,			/* devmap */
538	nodev,			/* mmap */
539	nodev,			/* segmap */
540	nochpoll,		/* chpoll */
541	ddi_prop_op,		/* cb_prop_op */
542	NULL,			/* streamtab */
543	D_MP,			/* cb_flag */
544	CB_REV,			/* rev */
545	nodev,			/* aread */
546	nodev			/* awrite */
547};
548
549static struct dev_ops mptsas_ops = {
550	DEVO_REV,		/* devo_rev, */
551	0,			/* refcnt  */
552	ddi_no_info,		/* info */
553	nulldev,		/* identify */
554	nulldev,		/* probe */
555	mptsas_attach,		/* attach */
556	mptsas_detach,		/* detach */
557#ifdef  __sparc
558	mptsas_reset,
559#else
560	nodev,			/* reset */
561#endif  /* __sparc */
562	&mptsas_cb_ops,		/* driver operations */
563	NULL,			/* bus operations */
564	mptsas_power,		/* power management */
565#ifdef	__sparc
566	ddi_quiesce_not_needed
567#else
568	mptsas_quiesce		/* quiesce */
569#endif	/* __sparc */
570};
571
572static ddi_ufm_ops_t mptsas_ufm_ops = {
573	NULL,
574	mptsas_ufm_fill_image,
575	mptsas_ufm_fill_slot,
576	mptsas_ufm_getcaps
577};
578
579#define	MPTSAS_MOD_STRING "MPTSAS HBA Driver 00.00.00.24"
580
581static struct modldrv modldrv = {
582	&mod_driverops,	/* Type of module. This one is a driver */
583	MPTSAS_MOD_STRING, /* Name of the module. */
584	&mptsas_ops,	/* driver ops */
585};
586
587static struct modlinkage modlinkage = {
588	MODREV_1, &modldrv, NULL
589};
590#define	TARGET_PROP	"target"
591#define	LUN_PROP	"lun"
592#define	LUN64_PROP	"lun64"
593#define	SAS_PROP	"sas-mpt"
594#define	MDI_GUID	"wwn"
595#define	NDI_GUID	"guid"
596#define	MPTSAS_DEV_GONE	"mptsas_dev_gone"
597
598/*
599 * Local static data
600 */
601#if defined(MPTSAS_DEBUG)
602/*
603 * Flags to indicate which debug messages are to be printed and which go to the
604 * debug log ring buffer. Default is to not print anything, and to log
605 * everything except the watchsubr() output which normally happens every second.
606 */
607uint32_t mptsas_debugprt_flags = 0x0;
608uint32_t mptsas_debuglog_flags = ~(1U << 30);
609#endif	/* defined(MPTSAS_DEBUG) */
610uint32_t mptsas_debug_resets = 0;
611
612static kmutex_t		mptsas_global_mutex;
613static void		*mptsas_state;		/* soft	state ptr */
614static krwlock_t	mptsas_global_rwlock;
615
616static kmutex_t		mptsas_log_mutex;
617static char		mptsas_log_buf[256];
618_NOTE(MUTEX_PROTECTS_DATA(mptsas_log_mutex, mptsas_log_buf))
619
620static mptsas_t *mptsas_head, *mptsas_tail;
621static clock_t mptsas_scsi_watchdog_tick;
622static clock_t mptsas_tick;
623static timeout_id_t mptsas_reset_watch;
624static timeout_id_t mptsas_timeout_id;
625static int mptsas_timeouts_enabled = 0;
626
627/*
628 * Default length for extended auto request sense buffers.
629 * All sense buffers need to be under the same alloc because there
630 * is only one common top 32bits (of 64bits) address register.
631 * Most requests only require 32 bytes, but some request >256.
632 * We use rmalloc()/rmfree() on this additional memory to manage the
633 * "extended" requests.
634 */
635int mptsas_extreq_sense_bufsize = 256*64;
636
637/*
 * We believe that all software restrictions of having to run with DMA
639 * attributes to limit allocation to the first 4G are removed.
640 * However, this flag remains to enable quick switchback should suspicious
641 * problems emerge.
642 * Note that scsi_alloc_consistent_buf() does still adhere to allocating
643 * 32 bit addressable memory, but we can cope if that is changed now.
644 */
645int mptsas_use_64bit_msgaddr = 1;
646
647/*
648 * warlock directives
649 */
650_NOTE(SCHEME_PROTECTS_DATA("unique per pkt", scsi_pkt \
651	mptsas_cmd NcrTableIndirect buf scsi_cdb scsi_status))
652_NOTE(SCHEME_PROTECTS_DATA("unique per pkt", smp_pkt))
653_NOTE(SCHEME_PROTECTS_DATA("stable data", scsi_device scsi_address))
654_NOTE(SCHEME_PROTECTS_DATA("No Mutex Needed", mptsas_tgt_private))
655_NOTE(SCHEME_PROTECTS_DATA("No Mutex Needed", scsi_hba_tran::tran_tgt_private))
656
657/*
658 * SM - HBA statics
659 */
660char	*mptsas_driver_rev = MPTSAS_MOD_STRING;
661
662#ifdef MPTSAS_DEBUG
663void debug_enter(char *);
664#endif
665
666/*
667 * Notes:
668 *	- scsi_hba_init(9F) initializes SCSI HBA modules
669 *	- must call scsi_hba_fini(9F) if modload() fails
670 */
671int
672_init(void)
673{
674	int status;
675	/* CONSTCOND */
676	ASSERT(NO_COMPETING_THREADS);
677
678	NDBG0(("_init"));
679
680	status = ddi_soft_state_init(&mptsas_state, MPTSAS_SIZE,
681	    MPTSAS_INITIAL_SOFT_SPACE);
682	if (status != 0) {
683		return (status);
684	}
685
686	if ((status = scsi_hba_init(&modlinkage)) != 0) {
687		ddi_soft_state_fini(&mptsas_state);
688		return (status);
689	}
690
691	mutex_init(&mptsas_global_mutex, NULL, MUTEX_DRIVER, NULL);
692	rw_init(&mptsas_global_rwlock, NULL, RW_DRIVER, NULL);
693	mutex_init(&mptsas_log_mutex, NULL, MUTEX_DRIVER, NULL);
694
695	if ((status = mod_install(&modlinkage)) != 0) {
696		mutex_destroy(&mptsas_log_mutex);
697		rw_destroy(&mptsas_global_rwlock);
698		mutex_destroy(&mptsas_global_mutex);
699		ddi_soft_state_fini(&mptsas_state);
700		scsi_hba_fini(&modlinkage);
701	}
702
703	return (status);
704}
705
706/*
707 * Notes:
708 *	- scsi_hba_fini(9F) uninitializes SCSI HBA modules
709 */
710int
711_fini(void)
712{
713	int	status;
714	/* CONSTCOND */
715	ASSERT(NO_COMPETING_THREADS);
716
717	NDBG0(("_fini"));
718
719	if ((status = mod_remove(&modlinkage)) == 0) {
720		ddi_soft_state_fini(&mptsas_state);
721		scsi_hba_fini(&modlinkage);
722		mutex_destroy(&mptsas_global_mutex);
723		rw_destroy(&mptsas_global_rwlock);
724		mutex_destroy(&mptsas_log_mutex);
725	}
726	return (status);
727}
728
729/*
730 * The loadable-module _info(9E) entry point
731 */
732int
733_info(struct modinfo *modinfop)
734{
735	/* CONSTCOND */
736	ASSERT(NO_COMPETING_THREADS);
737	NDBG0(("mptsas _info"));
738
739	return (mod_info(&modlinkage, modinfop));
740}
741
742static int
743mptsas_target_eval_devhdl(const void *op, void *arg)
744{
745	uint16_t dh = *(uint16_t *)arg;
746	const mptsas_target_t *tp = op;
747
748	return ((int)tp->m_devhdl - (int)dh);
749}
750
751static int
752mptsas_target_eval_nowwn(const void *op, void *arg)
753{
754	uint8_t phy = *(uint8_t *)arg;
755	const mptsas_target_t *tp = op;
756
757	if (tp->m_addr.mta_wwn != 0)
758		return (-1);
759
760	return ((int)tp->m_phynum - (int)phy);
761}
762
763static int
764mptsas_smp_eval_devhdl(const void *op, void *arg)
765{
766	uint16_t dh = *(uint16_t *)arg;
767	const mptsas_smp_t *sp = op;
768
769	return ((int)sp->m_devhdl - (int)dh);
770}
771
772static uint64_t
773mptsas_target_addr_hash(const void *tp)
774{
775	const mptsas_target_addr_t *tap = tp;
776
777	return ((tap->mta_wwn & 0xffffffffffffULL) |
778	    ((uint64_t)tap->mta_phymask << 48));
779}
780
781static int
782mptsas_target_addr_cmp(const void *a, const void *b)
783{
784	const mptsas_target_addr_t *aap = a;
785	const mptsas_target_addr_t *bap = b;
786
787	if (aap->mta_wwn < bap->mta_wwn)
788		return (-1);
789	if (aap->mta_wwn > bap->mta_wwn)
790		return (1);
791	return ((int)bap->mta_phymask - (int)aap->mta_phymask);
792}
793
/*
 * Identity hash for the temporary-target table: the pointer value
 * itself serves as the hash key.
 */
static uint64_t
mptsas_tmp_target_hash(const void *tp)
{
	return ((uint64_t)(uintptr_t)tp);
}
799
/*
 * Total ordering over temporary-target keys (raw pointer values).
 * Returns 1, -1, or 0 as a is greater than, less than, or equal to b.
 *
 * Bug fix: the second test was "b < a", which is the same condition as
 * "a > b" above it, so the function returned 0 for a < b and the
 * ordering was not a valid total order.  The comparison is done on
 * uintptr_t values, since relational comparison of pointers into
 * distinct objects is undefined behavior in C.
 */
static int
mptsas_tmp_target_cmp(const void *a, const void *b)
{
	uintptr_t la = (uintptr_t)a;
	uintptr_t rb = (uintptr_t)b;

	if (la > rb)
		return (1);
	if (la < rb)
		return (-1);

	return (0);
}
810
/*
 * refhash destructor for mptsas_target_t entries.
 */
static void
mptsas_target_free(void *op)
{
	kmem_free(op, sizeof (mptsas_target_t));
}
816
/*
 * refhash destructor for mptsas_smp_t (expander) entries.
 */
static void
mptsas_smp_free(void *op)
{
	kmem_free(op, sizeof (mptsas_smp_t));
}
822
823static void
824mptsas_destroy_hashes(mptsas_t *mpt)
825{
826	mptsas_target_t *tp;
827	mptsas_smp_t *sp;
828
829	for (tp = refhash_first(mpt->m_targets); tp != NULL;
830	    tp = refhash_next(mpt->m_targets, tp)) {
831		refhash_remove(mpt->m_targets, tp);
832	}
833	for (sp = refhash_first(mpt->m_smp_targets); sp != NULL;
834	    sp = refhash_next(mpt->m_smp_targets, sp)) {
835		refhash_remove(mpt->m_smp_targets, sp);
836	}
837	refhash_destroy(mpt->m_tmp_targets);
838	refhash_destroy(mpt->m_targets);
839	refhash_destroy(mpt->m_smp_targets);
840	mpt->m_targets = NULL;
841	mpt->m_smp_targets = NULL;
842}
843
/*
 * attach(9E) handler for the HBA's iport (SCSI port) child nodes.
 *
 * For DDI_ATTACH: bind the iport to the parent HBA's soft state, decode
 * the iport unit address (a hex phymask string, or "v0" for the virtual
 * port), publish the SCSA port properties (initiator-port, num-phys,
 * phymask, dynamic-port, virtual-port, attached-port), create per-PHY
 * kstats, and register the iport with MDI so MPxIO/vHCI can manage
 * paths through it.
 *
 * For DDI_RESUME: there is no per-iport state to restore; succeed
 * immediately.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
mptsas_iport_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	dev_info_t		*pdip;
	mptsas_t		*mpt;
	scsi_hba_tran_t		*hba_tran;
	char			*iport = NULL;
	char			phymask[MPTSAS_MAX_PHYS];
	mptsas_phymask_t	phy_mask = 0;
	int			dynamic_port = 0;
	uint32_t		page_address;
	char			initiator_wwnstr[MPTSAS_WWN_STRLEN];
	int			rval = DDI_FAILURE;
	int			i = 0;
	uint8_t			numphys = 0;
	uint8_t			phy_id;
	uint8_t			phy_port = 0;
	uint16_t		attached_devhdl = 0;
	uint32_t		dev_info;
	uint64_t		attached_sas_wwn;
	uint16_t		dev_hdl;
	uint16_t		pdev_hdl;
	uint16_t		bay_num, enclosure, io_flags;
	char			attached_wwnstr[MPTSAS_WWN_STRLEN];

	/* CONSTCOND */
	ASSERT(NO_COMPETING_THREADS);

	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		/*
		 * If this is a scsi-iport node, nothing to do here.
		 */
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	pdip = ddi_get_parent(dip);

	/*
	 * The parent HBA node carries the tran vector that leads to the
	 * per-controller soft state; without it the iport cannot attach.
	 */
	if ((hba_tran = ndi_flavorv_get(pdip, SCSA_FLAVOR_SCSI_DEVICE)) ==
	    NULL) {
		cmn_err(CE_WARN, "Failed attach iport because fail to "
		    "get tran vector for the HBA node");
		return (DDI_FAILURE);
	}

	mpt = TRAN2MPT(hba_tran);
	ASSERT(mpt != NULL);
	if (mpt == NULL)
		return (DDI_FAILURE);

	if ((hba_tran = ndi_flavorv_get(dip, SCSA_FLAVOR_SCSI_DEVICE)) ==
	    NULL) {
		mptsas_log(mpt, CE_WARN, "Failed attach iport because fail to "
		    "get tran vector for the iport node");
		return (DDI_FAILURE);
	}

	/*
	 * Overwrite parent's tran_hba_private to iport's tran vector
	 */
	hba_tran->tran_hba_private = mpt;

	ddi_report_dev(dip);

	/*
	 * Get SAS address for initiator port according dev_handle.
	 * The unit address "v0" denotes the virtual (RAID volume) port:
	 * mark it with the virtual-port property and finish early, since
	 * it has no physical PHYs to describe.
	 */
	iport = ddi_get_name_addr(dip);
	if (iport && strncmp(iport, "v0", 2) == 0) {
		if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
		    MPTSAS_VIRTUAL_PORT, 1) !=
		    DDI_PROP_SUCCESS) {
			(void) ddi_prop_remove(DDI_DEV_T_NONE, dip,
			    MPTSAS_VIRTUAL_PORT);
			mptsas_log(mpt, CE_WARN, "mptsas virtual port "
			    "prop update failed");
			return (DDI_FAILURE);
		}
		return (DDI_SUCCESS);
	}

	/*
	 * Match the iport unit address (a hex string) against each
	 * configured port's phy_mask to find which port this node is.
	 */
	mutex_enter(&mpt->m_mutex);
	for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
		bzero(phymask, sizeof (phymask));
		(void) sprintf(phymask,
		    "%x", mpt->m_phy_info[i].phy_mask);
		if (strcmp(phymask, iport) == 0) {
			break;
		}
	}

	if (i == MPTSAS_MAX_PHYS) {
		mptsas_log(mpt, CE_WARN, "Failed attach port %s because port"
		    "seems not exist", iport);
		mutex_exit(&mpt->m_mutex);
		return (DDI_FAILURE);
	}

	phy_mask = mpt->m_phy_info[i].phy_mask;

	/* Firmware-configured ("auto") ports are reported as dynamic. */
	if (mpt->m_phy_info[i].port_flags & AUTO_PORT_CONFIGURATION)
		dynamic_port = 1;
	else
		dynamic_port = 0;

	/*
	 * Update PHY info for smhba
	 */
	if (mptsas_smhba_phy_init(mpt)) {
		mutex_exit(&mpt->m_mutex);
		mptsas_log(mpt, CE_WARN, "mptsas phy update "
		    "failed");
		return (DDI_FAILURE);
	}

	mutex_exit(&mpt->m_mutex);

	/* Count the PHYs belonging to this port (set bits in phy_mask). */
	numphys = 0;
	for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
		if ((phy_mask >> i) & 0x01) {
			numphys++;
		}
	}

	/* Initiator port address is the HBA base WWID in "w<16 hex>" form. */
	bzero(initiator_wwnstr, sizeof (initiator_wwnstr));
	(void) sprintf(initiator_wwnstr, "w%016"PRIx64,
	    mpt->un.m_base_wwid);

	if (ddi_prop_update_string(DDI_DEV_T_NONE, dip,
	    SCSI_ADDR_PROP_INITIATOR_PORT, initiator_wwnstr) !=
	    DDI_PROP_SUCCESS) {
		(void) ddi_prop_remove(DDI_DEV_T_NONE,
		    dip, SCSI_ADDR_PROP_INITIATOR_PORT);
		mptsas_log(mpt, CE_WARN, "mptsas Initiator port "
		    "prop update failed");
		return (DDI_FAILURE);
	}
	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
	    MPTSAS_NUM_PHYS, numphys) !=
	    DDI_PROP_SUCCESS) {
		(void) ddi_prop_remove(DDI_DEV_T_NONE, dip, MPTSAS_NUM_PHYS);
		return (DDI_FAILURE);
	}

	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
	    "phymask", phy_mask) !=
	    DDI_PROP_SUCCESS) {
		(void) ddi_prop_remove(DDI_DEV_T_NONE, dip, "phymask");
		mptsas_log(mpt, CE_WARN, "mptsas phy mask "
		    "prop update failed");
		return (DDI_FAILURE);
	}

	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
	    "dynamic-port", dynamic_port) !=
	    DDI_PROP_SUCCESS) {
		(void) ddi_prop_remove(DDI_DEV_T_NONE, dip, "dynamic-port");
		mptsas_log(mpt, CE_WARN, "mptsas dynamic port "
		    "prop update failed");
		return (DDI_FAILURE);
	}
	/* A physical port: explicitly mark virtual-port as 0. */
	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip,
	    MPTSAS_VIRTUAL_PORT, 0) !=
	    DDI_PROP_SUCCESS) {
		(void) ddi_prop_remove(DDI_DEV_T_NONE, dip,
		    MPTSAS_VIRTUAL_PORT);
		mptsas_log(mpt, CE_WARN, "mptsas virtual port "
		    "prop update failed");
		return (DDI_FAILURE);
	}
	/*
	 * Publish per-PHY properties; also reports the device handle of
	 * whatever is attached to this port via attached_devhdl.
	 */
	mptsas_smhba_set_all_phy_props(mpt, dip, numphys, phy_mask,
	    &attached_devhdl);

	/*
	 * Read SAS Device Page 0 for the attached device to learn its
	 * SAS WWN for the attached-port property below.
	 */
	mutex_enter(&mpt->m_mutex);
	page_address = (MPI2_SAS_DEVICE_PGAD_FORM_HANDLE &
	    MPI2_SAS_DEVICE_PGAD_FORM_MASK) | (uint32_t)attached_devhdl;
	rval = mptsas_get_sas_device_page0(mpt, page_address, &dev_hdl,
	    &attached_sas_wwn, &dev_info, &phy_port, &phy_id,
	    &pdev_hdl, &bay_num, &enclosure, &io_flags);
	if (rval != DDI_SUCCESS) {
		mptsas_log(mpt, CE_WARN,
		    "Failed to get device page0 for handle:%d",
		    attached_devhdl);
		mutex_exit(&mpt->m_mutex);
		return (DDI_FAILURE);
	}

	/* Record this port's phymask string as its smhba path. */
	for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
		bzero(phymask, sizeof (phymask));
		(void) sprintf(phymask, "%x", mpt->m_phy_info[i].phy_mask);
		if (strcmp(phymask, iport) == 0) {
			(void) sprintf(&mpt->m_phy_info[i].smhba_info.path[0],
			    "%x",
			    mpt->m_phy_info[i].phy_mask);
		}
	}
	mutex_exit(&mpt->m_mutex);

	bzero(attached_wwnstr, sizeof (attached_wwnstr));
	(void) sprintf(attached_wwnstr, "w%016"PRIx64,
	    attached_sas_wwn);
	if (ddi_prop_update_string(DDI_DEV_T_NONE, dip,
	    SCSI_ADDR_PROP_ATTACHED_PORT, attached_wwnstr) !=
	    DDI_PROP_SUCCESS) {
		(void) ddi_prop_remove(DDI_DEV_T_NONE,
		    dip, SCSI_ADDR_PROP_ATTACHED_PORT);
		return (DDI_FAILURE);
	}

	/* Create kstats for each phy on this iport */

	mptsas_create_phy_stats(mpt, iport, dip);

	/*
	 * register sas hba iport with mdi (MPxIO/vhci); failure here is
	 * non-fatal — the iport simply attaches without MPxIO support.
	 */
	if (mdi_phci_register(MDI_HCI_CLASS_SCSI,
	    dip, 0) == MDI_SUCCESS) {
		mpt->m_mpxio_enable = TRUE;
	}
	return (DDI_SUCCESS);
}
1072
1073/*
1074 * Notes:
1075 *	Set up all device state and allocate data structures,
1076 *	mutexes, condition variables, etc. for device operation.
1077 *	Add interrupts needed.
1078 *	Return DDI_SUCCESS if device is ready, else return DDI_FAILURE.
1079 */
static int
mptsas_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	mptsas_t		*mpt = NULL;
	int			instance, i, j;
	int			doneq_thread_num;
	/*
	 * Progress flags: each is bumped after its resource is acquired
	 * so that the "fail:" path can tear down in reverse order.
	 */
	char			intr_added = 0;
	char			map_setup = 0;
	char			config_setup = 0;
	char			hba_attach_setup = 0;
	char			smp_attach_setup = 0;
	char			enc_attach_setup = 0;
	char			mutex_init_done = 0;
	char			event_taskq_create = 0;
	char			dr_taskq_create = 0;
	char			doneq_thread_create = 0;
	char			added_watchdog = 0;
	scsi_hba_tran_t		*hba_tran;
	uint_t			mem_bar = MEM_SPACE;
	int			rval = DDI_FAILURE;

	/* CONSTCOND */
	ASSERT(NO_COMPETING_THREADS);

	/* Iport child nodes take a separate, lighter attach path. */
	if (scsi_hba_iport_unit_address(dip)) {
		return (mptsas_iport_attach(dip, cmd));
	}

	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		if ((hba_tran = ddi_get_driver_private(dip)) == NULL)
			return (DDI_FAILURE);

		mpt = TRAN2MPT(hba_tran);

		if (!mpt) {
			return (DDI_FAILURE);
		}

		/*
		 * Reset hardware and softc to "no outstanding commands"
		 * Note	that a check condition can result on first command
		 * to a	target.
		 */
		mutex_enter(&mpt->m_mutex);

		/*
		 * raise power.
		 */
		if (mpt->m_options & MPTSAS_OPT_PM) {
			mutex_exit(&mpt->m_mutex);
			(void) pm_busy_component(dip, 0);
			rval = pm_power_has_changed(dip, 0, PM_LEVEL_D0);
			if (rval == DDI_SUCCESS) {
				mutex_enter(&mpt->m_mutex);
			} else {
				/*
				 * The pm_power_has_changed() call above
				 * failed, and that can only occur if we
				 * were unable to reset the hardware.  This
				 * is probably due to unhealthy hardware,
				 * and because important filesystems (such
				 * as the root filesystem) could be on the
				 * attached disks, it would not be a good
				 * idea to continue, as we won't be entirely
				 * certain we are writing correct data.  So
				 * we panic() here to not only prevent
				 * possible data corruption, but to give
				 * developers or end users a hope of
				 * identifying and correcting any problems.
				 */
				fm_panic("mptsas could not reset hardware "
				    "during resume");
			}
		}

		mpt->m_suspended = 0;

		/*
		 * Reinitialize ioc
		 */
		mpt->m_softstate |= MPTSAS_SS_MSG_UNIT_RESET;
		if (mptsas_init_chip(mpt, FALSE) == DDI_FAILURE) {
			mutex_exit(&mpt->m_mutex);
			if (mpt->m_options & MPTSAS_OPT_PM) {
				(void) pm_idle_component(dip, 0);
			}
			fm_panic("mptsas init chip fail during resume");
		}
		/*
		 * mptsas_update_driver_data needs interrupts so enable them
		 * first.
		 */
		MPTSAS_ENABLE_INTR(mpt);
		mptsas_update_driver_data(mpt);

		/* start requests, if possible */
		mptsas_restart_hba(mpt);

		mutex_exit(&mpt->m_mutex);

		/*
		 * Restart watch thread
		 */
		mutex_enter(&mptsas_global_mutex);
		if (mptsas_timeout_id == 0) {
			mptsas_timeout_id = timeout(mptsas_watch, NULL,
			    mptsas_tick);
			mptsas_timeouts_enabled = 1;
		}
		mutex_exit(&mptsas_global_mutex);

		/* report idle status to pm framework */
		if (mpt->m_options & MPTSAS_OPT_PM) {
			(void) pm_idle_component(dip, 0);
		}

		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);

	}

	instance = ddi_get_instance(dip);

	/*
	 * Allocate softc information.
	 */
	if (ddi_soft_state_zalloc(mptsas_state, instance) != DDI_SUCCESS) {
		mptsas_log(NULL, CE_WARN,
		    "mptsas%d: cannot allocate soft state", instance);
		goto fail;
	}

	mpt = ddi_get_soft_state(mptsas_state, instance);

	if (mpt == NULL) {
		mptsas_log(NULL, CE_WARN,
		    "mptsas%d: cannot get soft state", instance);
		goto fail;
	}

	/* Indicate that we are 'sizeof (scsi_*(9S))' clean. */
	scsi_size_clean(dip);

	mpt->m_dip = dip;
	mpt->m_instance = instance;

	/* Make a per-instance copy of the structures */
	mpt->m_io_dma_attr = mptsas_dma_attrs64;
	/* Message frames may be restricted to 32-bit DMA addressing. */
	if (mptsas_use_64bit_msgaddr) {
		mpt->m_msg_dma_attr = mptsas_dma_attrs64;
	} else {
		mpt->m_msg_dma_attr = mptsas_dma_attrs;
	}
	mpt->m_reg_acc_attr = mptsas_dev_attr;
	mpt->m_dev_acc_attr = mptsas_dev_attr;

	/*
	 * Size of individual request sense buffer
	 */
	mpt->m_req_sense_size = EXTCMDS_STATUS_SIZE;

	/*
	 * Initialize FMA
	 */
	mpt->m_fm_capabilities = ddi_getprop(DDI_DEV_T_ANY, mpt->m_dip,
	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "fm-capable",
	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);

	mptsas_fm_init(mpt);

	/*
	 * Initialize us with the UFM subsystem
	 */
	if (ddi_ufm_init(dip, DDI_UFM_CURRENT_VERSION, &mptsas_ufm_ops,
	    &mpt->m_ufmh, mpt) != 0) {
		mptsas_log(mpt, CE_WARN, "failed to initialize UFM subsystem");
		goto fail;
	}

	if (mptsas_alloc_handshake_msg(mpt,
	    sizeof (Mpi2SCSITaskManagementRequest_t)) == DDI_FAILURE) {
		mptsas_log(mpt, CE_WARN, "cannot initialize handshake msg.");
		goto fail;
	}

	/*
	 * Setup configuration space
	 */
	if (mptsas_config_space_init(mpt) == FALSE) {
		mptsas_log(mpt, CE_WARN, "mptsas_config_space_init failed");
		goto fail;
	}
	config_setup++;

	/* Map the controller's memory BAR for register access. */
	if (ddi_regs_map_setup(dip, mem_bar, (caddr_t *)&mpt->m_reg,
	    0, 0, &mpt->m_reg_acc_attr, &mpt->m_datap) != DDI_SUCCESS) {
		mptsas_log(mpt, CE_WARN, "map setup failed");
		goto fail;
	}
	map_setup++;

	/*
	 * A taskq is created for dealing with the event handler
	 */
	if ((mpt->m_event_taskq = ddi_taskq_create(dip, "mptsas_event_taskq",
	    1, TASKQ_DEFAULTPRI, 0)) == NULL) {
		mptsas_log(mpt, CE_NOTE, "ddi_taskq_create failed");
		goto fail;
	}
	event_taskq_create++;

	/*
	 * A taskq is created for dealing with dr events
	 */
	if ((mpt->m_dr_taskq = ddi_taskq_create(dip,
	    "mptsas_dr_taskq",
	    1, TASKQ_DEFAULTPRI, 0)) == NULL) {
		mptsas_log(mpt, CE_NOTE, "ddi_taskq_create for discovery "
		    "failed");
		goto fail;
	}
	dr_taskq_create++;

	/* Doneq worker-thread tunables, overridable via driver properties. */
	mpt->m_doneq_thread_threshold = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    0, "mptsas_doneq_thread_threshold_prop", 10);
	mpt->m_doneq_length_threshold = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    0, "mptsas_doneq_length_threshold_prop", 8);
	mpt->m_doneq_thread_n = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    0, "mptsas_doneq_thread_n_prop", 8);

	/*
	 * Spin up the pool of completion (doneq) threads, each with its
	 * own cv/mutex/queue; a thread count of 0 disables the pool.
	 */
	if (mpt->m_doneq_thread_n) {
		cv_init(&mpt->m_doneq_thread_cv, NULL, CV_DRIVER, NULL);
		mutex_init(&mpt->m_doneq_mutex, NULL, MUTEX_DRIVER, NULL);

		mutex_enter(&mpt->m_doneq_mutex);
		mpt->m_doneq_thread_id =
		    kmem_zalloc(sizeof (mptsas_doneq_thread_list_t)
		    * mpt->m_doneq_thread_n, KM_SLEEP);

		for (j = 0; j < mpt->m_doneq_thread_n; j++) {
			cv_init(&mpt->m_doneq_thread_id[j].cv, NULL,
			    CV_DRIVER, NULL);
			mutex_init(&mpt->m_doneq_thread_id[j].mutex, NULL,
			    MUTEX_DRIVER, NULL);
			mutex_enter(&mpt->m_doneq_thread_id[j].mutex);
			mpt->m_doneq_thread_id[j].flag |=
			    MPTSAS_DONEQ_THREAD_ACTIVE;
			mpt->m_doneq_thread_id[j].arg.mpt = mpt;
			mpt->m_doneq_thread_id[j].arg.t = j;
			mpt->m_doneq_thread_id[j].threadp =
			    thread_create(NULL, 0, mptsas_doneq_thread,
			    &mpt->m_doneq_thread_id[j].arg,
			    0, &p0, TS_RUN, minclsyspri);
			mpt->m_doneq_thread_id[j].donetail =
			    &mpt->m_doneq_thread_id[j].doneq;
			mutex_exit(&mpt->m_doneq_thread_id[j].mutex);
		}
		mutex_exit(&mpt->m_doneq_mutex);
		doneq_thread_create++;
	}

	/*
	 * Disable hardware interrupt since we're not ready to
	 * handle it yet.
	 */
	MPTSAS_DISABLE_INTR(mpt);
	if (mptsas_register_intrs(mpt) == FALSE)
		goto fail;
	intr_added++;

	/* Initialize mutex used in interrupt handler */
	mutex_init(&mpt->m_mutex, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(mpt->m_intr_pri));
	mutex_init(&mpt->m_passthru_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mpt->m_tx_waitq_mutex, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(mpt->m_intr_pri));
	for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
		mutex_init(&mpt->m_phy_info[i].smhba_info.phy_mutex,
		    NULL, MUTEX_DRIVER,
		    DDI_INTR_PRI(mpt->m_intr_pri));
	}

	cv_init(&mpt->m_cv, NULL, CV_DRIVER, NULL);
	cv_init(&mpt->m_passthru_cv, NULL, CV_DRIVER, NULL);
	cv_init(&mpt->m_fw_cv, NULL, CV_DRIVER, NULL);
	cv_init(&mpt->m_config_cv, NULL, CV_DRIVER, NULL);
	cv_init(&mpt->m_fw_diag_cv, NULL, CV_DRIVER, NULL);
	cv_init(&mpt->m_extreq_sense_refcount_cv, NULL, CV_DRIVER, NULL);
	mutex_init_done++;

	mutex_enter(&mpt->m_mutex);
	/*
	 * Initialize power management component
	 */
	if (mpt->m_options & MPTSAS_OPT_PM) {
		if (mptsas_init_pm(mpt)) {
			mutex_exit(&mpt->m_mutex);
			mptsas_log(mpt, CE_WARN, "mptsas pm initialization "
			    "failed");
			goto fail;
		}
	}

	/*
	 * Initialize chip using Message Unit Reset, if allowed
	 */
	mpt->m_softstate |= MPTSAS_SS_MSG_UNIT_RESET;
	if (mptsas_init_chip(mpt, TRUE) == DDI_FAILURE) {
		mutex_exit(&mpt->m_mutex);
		mptsas_log(mpt, CE_WARN, "mptsas chip initialization failed");
		goto fail;
	}

	/* Hash of known targets, keyed by target address. */
	mpt->m_targets = refhash_create(MPTSAS_TARGET_BUCKET_COUNT,
	    mptsas_target_addr_hash, mptsas_target_addr_cmp,
	    mptsas_target_free, sizeof (mptsas_target_t),
	    offsetof(mptsas_target_t, m_link),
	    offsetof(mptsas_target_t, m_addr), KM_SLEEP);

	/*
	 * The refhash for temporary targets uses the address of the target
	 * struct itself as tag, so the tag offset is 0. See the implementation
	 * of mptsas_tmp_target_hash() and mptsas_tmp_target_cmp().
	 */
	mpt->m_tmp_targets = refhash_create(MPTSAS_TMP_TARGET_BUCKET_COUNT,
	    mptsas_tmp_target_hash, mptsas_tmp_target_cmp,
	    mptsas_target_free, sizeof (mptsas_target_t),
	    offsetof(mptsas_target_t, m_link), 0, KM_SLEEP);

	/*
	 * Fill in the phy_info structure and get the base WWID
	 */
	if (mptsas_get_manufacture_page5(mpt) == DDI_FAILURE) {
		mptsas_log(mpt, CE_WARN,
		    "mptsas_get_manufacture_page5 failed!");
		goto fail;
	}

	if (mptsas_get_sas_io_unit_page_hndshk(mpt)) {
		mptsas_log(mpt, CE_WARN,
		    "mptsas_get_sas_io_unit_page_hndshk failed!");
		goto fail;
	}

	if (mptsas_get_manufacture_page0(mpt) == DDI_FAILURE) {
		mptsas_log(mpt, CE_WARN,
		    "mptsas_get_manufacture_page0 failed!");
		goto fail;
	}

	mutex_exit(&mpt->m_mutex);

	/*
	 * Register the iport for multiple port HBA
	 */
	mptsas_iport_register(mpt);

	/*
	 * initialize SCSI HBA transport structure
	 */
	if (mptsas_hba_setup(mpt) == FALSE)
		goto fail;
	hba_attach_setup++;

	if (mptsas_smp_setup(mpt) == FALSE)
		goto fail;
	smp_attach_setup++;

	if (mptsas_enc_setup(mpt) == FALSE)
		goto fail;
	enc_attach_setup++;

	if (mptsas_cache_create(mpt) == FALSE)
		goto fail;

	/* A zero reset delay is rejected and forced back to the default. */
	mpt->m_scsi_reset_delay	= ddi_prop_get_int(DDI_DEV_T_ANY,
	    dip, 0, "scsi-reset-delay",	SCSI_DEFAULT_RESET_DELAY);
	if (mpt->m_scsi_reset_delay == 0) {
		mptsas_log(mpt, CE_NOTE,
		    "scsi_reset_delay of 0 is not recommended,"
		    " resetting to SCSI_DEFAULT_RESET_DELAY\n");
		mpt->m_scsi_reset_delay = SCSI_DEFAULT_RESET_DELAY;
	}

	/*
	 * Initialize the wait and done FIFO queue
	 */
	mpt->m_donetail = &mpt->m_doneq;
	mpt->m_waitqtail = &mpt->m_waitq;
	mpt->m_tx_waitqtail = &mpt->m_tx_waitq;
	mpt->m_tx_draining = 0;

	/*
	 * ioc cmd queue initialize
	 */
	mpt->m_ioc_event_cmdtail = &mpt->m_ioc_event_cmdq;
	mpt->m_dev_handle = 0xFFFF;

	MPTSAS_ENABLE_INTR(mpt);

	/*
	 * enable event notification
	 */
	mutex_enter(&mpt->m_mutex);
	if (mptsas_ioc_enable_event_notification(mpt)) {
		mutex_exit(&mpt->m_mutex);
		goto fail;
	}
	mutex_exit(&mpt->m_mutex);

	/*
	 * used for mptsas_watch
	 */
	mptsas_list_add(mpt);

	/*
	 * The watchdog timeout is global; only the first instance to
	 * attach starts it.
	 */
	mutex_enter(&mptsas_global_mutex);
	if (mptsas_timeouts_enabled == 0) {
		mptsas_scsi_watchdog_tick = ddi_prop_get_int(DDI_DEV_T_ANY,
		    dip, 0, "scsi-watchdog-tick", DEFAULT_WD_TICK);

		mptsas_tick = mptsas_scsi_watchdog_tick *
		    drv_usectohz((clock_t)1000000);

		mptsas_timeout_id = timeout(mptsas_watch, NULL, mptsas_tick);
		mptsas_timeouts_enabled = 1;
	}
	mutex_exit(&mptsas_global_mutex);
	added_watchdog++;

	/*
	 * Initialize PHY info for smhba.
	 * This requires watchdog to be enabled otherwise if interrupts
	 * don't work the system will hang.
	 */
	if (mptsas_smhba_setup(mpt)) {
		mptsas_log(mpt, CE_WARN, "mptsas phy initialization "
		    "failed");
		goto fail;
	}

	/* Check all dma handles allocated in attach */
	if ((mptsas_check_dma_handle(mpt->m_dma_req_frame_hdl)
	    != DDI_SUCCESS) ||
	    (mptsas_check_dma_handle(mpt->m_dma_req_sense_hdl)
	    != DDI_SUCCESS) ||
	    (mptsas_check_dma_handle(mpt->m_dma_reply_frame_hdl)
	    != DDI_SUCCESS) ||
	    (mptsas_check_dma_handle(mpt->m_dma_free_queue_hdl)
	    != DDI_SUCCESS) ||
	    (mptsas_check_dma_handle(mpt->m_dma_post_queue_hdl)
	    != DDI_SUCCESS) ||
	    (mptsas_check_dma_handle(mpt->m_hshk_dma_hdl)
	    != DDI_SUCCESS)) {
		goto fail;
	}

	/* Check all acc handles allocated in attach */
	if ((mptsas_check_acc_handle(mpt->m_datap) != DDI_SUCCESS) ||
	    (mptsas_check_acc_handle(mpt->m_acc_req_frame_hdl)
	    != DDI_SUCCESS) ||
	    (mptsas_check_acc_handle(mpt->m_acc_req_sense_hdl)
	    != DDI_SUCCESS) ||
	    (mptsas_check_acc_handle(mpt->m_acc_reply_frame_hdl)
	    != DDI_SUCCESS) ||
	    (mptsas_check_acc_handle(mpt->m_acc_free_queue_hdl)
	    != DDI_SUCCESS) ||
	    (mptsas_check_acc_handle(mpt->m_acc_post_queue_hdl)
	    != DDI_SUCCESS) ||
	    (mptsas_check_acc_handle(mpt->m_hshk_acc_hdl)
	    != DDI_SUCCESS) ||
	    (mptsas_check_acc_handle(mpt->m_config_handle)
	    != DDI_SUCCESS)) {
		goto fail;
	}

	/*
	 * After this point, we are not going to fail the attach.
	 */

	/* Let the UFM subsystem know we're ready to receive callbacks */
	ddi_ufm_update(mpt->m_ufmh);

	/* Print message of HBA present */
	ddi_report_dev(dip);

	/* report idle status to pm framework */
	if (mpt->m_options & MPTSAS_OPT_PM) {
		(void) pm_idle_component(dip, 0);
	}

	return (DDI_SUCCESS);

fail:
	/*
	 * Unwind everything acquired so far, in reverse order of
	 * acquisition, guided by the progress flags set above.
	 */
	mptsas_log(mpt, CE_WARN, "attach failed");
	mptsas_fm_ereport(mpt, DDI_FM_DEVICE_NO_RESPONSE);
	ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_LOST);
	if (mpt) {
		/* deallocate in reverse order */
		if (added_watchdog) {
			mptsas_list_del(mpt);
			mutex_enter(&mptsas_global_mutex);

			/* Stop the global watchdog if we were the last. */
			if (mptsas_timeout_id && (mptsas_head == NULL)) {
				timeout_id_t tid = mptsas_timeout_id;
				mptsas_timeouts_enabled = 0;
				mptsas_timeout_id = 0;
				/*
				 * Drop the lock around untimeout() to
				 * avoid deadlocking against a watch
				 * callback already in flight.
				 */
				mutex_exit(&mptsas_global_mutex);
				(void) untimeout(tid);
				mutex_enter(&mptsas_global_mutex);
			}
			mutex_exit(&mptsas_global_mutex);
		}

		mptsas_cache_destroy(mpt);

		if (smp_attach_setup) {
			mptsas_smp_teardown(mpt);
		}
		if (enc_attach_setup) {
			mptsas_enc_teardown(mpt);
		}
		if (hba_attach_setup) {
			mptsas_hba_teardown(mpt);
		}

		if (mpt->m_tmp_targets)
			refhash_destroy(mpt->m_tmp_targets);
		if (mpt->m_targets)
			refhash_destroy(mpt->m_targets);
		if (mpt->m_smp_targets)
			refhash_destroy(mpt->m_smp_targets);

		if (mpt->m_active) {
			mptsas_free_active_slots(mpt);
		}
		if (intr_added) {
			mptsas_unregister_intrs(mpt);
		}

		/*
		 * Signal all doneq threads to exit, wait for them to
		 * drain, then free their per-thread state.
		 */
		if (doneq_thread_create) {
			mutex_enter(&mpt->m_doneq_mutex);
			doneq_thread_num = mpt->m_doneq_thread_n;
			for (j = 0; j < mpt->m_doneq_thread_n; j++) {
				mutex_enter(&mpt->m_doneq_thread_id[j].mutex);
				mpt->m_doneq_thread_id[j].flag &=
				    (~MPTSAS_DONEQ_THREAD_ACTIVE);
				cv_signal(&mpt->m_doneq_thread_id[j].cv);
				mutex_exit(&mpt->m_doneq_thread_id[j].mutex);
			}
			while (mpt->m_doneq_thread_n) {
				cv_wait(&mpt->m_doneq_thread_cv,
				    &mpt->m_doneq_mutex);
			}
			for (j = 0; j < doneq_thread_num; j++) {
				cv_destroy(&mpt->m_doneq_thread_id[j].cv);
				mutex_destroy(&mpt->m_doneq_thread_id[j].mutex);
			}
			kmem_free(mpt->m_doneq_thread_id,
			    sizeof (mptsas_doneq_thread_list_t)
			    * doneq_thread_num);
			mutex_exit(&mpt->m_doneq_mutex);
			cv_destroy(&mpt->m_doneq_thread_cv);
			mutex_destroy(&mpt->m_doneq_mutex);
		}
		if (event_taskq_create) {
			ddi_taskq_destroy(mpt->m_event_taskq);
		}
		if (dr_taskq_create) {
			ddi_taskq_destroy(mpt->m_dr_taskq);
		}
		if (mutex_init_done) {
			mutex_destroy(&mpt->m_tx_waitq_mutex);
			mutex_destroy(&mpt->m_passthru_mutex);
			mutex_destroy(&mpt->m_mutex);
			for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
				mutex_destroy(
				    &mpt->m_phy_info[i].smhba_info.phy_mutex);
			}
			cv_destroy(&mpt->m_cv);
			cv_destroy(&mpt->m_passthru_cv);
			cv_destroy(&mpt->m_fw_cv);
			cv_destroy(&mpt->m_config_cv);
			cv_destroy(&mpt->m_fw_diag_cv);
			cv_destroy(&mpt->m_extreq_sense_refcount_cv);
		}

		if (map_setup) {
			mptsas_cfg_fini(mpt);
		}
		if (config_setup) {
			mptsas_config_space_fini(mpt);
		}
		mptsas_free_handshake_msg(mpt);
		mptsas_hba_fini(mpt);

		mptsas_fm_fini(mpt);
		ddi_soft_state_free(mptsas_state, instance);
		ddi_prop_remove_all(dip);
	}
	return (DDI_FAILURE);
}
1687
/*
 * DDI_SUSPEND handler (called from mptsas_detach()).
 *
 * Marks the instance suspended, cancels this instance's timeouts, and
 * if every mptsas instance is now suspended, cancels the global watch
 * and reset-watch timeouts as well.  If the device is at full power
 * (or PM is not in use), it also disables interrupts, syncs Integrated
 * RAID via a RAID action "system shutdown", and drains the event and
 * DR taskqs.  Always returns DDI_SUCCESS.
 */
static int
mptsas_suspend(dev_info_t *devi)
{
	mptsas_t	*mpt, *g;
	scsi_hba_tran_t	*tran;

	/* Iport nodes have nothing of their own to suspend. */
	if (scsi_hba_iport_unit_address(devi)) {
		return (DDI_SUCCESS);
	}

	if ((tran = ddi_get_driver_private(devi)) == NULL)
		return (DDI_SUCCESS);

	mpt = TRAN2MPT(tran);
	if (!mpt) {
		return (DDI_SUCCESS);
	}

	mutex_enter(&mpt->m_mutex);

	/* Already suspended: just bump the count and succeed. */
	if (mpt->m_suspended++) {
		mutex_exit(&mpt->m_mutex);
		return (DDI_SUCCESS);
	}

	/*
	 * Cancel timeout threads for this mpt.  The mutex is dropped
	 * around untimeout() so a callback already running can acquire
	 * it and finish, avoiding deadlock.
	 */
	if (mpt->m_quiesce_timeid) {
		timeout_id_t tid = mpt->m_quiesce_timeid;
		mpt->m_quiesce_timeid = 0;
		mutex_exit(&mpt->m_mutex);
		(void) untimeout(tid);
		mutex_enter(&mpt->m_mutex);
	}

	if (mpt->m_restart_cmd_timeid) {
		timeout_id_t tid = mpt->m_restart_cmd_timeid;
		mpt->m_restart_cmd_timeid = 0;
		mutex_exit(&mpt->m_mutex);
		(void) untimeout(tid);
		mutex_enter(&mpt->m_mutex);
	}

	mutex_exit(&mpt->m_mutex);

	(void) pm_idle_component(mpt->m_dip, 0);

	/*
	 * Cancel watch threads if all mpts suspended.
	 * g == NULL after the scan means no instance is still active.
	 */
	rw_enter(&mptsas_global_rwlock, RW_WRITER);
	for (g = mptsas_head; g != NULL; g = g->m_next) {
		if (!g->m_suspended)
			break;
	}
	rw_exit(&mptsas_global_rwlock);

	mutex_enter(&mptsas_global_mutex);
	if (g == NULL) {
		timeout_id_t tid;

		mptsas_timeouts_enabled = 0;
		if (mptsas_timeout_id) {
			tid = mptsas_timeout_id;
			mptsas_timeout_id = 0;
			/* Drop the lock around untimeout(), as above. */
			mutex_exit(&mptsas_global_mutex);
			(void) untimeout(tid);
			mutex_enter(&mptsas_global_mutex);
		}
		if (mptsas_reset_watch) {
			tid = mptsas_reset_watch;
			mptsas_reset_watch = 0;
			mutex_exit(&mptsas_global_mutex);
			(void) untimeout(tid);
			mutex_enter(&mptsas_global_mutex);
		}
	}
	mutex_exit(&mptsas_global_mutex);

	mutex_enter(&mpt->m_mutex);

	/*
	 * If this mpt is not in full power(PM_LEVEL_D0), just return.
	 */
	if ((mpt->m_options & MPTSAS_OPT_PM) &&
	    (mpt->m_power_level != PM_LEVEL_D0)) {
		mutex_exit(&mpt->m_mutex);
		return (DDI_SUCCESS);
	}

	/* Disable HBA interrupts in hardware */
	MPTSAS_DISABLE_INTR(mpt);
	/*
	 * Send RAID action system shutdown to sync IR
	 */
	mptsas_raid_action_system_shutdown(mpt);

	mutex_exit(&mpt->m_mutex);

	/* drain the taskq */
	ddi_taskq_wait(mpt->m_event_taskq);
	ddi_taskq_wait(mpt->m_dr_taskq);

	return (DDI_SUCCESS);
}
1794
1795#ifdef	__sparc
1796/*ARGSUSED*/
1797static int
1798mptsas_reset(dev_info_t *devi, ddi_reset_cmd_t cmd)
1799{
1800	mptsas_t	*mpt;
1801	scsi_hba_tran_t *tran;
1802
1803	/*
1804	 * If this call is for iport, just return.
1805	 */
1806	if (scsi_hba_iport_unit_address(devi))
1807		return (DDI_SUCCESS);
1808
1809	if ((tran = ddi_get_driver_private(devi)) == NULL)
1810		return (DDI_SUCCESS);
1811
1812	if ((mpt = TRAN2MPT(tran)) == NULL)
1813		return (DDI_SUCCESS);
1814
1815	/*
1816	 * Send RAID action system shutdown to sync IR.  Disable HBA
1817	 * interrupts in hardware first.
1818	 */
1819	MPTSAS_DISABLE_INTR(mpt);
1820	mptsas_raid_action_system_shutdown(mpt);
1821
1822	return (DDI_SUCCESS);
1823}
1824#else /* __sparc */
1825/*
1826 * quiesce(9E) entry point.
1827 *
1828 * This function is called when the system is single-threaded at high
1829 * PIL with preemption disabled. Therefore, this function must not be
1830 * blocked.
1831 *
1832 * This function returns DDI_SUCCESS on success, or DDI_FAILURE on failure.
1833 * DDI_FAILURE indicates an error condition and should almost never happen.
1834 */
1835static int
1836mptsas_quiesce(dev_info_t *devi)
1837{
1838	mptsas_t	*mpt;
1839	scsi_hba_tran_t *tran;
1840
1841	/*
1842	 * If this call is for iport, just return.
1843	 */
1844	if (scsi_hba_iport_unit_address(devi))
1845		return (DDI_SUCCESS);
1846
1847	if ((tran = ddi_get_driver_private(devi)) == NULL)
1848		return (DDI_SUCCESS);
1849
1850	if ((mpt = TRAN2MPT(tran)) == NULL)
1851		return (DDI_SUCCESS);
1852
1853	/* Disable HBA interrupts in hardware */
1854	MPTSAS_DISABLE_INTR(mpt);
1855	/* Send RAID action system shutdonw to sync IR */
1856	mptsas_raid_action_system_shutdown(mpt);
1857
1858	return (DDI_SUCCESS);
1859}
1860#endif	/* __sparc */
1861
1862/*
1863 * detach(9E).	Remove all device allocations and system resources;
1864 * disable device interrupts.
1865 * Return DDI_SUCCESS if done; DDI_FAILURE if there's a problem.
1866 */
static int
mptsas_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	/* CONSTCOND */
	ASSERT(NO_COMPETING_THREADS);
	NDBG0(("mptsas_detach: dip=0x%p cmd=0x%p", (void *)devi, (void *)cmd));

	/*
	 * Dispatch: full teardown for DDI_DETACH, suspend path for
	 * DDI_SUSPEND; any other command is rejected.
	 */
	switch (cmd) {
	case DDI_DETACH:
		return (mptsas_do_detach(devi));

	case DDI_SUSPEND:
		return (mptsas_suspend(devi));

	default:
		return (DDI_FAILURE);
	}
	/* NOTREACHED */
}
1886
1887static int
1888mptsas_do_detach(dev_info_t *dip)
1889{
1890	mptsas_t	*mpt;
1891	scsi_hba_tran_t	*tran;
1892	int		circ = 0;
1893	int		circ1 = 0;
1894	mdi_pathinfo_t	*pip = NULL;
1895	int		i;
1896	int		doneq_thread_num = 0;
1897
1898	NDBG0(("mptsas_do_detach: dip=0x%p", (void *)dip));
1899
1900	if ((tran = ndi_flavorv_get(dip, SCSA_FLAVOR_SCSI_DEVICE)) == NULL)
1901		return (DDI_FAILURE);
1902
1903	mpt = TRAN2MPT(tran);
1904	if (!mpt) {
1905		return (DDI_FAILURE);
1906	}
1907
1908	ddi_ufm_fini(mpt->m_ufmh);
1909
1910	/*
1911	 * Still have pathinfo child, should not detach mpt driver
1912	 */
1913	if (scsi_hba_iport_unit_address(dip)) {
1914		if (mpt->m_mpxio_enable) {
1915			/*
1916			 * MPxIO enabled for the iport
1917			 */
1918			ndi_devi_enter(scsi_vhci_dip, &circ1);
1919			ndi_devi_enter(dip, &circ);
1920			while ((pip = mdi_get_next_client_path(dip, NULL)) !=
1921			    NULL) {
1922				if (mdi_pi_free(pip, 0) == MDI_SUCCESS) {
1923					continue;
1924				}
1925				ndi_devi_exit(dip, circ);
1926				ndi_devi_exit(scsi_vhci_dip, circ1);
1927				NDBG12(("detach failed because of "
1928				    "outstanding path info"));
1929				return (DDI_FAILURE);
1930			}
1931			ndi_devi_exit(dip, circ);
1932			ndi_devi_exit(scsi_vhci_dip, circ1);
1933			(void) mdi_phci_unregister(dip, 0);
1934		}
1935
1936		ddi_prop_remove_all(dip);
1937
1938		return (DDI_SUCCESS);
1939	}
1940
1941	/* Make sure power level is D0 before accessing registers */
1942	if (mpt->m_options & MPTSAS_OPT_PM) {
1943		(void) pm_busy_component(dip, 0);
1944		if (mpt->m_power_level != PM_LEVEL_D0) {
1945			if (pm_raise_power(dip, 0, PM_LEVEL_D0) !=
1946			    DDI_SUCCESS) {
1947				mptsas_log(mpt, CE_WARN,
1948				    "mptsas%d: Raise power request failed.",
1949				    mpt->m_instance);
1950				(void) pm_idle_component(dip, 0);
1951				return (DDI_FAILURE);
1952			}
1953		}
1954	}
1955
1956	/*
1957	 * Send RAID action system shutdown to sync IR.  After action, send a
1958	 * Message Unit Reset. Since after that DMA resource will be freed,
1959	 * set ioc to READY state will avoid HBA initiated DMA operation.
1960	 */
1961	mutex_enter(&mpt->m_mutex);
1962	MPTSAS_DISABLE_INTR(mpt);
1963	mptsas_raid_action_system_shutdown(mpt);
1964	mpt->m_softstate |= MPTSAS_SS_MSG_UNIT_RESET;
1965	(void) mptsas_ioc_reset(mpt, FALSE);
1966	mutex_exit(&mpt->m_mutex);
1967	mptsas_rem_intrs(mpt);
1968	ddi_taskq_destroy(mpt->m_event_taskq);
1969	ddi_taskq_destroy(mpt->m_dr_taskq);
1970
1971	if (mpt->m_doneq_thread_n) {
1972		mutex_enter(&mpt->m_doneq_mutex);
1973		doneq_thread_num = mpt->m_doneq_thread_n;
1974		for (i = 0; i < mpt->m_doneq_thread_n; i++) {
1975			mutex_enter(&mpt->m_doneq_thread_id[i].mutex);
1976			mpt->m_doneq_thread_id[i].flag &=
1977			    (~MPTSAS_DONEQ_THREAD_ACTIVE);
1978			cv_signal(&mpt->m_doneq_thread_id[i].cv);
1979			mutex_exit(&mpt->m_doneq_thread_id[i].mutex);
1980		}
1981		while (mpt->m_doneq_thread_n) {
1982			cv_wait(&mpt->m_doneq_thread_cv,
1983			    &mpt->m_doneq_mutex);
1984		}
1985		for (i = 0;  i < doneq_thread_num; i++) {
1986			cv_destroy(&mpt->m_doneq_thread_id[i].cv);
1987			mutex_destroy(&mpt->m_doneq_thread_id[i].mutex);
1988		}
1989		kmem_free(mpt->m_doneq_thread_id,
1990		    sizeof (mptsas_doneq_thread_list_t)
1991		    * doneq_thread_num);
1992		mutex_exit(&mpt->m_doneq_mutex);
1993		cv_destroy(&mpt->m_doneq_thread_cv);
1994		mutex_destroy(&mpt->m_doneq_mutex);
1995	}
1996
1997	scsi_hba_reset_notify_tear_down(mpt->m_reset_notify_listf);
1998
1999	mptsas_list_del(mpt);
2000
2001	/*
2002	 * Cancel timeout threads for this mpt
2003	 */
2004	mutex_enter(&mpt->m_mutex);
2005	if (mpt->m_quiesce_timeid) {
2006		timeout_id_t tid = mpt->m_quiesce_timeid;
2007		mpt->m_quiesce_timeid = 0;
2008		mutex_exit(&mpt->m_mutex);
2009		(void) untimeout(tid);
2010		mutex_enter(&mpt->m_mutex);
2011	}
2012
2013	if (mpt->m_restart_cmd_timeid) {
2014		timeout_id_t tid = mpt->m_restart_cmd_timeid;
2015		mpt->m_restart_cmd_timeid = 0;
2016		mutex_exit(&mpt->m_mutex);
2017		(void) untimeout(tid);
2018		mutex_enter(&mpt->m_mutex);
2019	}
2020
2021	mutex_exit(&mpt->m_mutex);
2022
2023	/*
2024	 * last mpt? ... if active, CANCEL watch threads.
2025	 */
2026	mutex_enter(&mptsas_global_mutex);
2027	if (mptsas_head == NULL) {
2028		timeout_id_t tid;
2029		/*
2030		 * Clear mptsas_timeouts_enable so that the watch thread
2031		 * gets restarted on DDI_ATTACH
2032		 */
2033		mptsas_timeouts_enabled = 0;
2034		if (mptsas_timeout_id) {
2035			tid = mptsas_timeout_id;
2036			mptsas_timeout_id = 0;
2037			mutex_exit(&mptsas_global_mutex);
2038			(void) untimeout(tid);
2039			mutex_enter(&mptsas_global_mutex);
2040		}
2041		if (mptsas_reset_watch) {
2042			tid = mptsas_reset_watch;
2043			mptsas_reset_watch = 0;
2044			mutex_exit(&mptsas_global_mutex);
2045			(void) untimeout(tid);
2046			mutex_enter(&mptsas_global_mutex);
2047		}
2048	}
2049	mutex_exit(&mptsas_global_mutex);
2050
2051	/*
2052	 * Delete Phy stats
2053	 */
2054	mptsas_destroy_phy_stats(mpt);
2055
2056	mptsas_destroy_hashes(mpt);
2057
2058	/*
2059	 * Delete nt_active.
2060	 */
2061	mutex_enter(&mpt->m_mutex);
2062	mptsas_free_active_slots(mpt);
2063	mutex_exit(&mpt->m_mutex);
2064
2065	/* deallocate everything that was allocated in mptsas_attach */
2066	mptsas_cache_destroy(mpt);
2067
2068	mptsas_hba_fini(mpt);
2069	mptsas_cfg_fini(mpt);
2070
2071	/* Lower the power informing PM Framework */
2072	if (mpt->m_options & MPTSAS_OPT_PM) {
2073		if (pm_lower_power(dip, 0, PM_LEVEL_D3) != DDI_SUCCESS)
2074			mptsas_log(mpt, CE_WARN,
2075			    "!mptsas%d: Lower power request failed "
2076			    "during detach, ignoring.",
2077			    mpt->m_instance);
2078	}
2079
2080	mutex_destroy(&mpt->m_tx_waitq_mutex);
2081	mutex_destroy(&mpt->m_passthru_mutex);
2082	mutex_destroy(&mpt->m_mutex);
2083	for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
2084		mutex_destroy(&mpt->m_phy_info[i].smhba_info.phy_mutex);
2085	}
2086	cv_destroy(&mpt->m_cv);
2087	cv_destroy(&mpt->m_passthru_cv);
2088	cv_destroy(&mpt->m_fw_cv);
2089	cv_destroy(&mpt->m_config_cv);
2090	cv_destroy(&mpt->m_fw_diag_cv);
2091	cv_destroy(&mpt->m_extreq_sense_refcount_cv);
2092
2093	mptsas_smp_teardown(mpt);
2094	mptsas_enc_teardown(mpt);
2095	mptsas_hba_teardown(mpt);
2096
2097	mptsas_config_space_fini(mpt);
2098
2099	mptsas_free_handshake_msg(mpt);
2100
2101	mptsas_fm_fini(mpt);
2102	ddi_soft_state_free(mptsas_state, ddi_get_instance(dip));
2103	ddi_prop_remove_all(dip);
2104
2105	return (DDI_SUCCESS);
2106}
2107
2108static void
2109mptsas_list_add(mptsas_t *mpt)
2110{
2111	rw_enter(&mptsas_global_rwlock, RW_WRITER);
2112
2113	if (mptsas_head == NULL) {
2114		mptsas_head = mpt;
2115	} else {
2116		mptsas_tail->m_next = mpt;
2117	}
2118	mptsas_tail = mpt;
2119	rw_exit(&mptsas_global_rwlock);
2120}
2121
2122static void
2123mptsas_list_del(mptsas_t *mpt)
2124{
2125	mptsas_t *m;
2126	/*
2127	 * Remove device instance from the global linked list
2128	 */
2129	rw_enter(&mptsas_global_rwlock, RW_WRITER);
2130	if (mptsas_head == mpt) {
2131		m = mptsas_head = mpt->m_next;
2132	} else {
2133		for (m = mptsas_head; m != NULL; m = m->m_next) {
2134			if (m->m_next == mpt) {
2135				m->m_next = mpt->m_next;
2136				break;
2137			}
2138		}
2139		if (m == NULL) {
2140			mptsas_log(mpt, CE_PANIC, "Not in softc list!");
2141		}
2142	}
2143
2144	if (mptsas_tail == mpt) {
2145		mptsas_tail = m;
2146	}
2147	rw_exit(&mptsas_global_rwlock);
2148}
2149
2150static int
2151mptsas_alloc_handshake_msg(mptsas_t *mpt, size_t alloc_size)
2152{
2153	ddi_dma_attr_t	task_dma_attrs;
2154
2155	mpt->m_hshk_dma_size = 0;
2156	task_dma_attrs = mpt->m_msg_dma_attr;
2157	task_dma_attrs.dma_attr_sgllen = 1;
2158	task_dma_attrs.dma_attr_granular = (uint32_t)(alloc_size);
2159
2160	/* allocate Task Management ddi_dma resources */
2161	if (mptsas_dma_addr_create(mpt, task_dma_attrs,
2162	    &mpt->m_hshk_dma_hdl, &mpt->m_hshk_acc_hdl, &mpt->m_hshk_memp,
2163	    alloc_size, NULL) == FALSE) {
2164		return (DDI_FAILURE);
2165	}
2166	mpt->m_hshk_dma_size = alloc_size;
2167
2168	return (DDI_SUCCESS);
2169}
2170
2171static void
2172mptsas_free_handshake_msg(mptsas_t *mpt)
2173{
2174	if (mpt->m_hshk_dma_size == 0)
2175		return;
2176	mptsas_dma_addr_destroy(&mpt->m_hshk_dma_hdl, &mpt->m_hshk_acc_hdl);
2177	mpt->m_hshk_dma_size = 0;
2178}
2179
2180static int
2181mptsas_hba_setup(mptsas_t *mpt)
2182{
2183	scsi_hba_tran_t		*hba_tran;
2184	int			tran_flags;
2185
2186	/* Allocate a transport structure */
2187	hba_tran = mpt->m_tran = scsi_hba_tran_alloc(mpt->m_dip,
2188	    SCSI_HBA_CANSLEEP);
2189	ASSERT(mpt->m_tran != NULL);
2190
2191	hba_tran->tran_hba_private	= mpt;
2192	hba_tran->tran_tgt_private	= NULL;
2193
2194	hba_tran->tran_tgt_init		= mptsas_scsi_tgt_init;
2195	hba_tran->tran_tgt_free		= mptsas_scsi_tgt_free;
2196
2197	hba_tran->tran_start		= mptsas_scsi_start;
2198	hba_tran->tran_reset		= mptsas_scsi_reset;
2199	hba_tran->tran_abort		= mptsas_scsi_abort;
2200	hba_tran->tran_getcap		= mptsas_scsi_getcap;
2201	hba_tran->tran_setcap		= mptsas_scsi_setcap;
2202	hba_tran->tran_init_pkt		= mptsas_scsi_init_pkt;
2203	hba_tran->tran_destroy_pkt	= mptsas_scsi_destroy_pkt;
2204
2205	hba_tran->tran_dmafree		= mptsas_scsi_dmafree;
2206	hba_tran->tran_sync_pkt		= mptsas_scsi_sync_pkt;
2207	hba_tran->tran_reset_notify	= mptsas_scsi_reset_notify;
2208
2209	hba_tran->tran_get_bus_addr	= mptsas_get_bus_addr;
2210	hba_tran->tran_get_name		= mptsas_get_name;
2211
2212	hba_tran->tran_quiesce		= mptsas_scsi_quiesce;
2213	hba_tran->tran_unquiesce	= mptsas_scsi_unquiesce;
2214	hba_tran->tran_bus_reset	= NULL;
2215
2216	hba_tran->tran_add_eventcall	= NULL;
2217	hba_tran->tran_get_eventcookie	= NULL;
2218	hba_tran->tran_post_event	= NULL;
2219	hba_tran->tran_remove_eventcall	= NULL;
2220
2221	hba_tran->tran_bus_config	= mptsas_bus_config;
2222
2223	hba_tran->tran_interconnect_type = INTERCONNECT_SAS;
2224
2225	/*
2226	 * All children of the HBA are iports. We need tran was cloned.
2227	 * So we pass the flags to SCSA. SCSI_HBA_TRAN_CLONE will be
2228	 * inherited to iport's tran vector.
2229	 */
2230	tran_flags = (SCSI_HBA_HBA | SCSI_HBA_TRAN_CLONE);
2231
2232	if (scsi_hba_attach_setup(mpt->m_dip, &mpt->m_msg_dma_attr,
2233	    hba_tran, tran_flags) != DDI_SUCCESS) {
2234		mptsas_log(mpt, CE_WARN, "hba attach setup failed");
2235		scsi_hba_tran_free(hba_tran);
2236		mpt->m_tran = NULL;
2237		return (FALSE);
2238	}
2239	return (TRUE);
2240}
2241
2242static void
2243mptsas_hba_teardown(mptsas_t *mpt)
2244{
2245	(void) scsi_hba_detach(mpt->m_dip);
2246	if (mpt->m_tran != NULL) {
2247		scsi_hba_tran_free(mpt->m_tran);
2248		mpt->m_tran = NULL;
2249	}
2250}
2251
/*
 * Walk the controller's PHYs and register one scsi_hba iport for each
 * distinct port that has an attached device; the iport unit address is
 * the port's phy mask rendered in hex.  A fixed virtual iport ("v0")
 * is always registered for RAID volumes.
 */
static void
mptsas_iport_register(mptsas_t *mpt)
{
	int i, j;
	mptsas_phymask_t	mask = 0x0;
	/*
	 * initial value of mask is 0
	 */
	mutex_enter(&mpt->m_mutex);
	for (i = 0; i < mpt->m_num_phys; i++) {
		mptsas_phymask_t phy_mask = 0x0;
		char phy_mask_name[MPTSAS_MAX_PHYS];
		uint8_t current_port;

		/* Skip PHYs with nothing attached. */
		if (mpt->m_phy_info[i].attached_devhdl == 0)
			continue;

		bzero(phy_mask_name, sizeof (phy_mask_name));

		current_port = mpt->m_phy_info[i].port_num;

		/* Already covered by an earlier PHY's port mask. */
		if ((mask & (1 << i)) != 0)
			continue;

		/* Gather every PHY belonging to the same port. */
		for (j = 0; j < mpt->m_num_phys; j++) {
			if (mpt->m_phy_info[j].attached_devhdl &&
			    (mpt->m_phy_info[j].port_num == current_port)) {
				phy_mask |= (1 << j);
			}
		}
		mask = mask | phy_mask;

		/* Record the port's mask on each member PHY. */
		for (j = 0; j < mpt->m_num_phys; j++) {
			if ((phy_mask >> j) & 0x01) {
				mpt->m_phy_info[j].phy_mask = phy_mask;
			}
		}

		(void) sprintf(phy_mask_name, "%x", phy_mask);

		/*
		 * Drop m_mutex across the framework call; it may block
		 * and must not be made with the adapter mutex held.
		 */
		mutex_exit(&mpt->m_mutex);
		/*
		 * register a iport
		 */
		(void) scsi_hba_iport_register(mpt->m_dip, phy_mask_name);
		mutex_enter(&mpt->m_mutex);
	}
	mutex_exit(&mpt->m_mutex);
	/*
	 * register a virtual port for RAID volume always
	 */
	(void) scsi_hba_iport_register(mpt->m_dip, "v0");

}
2306
2307static int
2308mptsas_smp_setup(mptsas_t *mpt)
2309{
2310	mpt->m_smptran = smp_hba_tran_alloc(mpt->m_dip);
2311	ASSERT(mpt->m_smptran != NULL);
2312	mpt->m_smptran->smp_tran_hba_private = mpt;
2313	mpt->m_smptran->smp_tran_start = mptsas_smp_start;
2314	if (smp_hba_attach_setup(mpt->m_dip, mpt->m_smptran) != DDI_SUCCESS) {
2315		mptsas_log(mpt, CE_WARN, "smp attach setup failed");
2316		smp_hba_tran_free(mpt->m_smptran);
2317		mpt->m_smptran = NULL;
2318		return (FALSE);
2319	}
2320	/*
2321	 * Initialize smp hash table
2322	 */
2323	mpt->m_smp_targets = refhash_create(MPTSAS_SMP_BUCKET_COUNT,
2324	    mptsas_target_addr_hash, mptsas_target_addr_cmp,
2325	    mptsas_smp_free, sizeof (mptsas_smp_t),
2326	    offsetof(mptsas_smp_t, m_link), offsetof(mptsas_smp_t, m_addr),
2327	    KM_SLEEP);
2328	mpt->m_smp_devhdl = 0xFFFF;
2329
2330	return (TRUE);
2331}
2332
2333static void
2334mptsas_smp_teardown(mptsas_t *mpt)
2335{
2336	(void) smp_hba_detach(mpt->m_dip);
2337	if (mpt->m_smptran != NULL) {
2338		smp_hba_tran_free(mpt->m_smptran);
2339		mpt->m_smptran = NULL;
2340	}
2341	mpt->m_smp_devhdl = 0;
2342}
2343
/*
 * Initialize the (initially empty) list of enclosures known to this
 * controller; entries are added later as enclosure pages are read.
 * Always succeeds.
 */
static int
mptsas_enc_setup(mptsas_t *mpt)
{
	list_create(&mpt->m_enclosures, sizeof (mptsas_enclosure_t),
	    offsetof(mptsas_enclosure_t, me_link));
	return (TRUE);
}
2351
2352static void
2353mptsas_enc_free(mptsas_enclosure_t *mep)
2354{
2355	if (mep == NULL)
2356		return;
2357	if (mep->me_slotleds != NULL) {
2358		VERIFY3U(mep->me_nslots, >, 0);
2359		kmem_free(mep->me_slotleds, sizeof (uint8_t) * mep->me_nslots);
2360	}
2361	kmem_free(mep, sizeof (mptsas_enclosure_t));
2362}
2363
2364static void
2365mptsas_enc_teardown(mptsas_t *mpt)
2366{
2367	mptsas_enclosure_t *mep;
2368
2369	while ((mep = list_remove_head(&mpt->m_enclosures)) != NULL) {
2370		mptsas_enc_free(mep);
2371	}
2372	list_destroy(&mpt->m_enclosures);
2373}
2374
2375static mptsas_enclosure_t *
2376mptsas_enc_lookup(mptsas_t *mpt, uint16_t hdl)
2377{
2378	mptsas_enclosure_t *mep;
2379
2380	ASSERT(MUTEX_HELD(&mpt->m_mutex));
2381
2382	for (mep = list_head(&mpt->m_enclosures); mep != NULL;
2383	    mep = list_next(&mpt->m_enclosures, mep)) {
2384		if (hdl == mep->me_enchdl) {
2385			return (mep);
2386		}
2387	}
2388
2389	return (NULL);
2390}
2391
2392static int
2393mptsas_cache_create(mptsas_t *mpt)
2394{
2395	int instance = mpt->m_instance;
2396	char buf[64];
2397
2398	/*
2399	 * create kmem cache for packets
2400	 */
2401	(void) sprintf(buf, "mptsas%d_cache", instance);
2402	mpt->m_kmem_cache = kmem_cache_create(buf,
2403	    sizeof (struct mptsas_cmd) + scsi_pkt_size(), 8,
2404	    mptsas_kmem_cache_constructor, mptsas_kmem_cache_destructor,
2405	    NULL, (void *)mpt, NULL, 0);
2406
2407	if (mpt->m_kmem_cache == NULL) {
2408		mptsas_log(mpt, CE_WARN, "creating kmem cache failed");
2409		return (FALSE);
2410	}
2411
2412	/*
2413	 * create kmem cache for extra SGL frames if SGL cannot
2414	 * be accomodated into main request frame.
2415	 */
2416	(void) sprintf(buf, "mptsas%d_cache_frames", instance);
2417	mpt->m_cache_frames = kmem_cache_create(buf,
2418	    sizeof (mptsas_cache_frames_t), 8,
2419	    mptsas_cache_frames_constructor, mptsas_cache_frames_destructor,
2420	    NULL, (void *)mpt, NULL, 0);
2421
2422	if (mpt->m_cache_frames == NULL) {
2423		mptsas_log(mpt, CE_WARN, "creating cache for frames failed");
2424		return (FALSE);
2425	}
2426
2427	return (TRUE);
2428}
2429
2430static void
2431mptsas_cache_destroy(mptsas_t *mpt)
2432{
2433	/* deallocate in reverse order */
2434	if (mpt->m_cache_frames) {
2435		kmem_cache_destroy(mpt->m_cache_frames);
2436		mpt->m_cache_frames = NULL;
2437	}
2438	if (mpt->m_kmem_cache) {
2439		kmem_cache_destroy(mpt->m_kmem_cache);
2440		mpt->m_kmem_cache = NULL;
2441	}
2442}
2443
/*
 * power(9E) entry point.  Raises the IOC to D0 (waiting for it to
 * leave the reset state and hard-resetting it if it fails to reach
 * the operational state) or lowers it to D3.  A busy adapter refuses
 * any power reduction.  Iport nodes have no power state of their own.
 */
static int
mptsas_power(dev_info_t *dip, int component, int level)
{
#ifndef __lock_lint
	_NOTE(ARGUNUSED(component))
#endif
	mptsas_t	*mpt;
	int		rval = DDI_SUCCESS;
	int		polls = 0;
	uint32_t	ioc_status;

	/* Power management applies to the HBA node only, not iports. */
	if (scsi_hba_iport_unit_address(dip) != 0)
		return (DDI_SUCCESS);

	mpt = ddi_get_soft_state(mptsas_state, ddi_get_instance(dip));
	if (mpt == NULL) {
		return (DDI_FAILURE);
	}

	mutex_enter(&mpt->m_mutex);

	/*
	 * If the device is busy, don't lower its power level
	 */
	if (mpt->m_busy && (mpt->m_power_level > level)) {
		mutex_exit(&mpt->m_mutex);
		return (DDI_FAILURE);
	}
	switch (level) {
	case PM_LEVEL_D0:
		NDBG11(("mptsas%d: turning power ON.", mpt->m_instance));
		MPTSAS_POWER_ON(mpt);
		/*
		 * Wait up to 30 seconds for IOC to come out of reset
		 * (3000 polls at 10ms apiece).
		 */
		while (((ioc_status = ddi_get32(mpt->m_datap,
		    &mpt->m_reg->Doorbell)) &
		    MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_RESET) {
			if (polls++ > 3000) {
				break;
			}
			delay(drv_usectohz(10000));
		}
		/*
		 * If IOC is not in operational state, try to hard reset it.
		 */
		if ((ioc_status & MPI2_IOC_STATE_MASK) !=
		    MPI2_IOC_STATE_OPERATIONAL) {
			/* Force a full reset rather than a message unit one. */
			mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET;
			if (mptsas_restart_ioc(mpt) == DDI_FAILURE) {
				mptsas_log(mpt, CE_WARN,
				    "mptsas_power: hard reset failed");
				mutex_exit(&mpt->m_mutex);
				return (DDI_FAILURE);
			}
		}
		mpt->m_power_level = PM_LEVEL_D0;
		break;
	case PM_LEVEL_D3:
		NDBG11(("mptsas%d: turning power OFF.", mpt->m_instance));
		MPTSAS_POWER_OFF(mpt);
		break;
	default:
		mptsas_log(mpt, CE_WARN, "mptsas%d: unknown power level <%x>.",
		    mpt->m_instance, level);
		rval = DDI_FAILURE;
		break;
	}
	mutex_exit(&mpt->m_mutex);
	return (rval);
}
2515
2516/*
2517 * Initialize configuration space and figure out which
2518 * chip and revison of the chip the mpt driver is using.
2519 */
static int
mptsas_config_space_init(mptsas_t *mpt)
{
	NDBG0(("mptsas_config_space_init"));

	/* Config space may already be mapped from an earlier pass. */
	if (mpt->m_config_handle != NULL)
		return (TRUE);

	if (pci_config_setup(mpt->m_dip,
	    &mpt->m_config_handle) != DDI_SUCCESS) {
		mptsas_log(mpt, CE_WARN, "cannot map configuration space.");
		return (FALSE);
	}

	/*
	 * This is a workaround for a XMITS ASIC bug which does not
	 * drive the CBE upper bits.
	 */
	if (pci_config_get16(mpt->m_config_handle, PCI_CONF_STAT) &
	    PCI_STAT_PERROR) {
		/* Status bits are write-one-to-clear. */
		pci_config_put16(mpt->m_config_handle, PCI_CONF_STAT,
		    PCI_STAT_PERROR);
	}

	/* Enable bus mastering / memory access in the command register. */
	mptsas_setup_cmd_reg(mpt);

	/*
	 * Get the chip device id:
	 */
	mpt->m_devid = pci_config_get16(mpt->m_config_handle, PCI_CONF_DEVID);

	/*
	 * Save the revision.
	 */
	mpt->m_revid = pci_config_get8(mpt->m_config_handle, PCI_CONF_REVID);

	/*
	 * Save the SubSystem Vendor and Device IDs
	 */
	mpt->m_svid = pci_config_get16(mpt->m_config_handle, PCI_CONF_SUBVENID);
	mpt->m_ssid = pci_config_get16(mpt->m_config_handle, PCI_CONF_SUBSYSID);

	/*
	 * Set the latency timer to 0x40 as specified by the upa -> pci
	 * bridge chip design team.  This may be done by the sparc pci
	 * bus nexus driver, but the driver should make sure the latency
	 * timer is correct for performance reasons.
	 */
	pci_config_put8(mpt->m_config_handle, PCI_CONF_LATENCY_TIMER,
	    MPTSAS_LATENCY_TIMER);

	(void) mptsas_get_pci_cap(mpt);
	return (TRUE);
}
2574
2575static void
2576mptsas_config_space_fini(mptsas_t *mpt)
2577{
2578	if (mpt->m_config_handle != NULL) {
2579		mptsas_disable_bus_master(mpt);
2580		pci_config_teardown(&mpt->m_config_handle);
2581		mpt->m_config_handle = NULL;
2582	}
2583}
2584
2585static void
2586mptsas_setup_cmd_reg(mptsas_t *mpt)
2587{
2588	ushort_t	cmdreg;
2589
2590	/*
2591	 * Set the command register to the needed values.
2592	 */
2593	cmdreg = pci_config_get16(mpt->m_config_handle, PCI_CONF_COMM);
2594	cmdreg |= (PCI_COMM_ME | PCI_COMM_SERR_ENABLE |
2595	    PCI_COMM_PARITY_DETECT | PCI_COMM_MAE);
2596	cmdreg &= ~PCI_COMM_IO;
2597	pci_config_put16(mpt->m_config_handle, PCI_CONF_COMM, cmdreg);
2598}
2599
2600static void
2601mptsas_disable_bus_master(mptsas_t *mpt)
2602{
2603	ushort_t	cmdreg;
2604
2605	/*
2606	 * Clear the master enable bit in the PCI command register.
2607	 * This prevents any bus mastering activity like DMA.
2608	 */
2609	cmdreg = pci_config_get16(mpt->m_config_handle, PCI_CONF_COMM);
2610	cmdreg &= ~PCI_COMM_ME;
2611	pci_config_put16(mpt->m_config_handle, PCI_CONF_COMM, cmdreg);
2612}
2613
2614int
2615mptsas_dma_alloc(mptsas_t *mpt, mptsas_dma_alloc_state_t *dma_statep)
2616{
2617	ddi_dma_attr_t	attrs;
2618
2619	attrs = mpt->m_io_dma_attr;
2620	attrs.dma_attr_sgllen = 1;
2621
2622	ASSERT(dma_statep != NULL);
2623
2624	if (mptsas_dma_addr_create(mpt, attrs, &dma_statep->handle,
2625	    &dma_statep->accessp, &dma_statep->memp, dma_statep->size,
2626	    &dma_statep->cookie) == FALSE) {
2627		return (DDI_FAILURE);
2628	}
2629
2630	return (DDI_SUCCESS);
2631}
2632
/*
 * Release a DMA region previously set up by mptsas_dma_alloc().
 */
void
mptsas_dma_free(mptsas_dma_alloc_state_t *dma_statep)
{
	ASSERT(dma_statep != NULL);
	mptsas_dma_addr_destroy(&dma_statep->handle, &dma_statep->accessp);
	dma_statep->size = 0;
}
2640
/*
 * Allocate a temporary single-cookie DMA buffer of 'size' bytes,
 * invoke 'callback' with the mapped address and its access handle,
 * and free the buffer again.  'var' is an opaque value forwarded to
 * the callback.  The callback's return value is passed through unless
 * an FM check of either handle fails, in which case DDI_FAILURE is
 * returned.  Caller must hold m_mutex.
 */
int
mptsas_do_dma(mptsas_t *mpt, uint32_t size, int var, int (*callback)())
{
	ddi_dma_attr_t		attrs;
	ddi_dma_handle_t	dma_handle;
	caddr_t			memp;
	ddi_acc_handle_t	accessp;
	int			rval;

	ASSERT(mutex_owned(&mpt->m_mutex));

	attrs = mpt->m_msg_dma_attr;
	attrs.dma_attr_sgllen = 1;
	attrs.dma_attr_granular = size;

	if (mptsas_dma_addr_create(mpt, attrs, &dma_handle,
	    &accessp, &memp, size, NULL) == FALSE) {
		return (DDI_FAILURE);
	}

	rval = (*callback) (mpt, memp, var, accessp);

	/* FM: discard the result if either handle reports an error. */
	if ((mptsas_check_dma_handle(dma_handle) != DDI_SUCCESS) ||
	    (mptsas_check_acc_handle(accessp) != DDI_SUCCESS)) {
		ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
		rval = DDI_FAILURE;
	}

	mptsas_dma_addr_destroy(&dma_handle, &accessp);
	return (rval);

}
2673
2674static int
2675mptsas_alloc_request_frames(mptsas_t *mpt)
2676{
2677	ddi_dma_attr_t		frame_dma_attrs;
2678	caddr_t			memp;
2679	ddi_dma_cookie_t	cookie;
2680	size_t			mem_size;
2681
2682	/*
2683	 * re-alloc when it has already alloced
2684	 */
2685	if (mpt->m_dma_req_frame_hdl)
2686		mptsas_dma_addr_destroy(&mpt->m_dma_req_frame_hdl,
2687		    &mpt->m_acc_req_frame_hdl);
2688
2689	/*
2690	 * The size of the request frame pool is:
2691	 *   Number of Request Frames * Request Frame Size
2692	 */
2693	mem_size = mpt->m_max_requests * mpt->m_req_frame_size;
2694
2695	/*
2696	 * set the DMA attributes.  System Request Message Frames must be
2697	 * aligned on a 16-byte boundry.
2698	 */
2699	frame_dma_attrs = mpt->m_msg_dma_attr;
2700	frame_dma_attrs.dma_attr_align = 16;
2701	frame_dma_attrs.dma_attr_sgllen = 1;
2702
2703	/*
2704	 * allocate the request frame pool.
2705	 */
2706	if (mptsas_dma_addr_create(mpt, frame_dma_attrs,
2707	    &mpt->m_dma_req_frame_hdl, &mpt->m_acc_req_frame_hdl, &memp,
2708	    mem_size, &cookie) == FALSE) {
2709		return (DDI_FAILURE);
2710	}
2711
2712	/*
2713	 * Store the request frame memory address.  This chip uses this
2714	 * address to dma to and from the driver's frame.  The second
2715	 * address is the address mpt uses to fill in the frame.
2716	 */
2717	mpt->m_req_frame_dma_addr = cookie.dmac_laddress;
2718	mpt->m_req_frame = memp;
2719
2720	/*
2721	 * Clear the request frame pool.
2722	 */
2723	bzero(mpt->m_req_frame, mem_size);
2724
2725	return (DDI_SUCCESS);
2726}
2727
/*
 * Allocate (or re-allocate) the pool of request-sense buffers: one
 * buffer per regular request frame (the pool excludes two frames) plus
 * an extra region, carved into m_req_sense_size pieces managed with an
 * rmalloc map, for extended sense requests.  Returns DDI_SUCCESS or
 * DDI_FAILURE.  Must not be called while extended request sense
 * buffers are outstanding (refcount must be zero).
 */
static int
mptsas_alloc_sense_bufs(mptsas_t *mpt)
{
	ddi_dma_attr_t		sense_dma_attrs;
	caddr_t			memp;
	ddi_dma_cookie_t	cookie;
	size_t			mem_size;
	int			num_extrqsense_bufs;

	ASSERT(mpt->m_extreq_sense_refcount == 0);

	/*
	 * re-alloc when it has already alloced
	 */
	if (mpt->m_dma_req_sense_hdl) {
		/* The extended-sense rmalloc map goes with the pool. */
		rmfreemap(mpt->m_erqsense_map);
		mptsas_dma_addr_destroy(&mpt->m_dma_req_sense_hdl,
		    &mpt->m_acc_req_sense_hdl);
	}

	/*
	 * The size of the request sense pool is:
	 *   (Number of Request Frames - 2 ) * Request Sense Size +
	 *   extra memory for extended sense requests.
	 */
	mem_size = ((mpt->m_max_requests - 2) * mpt->m_req_sense_size) +
	    mptsas_extreq_sense_bufsize;

	/*
	 * set the DMA attributes.  ARQ buffers
	 * aligned on a 16-byte boundry.
	 */
	sense_dma_attrs = mpt->m_msg_dma_attr;
	sense_dma_attrs.dma_attr_align = 16;
	sense_dma_attrs.dma_attr_sgllen = 1;

	/*
	 * allocate the request sense buffer pool.
	 */
	if (mptsas_dma_addr_create(mpt, sense_dma_attrs,
	    &mpt->m_dma_req_sense_hdl, &mpt->m_acc_req_sense_hdl, &memp,
	    mem_size, &cookie) == FALSE) {
		return (DDI_FAILURE);
	}

	/*
	 * Store the request sense base memory address.  This chip uses this
	 * address to dma the request sense data.  The second
	 * address is the address mpt uses to access the data.
	 * The third is the base for the extended rqsense buffers.
	 */
	mpt->m_req_sense_dma_addr = cookie.dmac_laddress;
	mpt->m_req_sense = memp;
	memp += (mpt->m_max_requests - 2) * mpt->m_req_sense_size;
	mpt->m_extreq_sense = memp;

	/*
	 * The extra memory is divided up into multiples of the base
	 * buffer size in order to allocate via rmalloc().
	 * Note that the rmallocmap cannot start at zero!
	 */
	num_extrqsense_bufs = mptsas_extreq_sense_bufsize /
	    mpt->m_req_sense_size;
	mpt->m_erqsense_map = rmallocmap_wait(num_extrqsense_bufs);
	/* Seed the map with every buffer index, starting at 1 (not 0). */
	rmfree(mpt->m_erqsense_map, num_extrqsense_bufs, 1);

	/*
	 * Clear the pool.
	 */
	bzero(mpt->m_req_sense, mem_size);

	return (DDI_SUCCESS);
}
2801
2802static int
2803mptsas_alloc_reply_frames(mptsas_t *mpt)
2804{
2805	ddi_dma_attr_t		frame_dma_attrs;
2806	caddr_t			memp;
2807	ddi_dma_cookie_t	cookie;
2808	size_t			mem_size;
2809
2810	/*
2811	 * re-alloc when it has already alloced
2812	 */
2813	if (mpt->m_dma_reply_frame_hdl) {
2814		mptsas_dma_addr_destroy(&mpt->m_dma_reply_frame_hdl,
2815		    &mpt->m_acc_reply_frame_hdl);
2816	}
2817
2818	/*
2819	 * The size of the reply frame pool is:
2820	 *   Number of Reply Frames * Reply Frame Size
2821	 */
2822	mem_size = mpt->m_max_replies * mpt->m_reply_frame_size;
2823
2824	/*
2825	 * set the DMA attributes.   System Reply Message Frames must be
2826	 * aligned on a 4-byte boundry.  This is the default.
2827	 */
2828	frame_dma_attrs = mpt->m_msg_dma_attr;
2829	frame_dma_attrs.dma_attr_sgllen = 1;
2830
2831	/*
2832	 * allocate the reply frame pool
2833	 */
2834	if (mptsas_dma_addr_create(mpt, frame_dma_attrs,
2835	    &mpt->m_dma_reply_frame_hdl, &mpt->m_acc_reply_frame_hdl, &memp,
2836	    mem_size, &cookie) == FALSE) {
2837		return (DDI_FAILURE);
2838	}
2839
2840	/*
2841	 * Store the reply frame memory address.  This chip uses this
2842	 * address to dma to and from the driver's frame.  The second
2843	 * address is the address mpt uses to process the frame.
2844	 */
2845	mpt->m_reply_frame_dma_addr = cookie.dmac_laddress;
2846	mpt->m_reply_frame = memp;
2847
2848	/*
2849	 * Clear the reply frame pool.
2850	 */
2851	bzero(mpt->m_reply_frame, mem_size);
2852
2853	return (DDI_SUCCESS);
2854}
2855
2856static int
2857mptsas_alloc_free_queue(mptsas_t *mpt)
2858{
2859	ddi_dma_attr_t		frame_dma_attrs;
2860	caddr_t			memp;
2861	ddi_dma_cookie_t	cookie;
2862	size_t			mem_size;
2863
2864	/*
2865	 * re-alloc when it has already alloced
2866	 */
2867	if (mpt->m_dma_free_queue_hdl) {
2868		mptsas_dma_addr_destroy(&mpt->m_dma_free_queue_hdl,
2869		    &mpt->m_acc_free_queue_hdl);
2870	}
2871
2872	/*
2873	 * The reply free queue size is:
2874	 *   Reply Free Queue Depth * 4
2875	 * The "4" is the size of one 32 bit address (low part of 64-bit
2876	 *   address)
2877	 */
2878	mem_size = mpt->m_free_queue_depth * 4;
2879
2880	/*
2881	 * set the DMA attributes  The Reply Free Queue must be aligned on a
2882	 * 16-byte boundry.
2883	 */
2884	frame_dma_attrs = mpt->m_msg_dma_attr;
2885	frame_dma_attrs.dma_attr_align = 16;
2886	frame_dma_attrs.dma_attr_sgllen = 1;
2887
2888	/*
2889	 * allocate the reply free queue
2890	 */
2891	if (mptsas_dma_addr_create(mpt, frame_dma_attrs,
2892	    &mpt->m_dma_free_queue_hdl, &mpt->m_acc_free_queue_hdl, &memp,
2893	    mem_size, &cookie) == FALSE) {
2894		return (DDI_FAILURE);
2895	}
2896
2897	/*
2898	 * Store the reply free queue memory address.  This chip uses this
2899	 * address to read from the reply free queue.  The second address
2900	 * is the address mpt uses to manage the queue.
2901	 */
2902	mpt->m_free_queue_dma_addr = cookie.dmac_laddress;
2903	mpt->m_free_queue = memp;
2904
2905	/*
2906	 * Clear the reply free queue memory.
2907	 */
2908	bzero(mpt->m_free_queue, mem_size);
2909
2910	return (DDI_SUCCESS);
2911}
2912
2913static int
2914mptsas_alloc_post_queue(mptsas_t *mpt)
2915{
2916	ddi_dma_attr_t		frame_dma_attrs;
2917	caddr_t			memp;
2918	ddi_dma_cookie_t	cookie;
2919	size_t			mem_size;
2920
2921	/*
2922	 * re-alloc when it has already alloced
2923	 */
2924	if (mpt->m_dma_post_queue_hdl) {
2925		mptsas_dma_addr_destroy(&mpt->m_dma_post_queue_hdl,
2926		    &mpt->m_acc_post_queue_hdl);
2927	}
2928
2929	/*
2930	 * The reply descriptor post queue size is:
2931	 *   Reply Descriptor Post Queue Depth * 8
2932	 * The "8" is the size of each descriptor (8 bytes or 64 bits).
2933	 */
2934	mem_size = mpt->m_post_queue_depth * 8;
2935
2936	/*
2937	 * set the DMA attributes.  The Reply Descriptor Post Queue must be
2938	 * aligned on a 16-byte boundry.
2939	 */
2940	frame_dma_attrs = mpt->m_msg_dma_attr;
2941	frame_dma_attrs.dma_attr_align = 16;
2942	frame_dma_attrs.dma_attr_sgllen = 1;
2943
2944	/*
2945	 * allocate the reply post queue
2946	 */
2947	if (mptsas_dma_addr_create(mpt, frame_dma_attrs,
2948	    &mpt->m_dma_post_queue_hdl, &mpt->m_acc_post_queue_hdl, &memp,
2949	    mem_size, &cookie) == FALSE) {
2950		return (DDI_FAILURE);
2951	}
2952
2953	/*
2954	 * Store the reply descriptor post queue memory address.  This chip
2955	 * uses this address to write to the reply descriptor post queue.  The
2956	 * second address is the address mpt uses to manage the queue.
2957	 */
2958	mpt->m_post_queue_dma_addr = cookie.dmac_laddress;
2959	mpt->m_post_queue = memp;
2960
2961	/*
2962	 * Clear the reply post queue memory.
2963	 */
2964	bzero(mpt->m_post_queue, mem_size);
2965
2966	return (DDI_SUCCESS);
2967}
2968
2969static void
2970mptsas_alloc_reply_args(mptsas_t *mpt)
2971{
2972	if (mpt->m_replyh_args == NULL) {
2973		mpt->m_replyh_args = kmem_zalloc(sizeof (m_replyh_arg_t) *
2974		    mpt->m_max_replies, KM_SLEEP);
2975	}
2976}
2977
2978static int
2979mptsas_alloc_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd)
2980{
2981	mptsas_cache_frames_t	*frames = NULL;
2982	if (cmd->cmd_extra_frames == NULL) {
2983		frames = kmem_cache_alloc(mpt->m_cache_frames, KM_NOSLEEP);
2984		if (frames == NULL) {
2985			return (DDI_FAILURE);
2986		}
2987		cmd->cmd_extra_frames = frames;
2988	}
2989	return (DDI_SUCCESS);
2990}
2991
2992static void
2993mptsas_free_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd)
2994{
2995	if (cmd->cmd_extra_frames) {
2996		kmem_cache_free(mpt->m_cache_frames,
2997		    (void *)cmd->cmd_extra_frames);
2998		cmd->cmd_extra_frames = NULL;
2999	}
3000}
3001
/*
 * Undo register mapping done during attach: free the device register
 * access handle.
 */
static void
mptsas_cfg_fini(mptsas_t *mpt)
{
	NDBG0(("mptsas_cfg_fini"));
	ddi_regs_map_free(&mpt->m_datap);
}
3008
3009static void
3010mptsas_hba_fini(mptsas_t *mpt)
3011{
3012	NDBG0(("mptsas_hba_fini"));
3013
3014	/*
3015	 * Free up any allocated memory
3016	 */
3017	if (mpt->m_dma_req_frame_hdl) {
3018		mptsas_dma_addr_destroy(&mpt->m_dma_req_frame_hdl,
3019		    &mpt->m_acc_req_frame_hdl);
3020	}
3021
3022	if (mpt->m_dma_req_sense_hdl) {
3023		rmfreemap(mpt->m_erqsense_map);
3024		mptsas_dma_addr_destroy(&mpt->m_dma_req_sense_hdl,
3025		    &mpt->m_acc_req_sense_hdl);
3026	}
3027
3028	if (mpt->m_dma_reply_frame_hdl) {
3029		mptsas_dma_addr_destroy(&mpt->m_dma_reply_frame_hdl,
3030		    &mpt->m_acc_reply_frame_hdl);
3031	}
3032
3033	if (mpt->m_dma_free_queue_hdl) {
3034		mptsas_dma_addr_destroy(&mpt->m_dma_free_queue_hdl,
3035		    &mpt->m_acc_free_queue_hdl);
3036	}
3037
3038	if (mpt->m_dma_post_queue_hdl) {
3039		mptsas_dma_addr_destroy(&mpt->m_dma_post_queue_hdl,
3040		    &mpt->m_acc_post_queue_hdl);
3041	}
3042
3043	if (mpt->m_replyh_args != NULL) {
3044		kmem_free(mpt->m_replyh_args, sizeof (m_replyh_arg_t)
3045		    * mpt->m_max_replies);
3046	}
3047}
3048
/*
 * Build the unit-address portion of a child device name into 'name':
 * "pPHY,LUN" for direct-attached SATA devices (identified by the
 * "sata-phy" property), otherwise "WWN,LUN" from the target-port
 * property.  Returns DDI_FAILURE if neither property is present.
 */
static int
mptsas_name_child(dev_info_t *lun_dip, char *name, int len)
{
	int		lun = 0;
	char		*sas_wwn = NULL;
	int		phynum = -1;
	int		reallen = 0;

	/* Get the target num */
	lun = ddi_prop_get_int(DDI_DEV_T_ANY, lun_dip, DDI_PROP_DONTPASS,
	    LUN_PROP, 0);

	if ((phynum = ddi_prop_get_int(DDI_DEV_T_ANY, lun_dip,
	    DDI_PROP_DONTPASS, "sata-phy", -1)) != -1) {
		/*
		 * Stick in the address of form "pPHY,LUN"
		 */
		reallen = snprintf(name, len, "p%x,%x", phynum, lun);
	} else if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lun_dip,
	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET_PORT, &sas_wwn)
	    == DDI_PROP_SUCCESS) {
		/*
		 * Stick in the address of the form "wWWN,LUN"
		 */
		reallen = snprintf(name, len, "%s,%x", sas_wwn, lun);
		ddi_prop_free(sas_wwn);
	} else {
		return (DDI_FAILURE);
	}

	ASSERT(reallen < len);
	if (reallen >= len) {
		/*
		 * NOTE(review): on truncation this only logs and still
		 * returns DDI_SUCCESS with a truncated name -- presumably
		 * callers always supply a large enough buffer; confirm.
		 */
		mptsas_log(0, CE_WARN, "!mptsas_get_name: name parameter "
		    "length too small, it needs to be %d bytes", reallen + 1);
	}
	return (DDI_SUCCESS);
}
3086
/*
 * tran_tgt_init(9E) - target device instance initialization
 */
static int
mptsas_scsi_tgt_init(dev_info_t *hba_dip, dev_info_t *tgt_dip,
    scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
{
#ifndef __lock_lint
	_NOTE(ARGUNUSED(hba_tran))
#endif

	/*
	 * At this point, the scsi_device structure already exists
	 * and has been initialized.
	 *
	 * Use this function to allocate target-private data structures,
	 * if needed by this HBA.  Add revised flow-control and queue
	 * properties for child here, if desired and if you can tell they
	 * support tagged queueing by now.
	 */
	mptsas_t		*mpt;
	int			lun = sd->sd_address.a_lun;
	mdi_pathinfo_t		*pip = NULL;
	mptsas_tgt_private_t	*tgt_private = NULL;
	mptsas_target_t		*ptgt = NULL;
	char			*psas_wwn = NULL;
	mptsas_phymask_t	phymask = 0;
	uint64_t		sas_wwn = 0;
	mptsas_target_addr_t	addr;
	mpt = SDEV2MPT(sd);

	ASSERT(scsi_hba_iport_unit_address(hba_dip) != 0);

	NDBG0(("mptsas_scsi_tgt_init: hbadip=0x%p tgtdip=0x%p lun=%d",
	    (void *)hba_dip, (void *)tgt_dip, lun));

	/*
	 * A non-persistent (driver.conf) node is merged into the matching
	 * devinfo node via mptsas_name_child and then rejected here.
	 */
	if (ndi_dev_is_persistent_node(tgt_dip) == 0) {
		(void) ndi_merge_node(tgt_dip, mptsas_name_child);
		ddi_set_name_addr(tgt_dip, NULL);
		return (DDI_FAILURE);
	}
	/*
	 * phymask is 0 means the virtual port for RAID
	 */
	phymask = (mptsas_phymask_t)ddi_prop_get_int(DDI_DEV_T_ANY, hba_dip, 0,
	    "phymask", 0);
	/*
	 * For an mpxio (scsi_vhci) client, the LUN and target-port WWN come
	 * from the pathinfo node; otherwise they are read from the child
	 * devinfo node's properties.
	 */
	if (mdi_component_is_client(tgt_dip, NULL) == MDI_SUCCESS) {
		if ((pip = (void *)(sd->sd_private)) == NULL) {
			/*
			 * Very bad news if this occurs. Somehow scsi_vhci has
			 * lost the pathinfo node for this target.
			 */
			return (DDI_NOT_WELL_FORMED);
		}

		if (mdi_prop_lookup_int(pip, LUN_PROP, &lun) !=
		    DDI_PROP_SUCCESS) {
			mptsas_log(mpt, CE_WARN, "Get lun property failed\n");
			return (DDI_FAILURE);
		}

		if (mdi_prop_lookup_string(pip, SCSI_ADDR_PROP_TARGET_PORT,
		    &psas_wwn) == MDI_SUCCESS) {
			if (scsi_wwnstr_to_wwn(psas_wwn, &sas_wwn)) {
				sas_wwn = 0;
			}
			(void) mdi_prop_free(psas_wwn);
		}
	} else {
		lun = ddi_prop_get_int(DDI_DEV_T_ANY, tgt_dip,
		    DDI_PROP_DONTPASS, LUN_PROP, 0);
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, tgt_dip,
		    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET_PORT, &psas_wwn) ==
		    DDI_PROP_SUCCESS) {
			if (scsi_wwnstr_to_wwn(psas_wwn, &sas_wwn)) {
				sas_wwn = 0;
			}
			ddi_prop_free(psas_wwn);
		} else {
			sas_wwn = 0;
		}
	}

	/* Look up the target by its (WWN, phymask) address under m_mutex. */
	ASSERT((sas_wwn != 0) || (phymask != 0));
	addr.mta_wwn = sas_wwn;
	addr.mta_phymask = phymask;
	mutex_enter(&mpt->m_mutex);
	ptgt = refhash_lookup(mpt->m_targets, &addr);
	mutex_exit(&mpt->m_mutex);
	if (ptgt == NULL) {
		mptsas_log(mpt, CE_WARN, "!tgt_init: target doesn't exist or "
		    "gone already! phymask:%x, saswwn %"PRIx64, phymask,
		    sas_wwn);
		return (DDI_FAILURE);
	}
	/*
	 * Allocate per-target private data on first init and hang it off the
	 * tran, where mptsas_scsi_init_pkt() later finds it.
	 */
	if (hba_tran->tran_tgt_private == NULL) {
		tgt_private = kmem_zalloc(sizeof (mptsas_tgt_private_t),
		    KM_SLEEP);
		tgt_private->t_lun = lun;
		tgt_private->t_private = ptgt;
		hba_tran->tran_tgt_private = tgt_private;
	}

	/* Nothing more to do for an mpxio client. */
	if (mdi_component_is_client(tgt_dip, NULL) == MDI_SUCCESS) {
		return (DDI_SUCCESS);
	}
	mutex_enter(&mpt->m_mutex);

	if (ptgt->m_deviceinfo &
	    (MPI2_SAS_DEVICE_INFO_SATA_DEVICE |
	    MPI2_SAS_DEVICE_INFO_ATAPI_DEVICE)) {
		uchar_t *inq89 = NULL;
		int inq89_len = 0x238;
		int reallen = 0;
		int rval = 0;
		struct sata_id *sid = NULL;
		char model[SATA_ID_MODEL_LEN + 1];
		char fw[SATA_ID_FW_LEN + 1];
		char *vid, *pid;

		mutex_exit(&mpt->m_mutex);
		/*
		 * According SCSI/ATA Translation -2 (SAT-2) revision 01a
		 * chapter 12.4.2 VPD page 89h includes 512 bytes ATA IDENTIFY
		 * DEVICE data or ATA IDENTIFY PACKET DEVICE data.
		 */
		inq89 = kmem_zalloc(inq89_len, KM_SLEEP);
		rval = mptsas_inquiry(mpt, ptgt, 0, 0x89,
		    inq89, inq89_len, &reallen, 1);

		if (rval != 0) {
			if (inq89 != NULL) {
				kmem_free(inq89, inq89_len);
			}

			mptsas_log(mpt, CE_WARN, "!mptsas request inquiry page "
			    "0x89 for SATA target:%x failed!", ptgt->m_devhdl);
			return (DDI_SUCCESS);
		}
		sid = (void *)(&inq89[60]);

		/* ATA IDENTIFY strings are byte-swapped; fix up model/fw. */
		swab(sid->ai_model, model, SATA_ID_MODEL_LEN);
		swab(sid->ai_fw, fw, SATA_ID_FW_LEN);

		model[SATA_ID_MODEL_LEN] = 0;
		fw[SATA_ID_FW_LEN] = 0;

		sata_split_model(model, &vid, &pid);

		/*
		 * override SCSA "inquiry-*" properties
		 */
		if (vid)
			(void) scsi_device_prop_update_inqstring(sd,
			    INQUIRY_VENDOR_ID, vid, strlen(vid));
		if (pid)
			(void) scsi_device_prop_update_inqstring(sd,
			    INQUIRY_PRODUCT_ID, pid, strlen(pid));
		(void) scsi_device_prop_update_inqstring(sd,
		    INQUIRY_REVISION_ID, fw, strlen(fw));

		if (inq89 != NULL) {
			kmem_free(inq89, inq89_len);
		}
	} else {
		mutex_exit(&mpt->m_mutex);
	}

	return (DDI_SUCCESS);
}
3257/*
3258 * tran_tgt_free(9E) - target device instance deallocation
3259 */
3260static void
3261mptsas_scsi_tgt_free(dev_info_t *hba_dip, dev_info_t *tgt_dip,
3262    scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
3263{
3264#ifndef __lock_lint
3265	_NOTE(ARGUNUSED(hba_dip, tgt_dip, hba_tran, sd))
3266#endif
3267
3268	mptsas_tgt_private_t	*tgt_private = hba_tran->tran_tgt_private;
3269
3270	if (tgt_private != NULL) {
3271		kmem_free(tgt_private, sizeof (mptsas_tgt_private_t));
3272		hba_tran->tran_tgt_private = NULL;
3273	}
3274}
3275
3276/*
3277 * scsi_pkt handling
3278 *
3279 * Visible to the external world via the transport structure.
3280 */
3281
/*
 * Notes:
 *	- transport the command to the addressed SCSI target/lun device
 *	- normal operation is to schedule the command to be transported,
 *	  and return TRAN_ACCEPT if this is successful.
 *	- if NO_INTR, tran_start must poll device for command completion
 */
static int
mptsas_scsi_start(struct scsi_address *ap, struct scsi_pkt *pkt)
{
#ifndef __lock_lint
	_NOTE(ARGUNUSED(ap))
#endif
	mptsas_t	*mpt = PKT2MPT(pkt);
	mptsas_cmd_t	*cmd = PKT2CMD(pkt);
	int		rval;
	mptsas_target_t	*ptgt = cmd->cmd_tgt_addr;

	NDBG1(("mptsas_scsi_start: pkt=0x%p", (void *)pkt));
	ASSERT(ptgt);
	if (ptgt == NULL)
		return (TRAN_FATAL_ERROR);

	/*
	 * prepare the pkt before taking mutex.
	 */
	rval = mptsas_prepare_pkt(cmd);
	if (rval != TRAN_ACCEPT) {
		return (rval);
	}

	/*
	 * Send the command to target/lun, however your HBA requires it.
	 * If busy, return TRAN_BUSY; if there's some other formatting error
	 * in the packet, return TRAN_BADPKT; otherwise, fall through to the
	 * return of TRAN_ACCEPT.
	 *
	 * Remember that access to shared resources, including the mptsas_t
	 * data structure and the HBA hardware registers, must be protected
	 * with mutexes, here and everywhere.
	 *
	 * Also remember that at interrupt time, you'll get an argument
	 * to the interrupt handler which is a pointer to your mptsas_t
	 * structure; you'll have to remember which commands are outstanding
	 * and which scsi_pkt is the currently-running command so the
	 * interrupt handler can refer to the pkt to set completion
	 * status, call the target driver back through pkt_comp, etc.
	 *
	 * If the instance lock is held by other thread, don't spin to wait
	 * for it. Instead, queue the cmd and next time when the instance lock
	 * is not held, accept all the queued cmd. A extra tx_waitq is
	 * introduced to protect the queue.
	 *
	 * The polled cmd will not be queued and accepted as usual.
	 *
	 * Under the tx_waitq mutex, record whether a thread is draining
	 * the tx_waitq.  An IO requesting thread that finds the instance
	 * mutex contended appends to the tx_waitq and while holding the
	 * tx_wait mutex, if the draining flag is not set, sets it and then
	 * proceeds to spin for the instance mutex. This scheme ensures that
	 * the last cmd in a burst be processed.
	 *
	 * we enable this feature only when the helper threads are enabled,
	 * at which we think the loads are heavy.
	 *
	 * per instance mutex m_tx_waitq_mutex is introduced to protect the
	 * m_tx_waitqtail, m_tx_waitq, m_tx_draining.
	 */

	if (mpt->m_doneq_thread_n) {
		if (mutex_tryenter(&mpt->m_mutex) != 0) {
			/* Got m_mutex without spinning; drain and submit. */
			rval = mptsas_accept_txwq_and_pkt(mpt, cmd);
			mutex_exit(&mpt->m_mutex);
		} else if (cmd->cmd_pkt_flags & FLAG_NOINTR) {
			/* Polled commands are never deferred to tx_waitq. */
			mutex_enter(&mpt->m_mutex);
			rval = mptsas_accept_txwq_and_pkt(mpt, cmd);
			mutex_exit(&mpt->m_mutex);
		} else {
			mutex_enter(&mpt->m_tx_waitq_mutex);
			/*
			 * ptgt->m_dr_flag is protected by m_mutex or
			 * m_tx_waitq_mutex. In this case, m_tx_waitq_mutex
			 * is acquired.
			 */
			if (ptgt->m_dr_flag == MPTSAS_DR_INTRANSITION) {
				if (cmd->cmd_pkt_flags & FLAG_NOQUEUE) {
					/*
					 * The command should be allowed to
					 * retry by returning TRAN_BUSY to
					 * to stall the I/O's which come from
					 * scsi_vhci since the device/path is
					 * in unstable state now.
					 */
					mutex_exit(&mpt->m_tx_waitq_mutex);
					return (TRAN_BUSY);
				} else {
					/*
					 * The device is offline, just fail the
					 * command by returning
					 * TRAN_FATAL_ERROR.
					 */
					mutex_exit(&mpt->m_tx_waitq_mutex);
					return (TRAN_FATAL_ERROR);
				}
			}
			if (mpt->m_tx_draining) {
				/*
				 * Another thread is draining; append this cmd
				 * to the tail of tx_waitq and let it be picked
				 * up there.
				 */
				cmd->cmd_flags |= CFLAG_TXQ;
				*mpt->m_tx_waitqtail = cmd;
				mpt->m_tx_waitqtail = &cmd->cmd_linkp;
				mutex_exit(&mpt->m_tx_waitq_mutex);
			} else { /* drain the queue */
				mpt->m_tx_draining = 1;
				mutex_exit(&mpt->m_tx_waitq_mutex);
				mutex_enter(&mpt->m_mutex);
				rval = mptsas_accept_txwq_and_pkt(mpt, cmd);
				mutex_exit(&mpt->m_mutex);
			}
		}
	} else {
		mutex_enter(&mpt->m_mutex);
		/*
		 * ptgt->m_dr_flag is protected by m_mutex or m_tx_waitq_mutex
		 * in this case, m_mutex is acquired.
		 */
		if (ptgt->m_dr_flag == MPTSAS_DR_INTRANSITION) {
			if (cmd->cmd_pkt_flags & FLAG_NOQUEUE) {
				/*
				 * commands should be allowed to retry by
				 * returning TRAN_BUSY to stall the I/O's
				 * which come from scsi_vhci since the device/
				 * path is in unstable state now.
				 */
				mutex_exit(&mpt->m_mutex);
				return (TRAN_BUSY);
			} else {
				/*
				 * The device is offline, just fail the
				 * command by returning TRAN_FATAL_ERROR.
				 */
				mutex_exit(&mpt->m_mutex);
				return (TRAN_FATAL_ERROR);
			}
		}
		rval = mptsas_accept_pkt(mpt, cmd);
		mutex_exit(&mpt->m_mutex);
	}

	return (rval);
}
3431
3432/*
3433 * Accept all the queued cmds(if any) before accept the current one.
3434 */
3435static int
3436mptsas_accept_txwq_and_pkt(mptsas_t *mpt, mptsas_cmd_t *cmd)
3437{
3438	int rval;
3439	mptsas_target_t	*ptgt = cmd->cmd_tgt_addr;
3440
3441	ASSERT(mutex_owned(&mpt->m_mutex));
3442	/*
3443	 * The call to mptsas_accept_tx_waitq() must always be performed
3444	 * because that is where mpt->m_tx_draining is cleared.
3445	 */
3446	mutex_enter(&mpt->m_tx_waitq_mutex);
3447	mptsas_accept_tx_waitq(mpt);
3448	mutex_exit(&mpt->m_tx_waitq_mutex);
3449	/*
3450	 * ptgt->m_dr_flag is protected by m_mutex or m_tx_waitq_mutex
3451	 * in this case, m_mutex is acquired.
3452	 */
3453	if (ptgt->m_dr_flag == MPTSAS_DR_INTRANSITION) {
3454		if (cmd->cmd_pkt_flags & FLAG_NOQUEUE) {
3455			/*
3456			 * The command should be allowed to retry by returning
3457			 * TRAN_BUSY to stall the I/O's which come from
3458			 * scsi_vhci since the device/path is in unstable state
3459			 * now.
3460			 */
3461			return (TRAN_BUSY);
3462		} else {
3463			/*
3464			 * The device is offline, just fail the command by
3465			 * return TRAN_FATAL_ERROR.
3466			 */
3467			return (TRAN_FATAL_ERROR);
3468		}
3469	}
3470	rval = mptsas_accept_pkt(mpt, cmd);
3471
3472	return (rval);
3473}
3474
/*
 * Submit a prepared command to the HBA, or queue it for later, honoring
 * per-target throttle and controller reset state.  Returns a TRAN_* code.
 * Caller must hold m_mutex.
 */
static int
mptsas_accept_pkt(mptsas_t *mpt, mptsas_cmd_t *cmd)
{
	int		rval = TRAN_ACCEPT;
	mptsas_target_t	*ptgt = cmd->cmd_tgt_addr;

	NDBG1(("mptsas_accept_pkt: cmd=0x%p", (void *)cmd));

	ASSERT(mutex_owned(&mpt->m_mutex));

	/* Commands pulled off tx_waitq may not have been prepared yet. */
	if ((cmd->cmd_flags & CFLAG_PREPARED) == 0) {
		rval = mptsas_prepare_pkt(cmd);
		if (rval != TRAN_ACCEPT) {
			cmd->cmd_flags &= ~CFLAG_TRANFLAG;
			return (rval);
		}
	}

	/*
	 * reset the throttle if we were draining
	 */
	if ((ptgt->m_t_ncmds == 0) &&
	    (ptgt->m_t_throttle == DRAIN_THROTTLE)) {
		NDBG23(("reset throttle"));
		ASSERT(ptgt->m_reset_delay == 0);
		mptsas_set_throttle(mpt, ptgt, MAX_THROTTLE);
	}

	/*
	 * If HBA is being reset, the DevHandles are being re-initialized,
	 * which means that they could be invalid even if the target is still
	 * attached.  Check if being reset and if DevHandle is being
	 * re-initialized.  If this is the case, return BUSY so the I/O can be
	 * retried later.
	 */
	if ((ptgt->m_devhdl == MPTSAS_INVALID_DEVHDL) && mpt->m_in_reset) {
		mptsas_set_pkt_reason(mpt, cmd, CMD_RESET, STAT_BUS_RESET);
		/*
		 * tx_waitq commands are completed through the done queue
		 * rather than bounced back to the caller.
		 */
		if (cmd->cmd_flags & CFLAG_TXQ) {
			mptsas_doneq_add(mpt, cmd);
			mptsas_doneq_empty(mpt);
			return (rval);
		} else {
			return (TRAN_BUSY);
		}
	}

	/*
	 * If device handle has already been invalidated, just
	 * fail the command. In theory, command from scsi_vhci
	 * client is impossible send down command with invalid
	 * devhdl since devhdl is set after path offline, target
	 * driver is not suppose to select a offlined path.
	 */
	if (ptgt->m_devhdl == MPTSAS_INVALID_DEVHDL) {
		NDBG3(("rejecting command, it might because invalid devhdl "
		    "request."));
		mptsas_set_pkt_reason(mpt, cmd, CMD_DEV_GONE, STAT_TERMINATED);
		if (cmd->cmd_flags & CFLAG_TXQ) {
			mptsas_doneq_add(mpt, cmd);
			mptsas_doneq_empty(mpt);
			return (rval);
		} else {
			return (TRAN_FATAL_ERROR);
		}
	}
	/*
	 * The first case is the normal case.  mpt gets a command from the
	 * target driver and starts it.
	 * Since SMID 0 is reserved and the TM slot is reserved, the actual max
	 * commands is m_max_requests - 2.
	 */
	if ((mpt->m_ncmds <= (mpt->m_max_requests - 2)) &&
	    (ptgt->m_t_throttle > HOLD_THROTTLE) &&
	    (ptgt->m_t_ncmds < ptgt->m_t_throttle) &&
	    (ptgt->m_reset_delay == 0) &&
	    (ptgt->m_t_nwait == 0) &&
	    ((cmd->cmd_pkt_flags & FLAG_NOINTR) == 0)) {
		if (mptsas_save_cmd(mpt, cmd) == TRUE) {
			(void) mptsas_start_cmd(mpt, cmd);
		} else {
			/* No free request slot; defer to the wait queue. */
			mptsas_waitq_add(mpt, cmd);
		}
	} else {
		/*
		 * Add this pkt to the work queue
		 */
		mptsas_waitq_add(mpt, cmd);

		if (cmd->cmd_pkt_flags & FLAG_NOINTR) {
			(void) mptsas_poll(mpt, cmd, MPTSAS_POLL_TIME);

			/*
			 * Only flush the doneq if this is not a TM
			 * cmd.  For TM cmds the flushing of the
			 * doneq will be done in those routines.
			 */
			if ((cmd->cmd_flags & CFLAG_TM_CMD) == 0) {
				mptsas_doneq_empty(mpt);
			}
		}
	}
	return (rval);
}
3578
/*
 * Reserve a request slot for cmd.  On success the slot index is recorded in
 * cmd->cmd_slot and TRUE is returned; FALSE means every normal slot is in
 * use.  Caller must hold m_mutex.
 */
int
mptsas_save_cmd(mptsas_t *mpt, mptsas_cmd_t *cmd)
{
	mptsas_slots_t *slots = mpt->m_active;
	uint_t slot, start_rotor;
	mptsas_target_t *ptgt = cmd->cmd_tgt_addr;

	ASSERT(MUTEX_HELD(&mpt->m_mutex));

	/*
	 * Account for reserved TM request slot and reserved SMID of 0.
	 */
	ASSERT(slots->m_n_normal == (mpt->m_max_requests - 2));

	/*
	 * Find the next available slot, beginning at m_rotor.  If no slot is
	 * available, we'll return FALSE to indicate that.  This mechanism
	 * considers only the normal slots, not the reserved slot 0 nor the
	 * task management slot m_n_normal + 1.  The rotor is left to point to
	 * the normal slot after the one we select, unless we select the last
	 * normal slot in which case it returns to slot 1.
	 */
	start_rotor = slots->m_rotor;
	do {
		slot = slots->m_rotor++;
		if (slots->m_rotor > slots->m_n_normal)
			slots->m_rotor = 1;

		/* Coming back around to the start means a full scan failed. */
		if (slots->m_rotor == start_rotor)
			break;
	} while (slots->m_slot[slot] != NULL);

	if (slots->m_slot[slot] != NULL)
		return (FALSE);

	ASSERT(slot != 0 && slot <= slots->m_n_normal);

	cmd->cmd_slot = slot;
	slots->m_slot[slot] = cmd;
	mpt->m_ncmds++;

	/*
	 * only increment per target ncmds if this is not a
	 * command that has no target associated with it (i.e. a
	 * event acknowledgment)
	 */
	if ((cmd->cmd_flags & CFLAG_CMDIOC) == 0) {
		/*
		 * Expiration time is set in mptsas_start_cmd
		 */
		ptgt->m_t_ncmds++;
		cmd->cmd_active_expiration = 0;
	} else {
		/*
		 * Initialize expiration time for passthrough commands,
		 */
		cmd->cmd_active_expiration = gethrtime() +
		    (hrtime_t)cmd->cmd_pkt->pkt_time * NANOSEC;
	}
	return (TRUE);
}
3640
3641/*
3642 * prepare the pkt:
3643 * the pkt may have been resubmitted or just reused so
3644 * initialize some fields and do some checks.
3645 */
3646static int
3647mptsas_prepare_pkt(mptsas_cmd_t *cmd)
3648{
3649	struct scsi_pkt	*pkt = CMD2PKT(cmd);
3650
3651	NDBG1(("mptsas_prepare_pkt: cmd=0x%p", (void *)cmd));
3652
3653	/*
3654	 * Reinitialize some fields that need it; the packet may
3655	 * have been resubmitted
3656	 */
3657	pkt->pkt_reason = CMD_CMPLT;
3658	pkt->pkt_state = 0;
3659	pkt->pkt_statistics = 0;
3660	pkt->pkt_resid = 0;
3661	cmd->cmd_age = 0;
3662	cmd->cmd_pkt_flags = pkt->pkt_flags;
3663
3664	/*
3665	 * zero status byte.
3666	 */
3667	*(pkt->pkt_scbp) = 0;
3668
3669	if (cmd->cmd_flags & CFLAG_DMAVALID) {
3670		pkt->pkt_resid = cmd->cmd_dmacount;
3671
3672		/*
3673		 * consistent packets need to be sync'ed first
3674		 * (only for data going out)
3675		 */
3676		if ((cmd->cmd_flags & CFLAG_CMDIOPB) &&
3677		    (cmd->cmd_flags & CFLAG_DMASEND)) {
3678			(void) ddi_dma_sync(cmd->cmd_dmahandle, 0, 0,
3679			    DDI_DMA_SYNC_FORDEV);
3680		}
3681	}
3682
3683	cmd->cmd_flags =
3684	    (cmd->cmd_flags & ~(CFLAG_TRANFLAG)) |
3685	    CFLAG_PREPARED | CFLAG_IN_TRANSPORT;
3686
3687	return (TRAN_ACCEPT);
3688}
3689
/*
 * tran_init_pkt(9E) - allocate scsi_pkt(9S) for command
 *
 * One of three possibilities:
 *	- allocate scsi_pkt
 *	- allocate scsi_pkt and DMA resources
 *	- allocate DMA resources to an already-allocated pkt
 */
static struct scsi_pkt *
mptsas_scsi_init_pkt(struct scsi_address *ap, struct scsi_pkt *pkt,
    struct buf *bp, int cmdlen, int statuslen, int tgtlen, int flags,
    int (*callback)(), caddr_t arg)
{
	mptsas_cmd_t		*cmd, *new_cmd;
	mptsas_t		*mpt = ADDR2MPT(ap);
	uint_t			oldcookiec;
	mptsas_target_t		*ptgt = NULL;
	int			rval;
	mptsas_tgt_private_t	*tgt_private;
	int			kf;

	kf = (callback == SLEEP_FUNC)? KM_SLEEP: KM_NOSLEEP;

	/* Resolve the target through the tran's per-target private data. */
	tgt_private = (mptsas_tgt_private_t *)ap->a_hba_tran->
	    tran_tgt_private;
	ASSERT(tgt_private != NULL);
	if (tgt_private == NULL) {
		return (NULL);
	}
	ptgt = tgt_private->t_private;
	ASSERT(ptgt != NULL);
	if (ptgt == NULL)
		return (NULL);
	ap->a_target = ptgt->m_devhdl;
	ap->a_lun = tgt_private->t_lun;

	ASSERT(callback == NULL_FUNC || callback == SLEEP_FUNC);
#ifdef MPTSAS_TEST_EXTRN_ALLOC
	statuslen *= 100; tgtlen *= 4;
#endif
	NDBG3(("mptsas_scsi_init_pkt:\n"
	    "\ttgt=%d in=0x%p bp=0x%p clen=%d slen=%d tlen=%d flags=%x",
	    ap->a_target, (void *)pkt, (void *)bp,
	    cmdlen, statuslen, tgtlen, flags));

	/*
	 * Allocate the new packet.
	 */
	if (pkt == NULL) {
		ddi_dma_handle_t	save_dma_handle;

		cmd = kmem_cache_alloc(mpt->m_kmem_cache, kf);
		if (cmd == NULL)
			return (NULL);

		/* Preserve the cached DMA handle across the bzero. */
		save_dma_handle = cmd->cmd_dmahandle;
		bzero(cmd, sizeof (*cmd) + scsi_pkt_size());
		cmd->cmd_dmahandle = save_dma_handle;

		/* The scsi_pkt lives immediately after the cmd structure. */
		pkt = (void *)((uchar_t *)cmd +
		    sizeof (struct mptsas_cmd));
		pkt->pkt_ha_private = (opaque_t)cmd;
		pkt->pkt_address = *ap;
		pkt->pkt_private = (opaque_t)cmd->cmd_pkt_private;
		pkt->pkt_scbp = (opaque_t)&cmd->cmd_scb;
		pkt->pkt_cdbp = (opaque_t)&cmd->cmd_cdb;
		cmd->cmd_pkt = (struct scsi_pkt *)pkt;
		cmd->cmd_cdblen = (uchar_t)cmdlen;
		cmd->cmd_scblen = statuslen;
		cmd->cmd_rqslen = SENSE_LENGTH;
		cmd->cmd_tgt_addr = ptgt;

		/*
		 * CDB, target-private, or status areas larger than the
		 * in-line ones require externally allocated space.
		 */
		if ((cmdlen > sizeof (cmd->cmd_cdb)) ||
		    (tgtlen > PKT_PRIV_LEN) ||
		    (statuslen > EXTCMDS_STATUS_SIZE)) {
			int failure;

			/*
			 * We are going to allocate external packet space which
			 * might include the sense data buffer for DMA so we
			 * need to increase the reference counter here.  In a
			 * case the HBA is in reset we just simply free the
			 * allocated packet and bail out.
			 */
			mutex_enter(&mpt->m_mutex);
			if (mpt->m_in_reset) {
				mutex_exit(&mpt->m_mutex);

				cmd->cmd_flags = CFLAG_FREE;
				kmem_cache_free(mpt->m_kmem_cache, cmd);
				return (NULL);
			}
			mpt->m_extreq_sense_refcount++;
			ASSERT(mpt->m_extreq_sense_refcount > 0);
			mutex_exit(&mpt->m_mutex);

			/*
			 * if extern alloc fails, all will be
			 * deallocated, including cmd
			 */
			failure = mptsas_pkt_alloc_extern(mpt, cmd,
			    cmdlen, tgtlen, statuslen, kf);

			if (failure != 0 || cmd->cmd_extrqslen == 0) {
				/*
				 * If the external packet space allocation
				 * failed, or we didn't allocate the sense
				 * data buffer for DMA we need to decrease the
				 * reference counter.
				 */
				mutex_enter(&mpt->m_mutex);
				ASSERT(mpt->m_extreq_sense_refcount > 0);
				mpt->m_extreq_sense_refcount--;
				if (mpt->m_extreq_sense_refcount == 0)
					cv_broadcast(
					    &mpt->m_extreq_sense_refcount_cv);
				mutex_exit(&mpt->m_mutex);

				if (failure != 0) {
					/*
					 * if extern allocation fails, it will
					 * deallocate the new pkt as well
					 */
					return (NULL);
				}
			}
		}
		new_cmd = cmd;

	} else {
		/* Caller supplied an existing packet; reuse its cmd. */
		cmd = PKT2CMD(pkt);
		new_cmd = NULL;
	}


	/* grab cmd->cmd_cookiec here as oldcookiec */

	oldcookiec = cmd->cmd_cookiec;

	/*
	 * If the dma was broken up into PARTIAL transfers cmd_nwin will be
	 * greater than 0 and we'll need to grab the next dma window
	 */
	/*
	 * SLM-not doing extra command frame right now; may add later
	 */

	if (cmd->cmd_nwin > 0) {

		/*
		 * Make sure we havn't gone past the the total number
		 * of windows
		 */
		if (++cmd->cmd_winindex >= cmd->cmd_nwin) {
			return (NULL);
		}
		if (ddi_dma_getwin(cmd->cmd_dmahandle, cmd->cmd_winindex,
		    &cmd->cmd_dma_offset, &cmd->cmd_dma_len,
		    &cmd->cmd_cookie, &cmd->cmd_cookiec) == DDI_FAILURE) {
			return (NULL);
		}
		goto get_dma_cookies;
	}


	if (flags & PKT_XARQ) {
		cmd->cmd_flags |= CFLAG_XARQ;
	}

	/*
	 * DMA resource allocation.  This version assumes your
	 * HBA has some sort of bus-mastering or onboard DMA capability, with a
	 * scatter-gather list of length MPTSAS_MAX_DMA_SEGS, as given in the
	 * ddi_dma_attr_t structure and passed to scsi_impl_dmaget.
	 */
	if (bp && (bp->b_bcount != 0) &&
	    (cmd->cmd_flags & CFLAG_DMAVALID) == 0) {

		int	cnt, dma_flags;
		mptti_t	*dmap;		/* ptr to the S/G list */

		/*
		 * Set up DMA memory and position to the next DMA segment.
		 */
		ASSERT(cmd->cmd_dmahandle != NULL);

		if (bp->b_flags & B_READ) {
			dma_flags = DDI_DMA_READ;
			cmd->cmd_flags &= ~CFLAG_DMASEND;
		} else {
			dma_flags = DDI_DMA_WRITE;
			cmd->cmd_flags |= CFLAG_DMASEND;
		}
		if (flags & PKT_CONSISTENT) {
			cmd->cmd_flags |= CFLAG_CMDIOPB;
			dma_flags |= DDI_DMA_CONSISTENT;
		}

		if (flags & PKT_DMA_PARTIAL) {
			dma_flags |= DDI_DMA_PARTIAL;
		}

		/*
		 * workaround for byte hole issue on psycho and
		 * schizo pre 2.1
		 */
		if ((bp->b_flags & B_READ) && ((bp->b_flags &
		    (B_PAGEIO|B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dma_flags |= DDI_DMA_CONSISTENT;
		}

		rval = ddi_dma_buf_bind_handle(cmd->cmd_dmahandle, bp,
		    dma_flags, callback, arg,
		    &cmd->cmd_cookie, &cmd->cmd_cookiec);
		if (rval == DDI_DMA_PARTIAL_MAP) {
			/* Partial mapping: set up the first DMA window. */
			(void) ddi_dma_numwin(cmd->cmd_dmahandle,
			    &cmd->cmd_nwin);
			cmd->cmd_winindex = 0;
			(void) ddi_dma_getwin(cmd->cmd_dmahandle,
			    cmd->cmd_winindex, &cmd->cmd_dma_offset,
			    &cmd->cmd_dma_len, &cmd->cmd_cookie,
			    &cmd->cmd_cookiec);
		} else if (rval && (rval != DDI_DMA_MAPPED)) {
			/* Map the bind failure onto an appropriate errno. */
			switch (rval) {
			case DDI_DMA_NORESOURCES:
				bioerror(bp, 0);
				break;
			case DDI_DMA_BADATTR:
			case DDI_DMA_NOMAPPING:
				bioerror(bp, EFAULT);
				break;
			case DDI_DMA_TOOBIG:
			default:
				bioerror(bp, EINVAL);
				break;
			}
			cmd->cmd_flags &= ~CFLAG_DMAVALID;
			if (new_cmd) {
				mptsas_scsi_destroy_pkt(ap, pkt);
			}
			return ((struct scsi_pkt *)NULL);
		}

get_dma_cookies:
		cmd->cmd_flags |= CFLAG_DMAVALID;
		ASSERT(cmd->cmd_cookiec > 0);

		if (cmd->cmd_cookiec > MPTSAS_MAX_CMD_SEGS) {
			mptsas_log(mpt, CE_NOTE, "large cookiec received %d\n",
			    cmd->cmd_cookiec);
			bioerror(bp, EINVAL);
			if (new_cmd) {
				mptsas_scsi_destroy_pkt(ap, pkt);
			}
			return ((struct scsi_pkt *)NULL);
		}

		/*
		 * Allocate extra SGL buffer if needed.
		 */
		if ((cmd->cmd_cookiec > MPTSAS_MAX_FRAME_SGES64(mpt)) &&
		    (cmd->cmd_extra_frames == NULL)) {
			if (mptsas_alloc_extra_sgl_frame(mpt, cmd) ==
			    DDI_FAILURE) {
				mptsas_log(mpt, CE_WARN, "MPT SGL mem alloc "
				    "failed");
				bioerror(bp, ENOMEM);
				if (new_cmd) {
					mptsas_scsi_destroy_pkt(ap, pkt);
				}
				return ((struct scsi_pkt *)NULL);
			}
		}

		/*
		 * Always use scatter-gather transfer
		 * Use the loop below to store physical addresses of
		 * DMA segments, from the DMA cookies, into your HBA's
		 * scatter-gather list.
		 * We need to ensure we have enough kmem alloc'd
		 * for the sg entries since we are no longer using an
		 * array inside mptsas_cmd_t.
		 *
		 * We check cmd->cmd_cookiec against oldcookiec so
		 * the scatter-gather list is correctly allocated
		 */

		if (oldcookiec != cmd->cmd_cookiec) {
			if (cmd->cmd_sg != (mptti_t *)NULL) {
				kmem_free(cmd->cmd_sg, sizeof (mptti_t) *
				    oldcookiec);
				cmd->cmd_sg = NULL;
			}
		}

		if (cmd->cmd_sg == (mptti_t *)NULL) {
			cmd->cmd_sg = kmem_alloc((size_t)(sizeof (mptti_t)*
			    cmd->cmd_cookiec), kf);

			if (cmd->cmd_sg == (mptti_t *)NULL) {
				mptsas_log(mpt, CE_WARN,
				    "unable to kmem_alloc enough memory "
				    "for scatter/gather list");
		/*
		 * if we have an ENOMEM condition we need to behave
		 * the same way as the rest of this routine
		 */

				bioerror(bp, ENOMEM);
				if (new_cmd) {
					mptsas_scsi_destroy_pkt(ap, pkt);
				}
				return ((struct scsi_pkt *)NULL);
			}
		}

		dmap = cmd->cmd_sg;

		ASSERT(cmd->cmd_cookie.dmac_size != 0);

		/*
		 * store the first segment into the S/G list
		 */
		dmap->count = cmd->cmd_cookie.dmac_size;
		dmap->addr.address64.Low = (uint32_t)
		    (cmd->cmd_cookie.dmac_laddress & 0xffffffffull);
		dmap->addr.address64.High = (uint32_t)
		    (cmd->cmd_cookie.dmac_laddress >> 32);

		/*
		 * dmacount counts the size of the dma for this window
		 * (if partial dma is being used).  totaldmacount
		 * keeps track of the total amount of dma we have
		 * transferred for all the windows (needed to calculate
		 * the resid value below).
		 */
		cmd->cmd_dmacount = cmd->cmd_cookie.dmac_size;
		cmd->cmd_totaldmacount += cmd->cmd_cookie.dmac_size;

		/*
		 * We already stored the first DMA scatter gather segment,
		 * start at 1 if we need to store more.
		 */
		for (cnt = 1; cnt < cmd->cmd_cookiec; cnt++) {
			/*
			 * Get next DMA cookie
			 */
			ddi_dma_nextcookie(cmd->cmd_dmahandle,
			    &cmd->cmd_cookie);
			dmap++;

			cmd->cmd_dmacount += cmd->cmd_cookie.dmac_size;
			cmd->cmd_totaldmacount += cmd->cmd_cookie.dmac_size;

			/*
			 * store the segment parms into the S/G list
			 */
			dmap->count = cmd->cmd_cookie.dmac_size;
			dmap->addr.address64.Low = (uint32_t)
			    (cmd->cmd_cookie.dmac_laddress & 0xffffffffull);
			dmap->addr.address64.High = (uint32_t)
			    (cmd->cmd_cookie.dmac_laddress >> 32);
		}

		/*
		 * If this was partially allocated we set the resid
		 * the amount of data NOT transferred in this window
		 * If there is only one window, the resid will be 0
		 */
		pkt->pkt_resid = (bp->b_bcount - cmd->cmd_totaldmacount);
		NDBG3(("mptsas_scsi_init_pkt: cmd_dmacount=%d.",
		    cmd->cmd_dmacount));
	}
	return (pkt);
}
4066
/*
 * tran_destroy_pkt(9E) - scsi_pkt(9s) deallocation
 *
 * Notes:
 *	- also frees DMA resources if allocated
 *	- implicit DMA synchonization
 */
static void
mptsas_scsi_destroy_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
{
	mptsas_cmd_t	*cmd = PKT2CMD(pkt);
	mptsas_t	*mpt = ADDR2MPT(ap);

	NDBG3(("mptsas_scsi_destroy_pkt: target=%d pkt=0x%p",
	    ap->a_target, (void *)pkt));

	/* Release the DMA binding set up in mptsas_scsi_init_pkt(). */
	if (cmd->cmd_flags & CFLAG_DMAVALID) {
		(void) ddi_dma_unbind_handle(cmd->cmd_dmahandle);
		cmd->cmd_flags &= ~CFLAG_DMAVALID;
	}

	/* Free the scatter/gather list allocated for this command. */
	if (cmd->cmd_sg) {
		kmem_free(cmd->cmd_sg, sizeof (mptti_t) * cmd->cmd_cookiec);
		cmd->cmd_sg = NULL;
	}

	mptsas_free_extra_sgl_frame(mpt, cmd);

	/*
	 * Fast path: no external areas were allocated for this packet,
	 * so the command goes straight back to the kmem cache.
	 */
	if ((cmd->cmd_flags &
	    (CFLAG_FREE | CFLAG_CDBEXTERN | CFLAG_PRIVEXTERN |
	    CFLAG_SCBEXTERN)) == 0) {
		cmd->cmd_flags = CFLAG_FREE;
		kmem_cache_free(mpt->m_kmem_cache, (void *)cmd);
	} else {
		/* Capture before mptsas_pkt_destroy_extern() frees cmd. */
		boolean_t extrqslen = cmd->cmd_extrqslen != 0;

		mptsas_pkt_destroy_extern(mpt, cmd);

		/*
		 * If the packet had the sense data buffer for DMA allocated we
		 * need to decrease the reference counter.
		 */
		if (extrqslen) {
			mutex_enter(&mpt->m_mutex);
			ASSERT(mpt->m_extreq_sense_refcount > 0);
			mpt->m_extreq_sense_refcount--;
			if (mpt->m_extreq_sense_refcount == 0)
				cv_broadcast(&mpt->m_extreq_sense_refcount_cv);
			mutex_exit(&mpt->m_mutex);
		}
	}
}
4119
4120/*
4121 * kmem cache constructor and destructor:
4122 * When constructing, we bzero the cmd and allocate the dma handle
4123 * When destructing, just free the dma handle
4124 */
4125static int
4126mptsas_kmem_cache_constructor(void *buf, void *cdrarg, int kmflags)
4127{
4128	mptsas_cmd_t		*cmd = buf;
4129	mptsas_t		*mpt  = cdrarg;
4130	int			(*callback)(caddr_t);
4131
4132	callback = (kmflags == KM_SLEEP)? DDI_DMA_SLEEP: DDI_DMA_DONTWAIT;
4133
4134	NDBG4(("mptsas_kmem_cache_constructor"));
4135
4136	/*
4137	 * allocate a dma handle
4138	 */
4139	if ((ddi_dma_alloc_handle(mpt->m_dip, &mpt->m_io_dma_attr, callback,
4140	    NULL, &cmd->cmd_dmahandle)) != DDI_SUCCESS) {
4141		cmd->cmd_dmahandle = NULL;
4142		return (-1);
4143	}
4144	return (0);
4145}
4146
4147static void
4148mptsas_kmem_cache_destructor(void *buf, void *cdrarg)
4149{
4150#ifndef __lock_lint
4151	_NOTE(ARGUNUSED(cdrarg))
4152#endif
4153	mptsas_cmd_t	*cmd = buf;
4154
4155	NDBG4(("mptsas_kmem_cache_destructor"));
4156
4157	if (cmd->cmd_dmahandle) {
4158		ddi_dma_free_handle(&cmd->cmd_dmahandle);
4159		cmd->cmd_dmahandle = NULL;
4160	}
4161}
4162
4163static int
4164mptsas_cache_frames_constructor(void *buf, void *cdrarg, int kmflags)
4165{
4166	mptsas_cache_frames_t	*p = buf;
4167	mptsas_t		*mpt = cdrarg;
4168	ddi_dma_attr_t		frame_dma_attr;
4169	size_t			mem_size, alloc_len;
4170	ddi_dma_cookie_t	cookie;
4171	uint_t			ncookie;
4172	int (*callback)(caddr_t) = (kmflags == KM_SLEEP)
4173	    ? DDI_DMA_SLEEP: DDI_DMA_DONTWAIT;
4174
4175	frame_dma_attr = mpt->m_msg_dma_attr;
4176	frame_dma_attr.dma_attr_align = 0x10;
4177	frame_dma_attr.dma_attr_sgllen = 1;
4178
4179	if (ddi_dma_alloc_handle(mpt->m_dip, &frame_dma_attr, callback, NULL,
4180	    &p->m_dma_hdl) != DDI_SUCCESS) {
4181		mptsas_log(mpt, CE_WARN, "Unable to allocate dma handle for"
4182		    " extra SGL.");
4183		return (DDI_FAILURE);
4184	}
4185
4186	mem_size = (mpt->m_max_request_frames - 1) * mpt->m_req_frame_size;
4187
4188	if (ddi_dma_mem_alloc(p->m_dma_hdl, mem_size, &mpt->m_dev_acc_attr,
4189	    DDI_DMA_CONSISTENT, callback, NULL, (caddr_t *)&p->m_frames_addr,
4190	    &alloc_len, &p->m_acc_hdl) != DDI_SUCCESS) {
4191		ddi_dma_free_handle(&p->m_dma_hdl);
4192		p->m_dma_hdl = NULL;
4193		mptsas_log(mpt, CE_WARN, "Unable to allocate dma memory for"
4194		    " extra SGL.");
4195		return (DDI_FAILURE);
4196	}
4197
4198	if (ddi_dma_addr_bind_handle(p->m_dma_hdl, NULL, p->m_frames_addr,
4199	    alloc_len, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, callback, NULL,
4200	    &cookie, &ncookie) != DDI_DMA_MAPPED) {
4201		(void) ddi_dma_mem_free(&p->m_acc_hdl);
4202		ddi_dma_free_handle(&p->m_dma_hdl);
4203		p->m_dma_hdl = NULL;
4204		mptsas_log(mpt, CE_WARN, "Unable to bind DMA resources for"
4205		    " extra SGL");
4206		return (DDI_FAILURE);
4207	}
4208
4209	/*
4210	 * Store the SGL memory address.  This chip uses this
4211	 * address to dma to and from the driver.  The second
4212	 * address is the address mpt uses to fill in the SGL.
4213	 */
4214	p->m_phys_addr = cookie.dmac_laddress;
4215
4216	return (DDI_SUCCESS);
4217}
4218
4219static void
4220mptsas_cache_frames_destructor(void *buf, void *cdrarg)
4221{
4222#ifndef __lock_lint
4223	_NOTE(ARGUNUSED(cdrarg))
4224#endif
4225	mptsas_cache_frames_t	*p = buf;
4226	if (p->m_dma_hdl != NULL) {
4227		(void) ddi_dma_unbind_handle(p->m_dma_hdl);
4228		(void) ddi_dma_mem_free(&p->m_acc_hdl);
4229		ddi_dma_free_handle(&p->m_dma_hdl);
4230		p->m_phys_addr = 0;
4231		p->m_frames_addr = NULL;
4232		p->m_dma_hdl = NULL;
4233		p->m_acc_hdl = NULL;
4234	}
4235
4236}
4237
4238/*
4239 * Figure out if we need to use a different method for the request
4240 * sense buffer and allocate from the map if necessary.
4241 */
4242static boolean_t
4243mptsas_cmdarqsize(mptsas_t *mpt, mptsas_cmd_t *cmd, size_t senselength, int kf)
4244{
4245	if (senselength > mpt->m_req_sense_size) {
4246		unsigned long i;
4247
4248		/* Sense length is limited to an 8 bit value in MPI Spec. */
4249		if (senselength > 255)
4250			senselength = 255;
4251		cmd->cmd_extrqschunks = (senselength +
4252		    (mpt->m_req_sense_size - 1))/mpt->m_req_sense_size;
4253		i = (kf == KM_SLEEP ? rmalloc_wait : rmalloc)
4254		    (mpt->m_erqsense_map, cmd->cmd_extrqschunks);
4255
4256		if (i == 0)
4257			return (B_FALSE);
4258
4259		cmd->cmd_extrqslen = (uint16_t)senselength;
4260		cmd->cmd_extrqsidx = i - 1;
4261		cmd->cmd_arq_buf = mpt->m_extreq_sense +
4262		    (cmd->cmd_extrqsidx * mpt->m_req_sense_size);
4263	} else {
4264		cmd->cmd_rqslen = (uchar_t)senselength;
4265	}
4266
4267	return (B_TRUE);
4268}
4269
4270/*
4271 * allocate and deallocate external pkt space (ie. not part of mptsas_cmd)
4272 * for non-standard length cdb, pkt_private, status areas
4273 * if allocation fails, then deallocate all external space and the pkt
4274 */
4275/* ARGSUSED */
4276static int
4277mptsas_pkt_alloc_extern(mptsas_t *mpt, mptsas_cmd_t *cmd,
4278    int cmdlen, int tgtlen, int statuslen, int kf)
4279{
4280	caddr_t			cdbp, scbp, tgt;
4281
4282	NDBG3(("mptsas_pkt_alloc_extern: "
4283	    "cmd=0x%p cmdlen=%d tgtlen=%d statuslen=%d kf=%x",
4284	    (void *)cmd, cmdlen, tgtlen, statuslen, kf));
4285
4286	tgt = cdbp = scbp = NULL;
4287	cmd->cmd_scblen		= statuslen;
4288	cmd->cmd_privlen	= (uchar_t)tgtlen;
4289
4290	if (cmdlen > sizeof (cmd->cmd_cdb)) {
4291		if ((cdbp = kmem_zalloc((size_t)cmdlen, kf)) == NULL) {
4292			goto fail;
4293		}
4294		cmd->cmd_pkt->pkt_cdbp = (opaque_t)cdbp;
4295		cmd->cmd_flags |= CFLAG_CDBEXTERN;
4296	}
4297	if (tgtlen > PKT_PRIV_LEN) {
4298		if ((tgt = kmem_zalloc((size_t)tgtlen, kf)) == NULL) {
4299			goto fail;
4300		}
4301		cmd->cmd_flags |= CFLAG_PRIVEXTERN;
4302		cmd->cmd_pkt->pkt_private = tgt;
4303	}
4304	if (statuslen > EXTCMDS_STATUS_SIZE) {
4305		if ((scbp = kmem_zalloc((size_t)statuslen, kf)) == NULL) {
4306			goto fail;
4307		}
4308		cmd->cmd_flags |= CFLAG_SCBEXTERN;
4309		cmd->cmd_pkt->pkt_scbp = (opaque_t)scbp;
4310
4311		/* allocate sense data buf for DMA */
4312		if (mptsas_cmdarqsize(mpt, cmd, statuslen -
4313		    MPTSAS_GET_ITEM_OFF(struct scsi_arq_status, sts_sensedata),
4314		    kf) == B_FALSE)
4315			goto fail;
4316	}
4317	return (0);
4318fail:
4319	mptsas_pkt_destroy_extern(mpt, cmd);
4320	return (1);
4321}
4322
4323/*
4324 * deallocate external pkt space and deallocate the pkt
4325 */
4326static void
4327mptsas_pkt_destroy_extern(mptsas_t *mpt, mptsas_cmd_t *cmd)
4328{
4329	NDBG3(("mptsas_pkt_destroy_extern: cmd=0x%p", (void *)cmd));
4330
4331	if (cmd->cmd_flags & CFLAG_FREE) {
4332		mptsas_log(mpt, CE_PANIC,
4333		    "mptsas_pkt_destroy_extern: freeing free packet");
4334		_NOTE(NOT_REACHED)
4335		/* NOTREACHED */
4336	}
4337	if (cmd->cmd_extrqslen != 0) {
4338		rmfree(mpt->m_erqsense_map, cmd->cmd_extrqschunks,
4339		    cmd->cmd_extrqsidx + 1);
4340	}
4341	if (cmd->cmd_flags & CFLAG_CDBEXTERN) {
4342		kmem_free(cmd->cmd_pkt->pkt_cdbp, (size_t)cmd->cmd_cdblen);
4343	}
4344	if (cmd->cmd_flags & CFLAG_SCBEXTERN) {
4345		kmem_free(cmd->cmd_pkt->pkt_scbp, (size_t)cmd->cmd_scblen);
4346	}
4347	if (cmd->cmd_flags & CFLAG_PRIVEXTERN) {
4348		kmem_free(cmd->cmd_pkt->pkt_private, (size_t)cmd->cmd_privlen);
4349	}
4350	cmd->cmd_flags = CFLAG_FREE;
4351	kmem_cache_free(mpt->m_kmem_cache, (void *)cmd);
4352}
4353
4354/*
4355 * tran_sync_pkt(9E) - explicit DMA synchronization
4356 */
4357/*ARGSUSED*/
4358static void
4359mptsas_scsi_sync_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
4360{
4361	mptsas_cmd_t	*cmd = PKT2CMD(pkt);
4362
4363	NDBG3(("mptsas_scsi_sync_pkt: target=%d, pkt=0x%p",
4364	    ap->a_target, (void *)pkt));
4365
4366	if (cmd->cmd_dmahandle) {
4367		(void) ddi_dma_sync(cmd->cmd_dmahandle, 0, 0,
4368		    (cmd->cmd_flags & CFLAG_DMASEND) ?
4369		    DDI_DMA_SYNC_FORDEV : DDI_DMA_SYNC_FORCPU);
4370	}
4371}
4372
4373/*
4374 * tran_dmafree(9E) - deallocate DMA resources allocated for command
4375 */
4376/*ARGSUSED*/
4377static void
4378mptsas_scsi_dmafree(struct scsi_address *ap, struct scsi_pkt *pkt)
4379{
4380	mptsas_cmd_t	*cmd = PKT2CMD(pkt);
4381	mptsas_t	*mpt = ADDR2MPT(ap);
4382
4383	NDBG3(("mptsas_scsi_dmafree: target=%d pkt=0x%p",
4384	    ap->a_target, (void *)pkt));
4385
4386	if (cmd->cmd_flags & CFLAG_DMAVALID) {
4387		(void) ddi_dma_unbind_handle(cmd->cmd_dmahandle);
4388		cmd->cmd_flags &= ~CFLAG_DMAVALID;
4389	}
4390
4391	mptsas_free_extra_sgl_frame(mpt, cmd);
4392}
4393
4394static void
4395mptsas_pkt_comp(struct scsi_pkt *pkt, mptsas_cmd_t *cmd)
4396{
4397	if ((cmd->cmd_flags & CFLAG_CMDIOPB) &&
4398	    (!(cmd->cmd_flags & CFLAG_DMASEND))) {
4399		(void) ddi_dma_sync(cmd->cmd_dmahandle, 0, 0,
4400		    DDI_DMA_SYNC_FORCPU);
4401	}
4402	(*pkt->pkt_comp)(pkt);
4403}
4404
4405static void
4406mptsas_sge_mainframe(mptsas_cmd_t *cmd, pMpi2SCSIIORequest_t frame,
4407    ddi_acc_handle_t acc_hdl, uint_t cookiec, uint32_t end_flags)
4408{
4409	pMpi2SGESimple64_t	sge;
4410	mptti_t			*dmap;
4411	uint32_t		flags;
4412
4413	dmap = cmd->cmd_sg;
4414
4415	sge = (pMpi2SGESimple64_t)(&frame->SGL);
4416	while (cookiec--) {
4417		ddi_put32(acc_hdl,
4418		    &sge->Address.Low, dmap->addr.address64.Low);
4419		ddi_put32(acc_hdl,
4420		    &sge->Address.High, dmap->addr.address64.High);
4421		ddi_put32(acc_hdl, &sge->FlagsLength,
4422		    dmap->count);
4423		flags = ddi_get32(acc_hdl, &sge->FlagsLength);
4424		flags |= ((uint32_t)
4425		    (MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
4426		    MPI2_SGE_FLAGS_SYSTEM_ADDRESS |
4427		    MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
4428		    MPI2_SGE_FLAGS_SHIFT);
4429
4430		/*
4431		 * If this is the last cookie, we set the flags
4432		 * to indicate so
4433		 */
4434		if (cookiec == 0) {
4435			flags |= end_flags;
4436		}
4437		if (cmd->cmd_flags & CFLAG_DMASEND) {
4438			flags |= (MPI2_SGE_FLAGS_HOST_TO_IOC <<
4439			    MPI2_SGE_FLAGS_SHIFT);
4440		} else {
4441			flags |= (MPI2_SGE_FLAGS_IOC_TO_HOST <<
4442			    MPI2_SGE_FLAGS_SHIFT);
4443		}
4444		ddi_put32(acc_hdl, &sge->FlagsLength, flags);
4445		dmap++;
4446		sge++;
4447	}
4448}
4449
/*
 * Build a chained MPI2 SGL for a command whose cookie count exceeds what
 * fits in the main request frame.  Overflow SGEs are written into the
 * pre-allocated extra-frame buffer; cmd->cmd_extra_frames must already
 * be attached by the caller.
 */
static void
mptsas_sge_chain(mptsas_t *mpt, mptsas_cmd_t *cmd,
    pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl)
{
	pMpi2SGESimple64_t	sge;
	pMpi2SGEChain64_t	sgechain;
	uint64_t		nframe_phys_addr;
	uint_t			cookiec;
	mptti_t			*dmap;
	uint32_t		flags;

	/*
	 * Save the number of entries in the DMA
	 * Scatter/Gather list
	 */
	cookiec = cmd->cmd_cookiec;

	/*
	 * Hereby we start to deal with multiple frames.
	 * The process is as follows:
	 * 1. Determine how many frames are needed for SGL element
	 *    storage; Note that all frames are stored in contiguous
	 *    memory space and in 64-bit DMA mode each element is
	 *    3 double-words (12 bytes) long.
	 * 2. Fill up the main frame. We need to do this separately
	 *    since it contains the SCSI IO request header and needs
	 *    dedicated processing. Note that the last 4 double-words
	 *    of the SCSI IO header is for SGL element storage
	 *    (MPI2_SGE_IO_UNION).
	 * 3. Fill the chain element in the main frame, so the DMA
	 *    engine can use the following frames.
	 * 4. Enter a loop to fill the remaining frames. Note that the
	 *    last frame contains no chain element.  The remaining
	 *    frames go into the mpt SGL buffer allocated on the fly,
	 *    not immediately following the main message frame, as in
	 *    Gen1.
	 * Some restrictions:
	 * 1. For 64-bit DMA, the simple element and chain element
	 *    are both of 3 double-words (12 bytes) in size, even
	 *    though all frames are stored in the first 4G of mem
	 *    range and the higher 32-bits of the address are always 0.
	 * 2. On some controllers (like the 1064/1068), a frame can
	 *    hold SGL elements with the last 1 or 2 double-words
	 *    (4 or 8 bytes) un-used. On these controllers, we should
	 *    recognize that there's not enough room for another SGL
	 *    element and move the sge pointer to the next frame.
	 */
	int			i, j, k, l, frames, sgemax;
	int			temp;
	uint8_t			chainflags;
	uint16_t		chainlength;
	mptsas_cache_frames_t	*p;

	/*
	 * Sgemax is the number of SGE's that will fit
	 * each extra frame and frames is total
	 * number of frames we'll need.  1 sge entry per
	 * frame is reserved for the chain element thus the -1 below.
	 */
	sgemax = ((mpt->m_req_frame_size / sizeof (MPI2_SGE_SIMPLE64))
	    - 1);
	temp = (cookiec - (MPTSAS_MAX_FRAME_SGES64(mpt) - 1)) / sgemax;

	/*
	 * A little check to see if we need to round up the number
	 * of frames we need
	 */
	if ((cookiec - (MPTSAS_MAX_FRAME_SGES64(mpt) - 1)) - (temp *
	    sgemax) > 1) {
		frames = (temp + 1);
	} else {
		frames = temp;
	}
	dmap = cmd->cmd_sg;
	sge = (pMpi2SGESimple64_t)(&frame->SGL);

	/*
	 * First fill in the main frame
	 */
	j = MPTSAS_MAX_FRAME_SGES64(mpt) - 1;
	mptsas_sge_mainframe(cmd, frame, acc_hdl, j,
	    ((uint32_t)(MPI2_SGE_FLAGS_LAST_ELEMENT) <<
	    MPI2_SGE_FLAGS_SHIFT));
	dmap += j;
	sge += j;
	j++;

	/*
	 * Fill in the chain element in the main frame.
	 * About calculation on ChainOffset:
	 * 1. Struct msg_scsi_io_request has 4 double-words (16 bytes)
	 *    in the end reserved for SGL element storage
	 *    (MPI2_SGE_IO_UNION); we should count it in our
	 *    calculation.  See its definition in the header file.
	 * 2. Constant j is the counter of the current SGL element
	 *    that will be processed, and (j - 1) is the number of
	 *    SGL elements that have been processed (stored in the
	 *    main frame).
	 * 3. ChainOffset value should be in units of double-words (4
	 *    bytes) so the last value should be divided by 4.
	 */
	ddi_put8(acc_hdl, &frame->ChainOffset,
	    (sizeof (MPI2_SCSI_IO_REQUEST) -
	    sizeof (MPI2_SGE_IO_UNION) +
	    (j - 1) * sizeof (MPI2_SGE_SIMPLE64)) >> 2);
	sgechain = (pMpi2SGEChain64_t)sge;
	chainflags = (MPI2_SGE_FLAGS_CHAIN_ELEMENT |
	    MPI2_SGE_FLAGS_SYSTEM_ADDRESS |
	    MPI2_SGE_FLAGS_64_BIT_ADDRESSING);
	ddi_put8(acc_hdl, &sgechain->Flags, chainflags);

	/*
	 * The size of the next frame is the accurate size of space
	 * (in bytes) used to store the SGL elements. j is the counter
	 * of SGL elements. (j - 1) is the number of SGL elements that
	 * have been processed (stored in frames).
	 */
	if (frames >= 2) {
		ASSERT(mpt->m_req_frame_size >= sizeof (MPI2_SGE_SIMPLE64));
		chainlength = mpt->m_req_frame_size /
		    sizeof (MPI2_SGE_SIMPLE64) *
		    sizeof (MPI2_SGE_SIMPLE64);
	} else {
		chainlength = ((cookiec - (j - 1)) *
		    sizeof (MPI2_SGE_SIMPLE64));
	}

	/* Extra frames must have been attached by the caller. */
	p = cmd->cmd_extra_frames;

	ddi_put16(acc_hdl, &sgechain->Length, chainlength);
	ddi_put32(acc_hdl, &sgechain->Address.Low, p->m_phys_addr);
	ddi_put32(acc_hdl, &sgechain->Address.High, p->m_phys_addr >> 32);

	/*
	 * If there are more than 2 frames left we have to
	 * fill in the next chain offset to the location of
	 * the chain element in the next frame.
	 * sgemax is the number of simple elements in an extra
	 * frame. Note that the value NextChainOffset should be
	 * in double-words (4 bytes).
	 */
	if (frames >= 2) {
		ddi_put8(acc_hdl, &sgechain->NextChainOffset,
		    (sgemax * sizeof (MPI2_SGE_SIMPLE64)) >> 2);
	} else {
		ddi_put8(acc_hdl, &sgechain->NextChainOffset, 0);
	}

	/*
	 * Jump to next frame;
	 * Starting here, chain buffers go into the per command SGL.
	 * This buffer is allocated when chain buffers are needed.
	 */
	sge = (pMpi2SGESimple64_t)p->m_frames_addr;
	i = cookiec;

	/*
	 * Start filling in frames with SGE's.  If we
	 * reach the end of frame and still have SGE's
	 * to fill we need to add a chain element and
	 * use another frame.  j will be our counter
	 * for what cookie we are at and i will be
	 * the total cookiec. k is the current frame
	 */
	for (k = 1; k <= frames; k++) {
		for (l = 1; (l <= (sgemax + 1)) && (j <= i); j++, l++) {

			/*
			 * If we have reached the end of frame
			 * and we have more SGE's to fill in
			 * we have to fill the final entry
			 * with a chain element and then
			 * continue to the next frame
			 */
			if ((l == (sgemax + 1)) && (k != frames)) {
				sgechain = (pMpi2SGEChain64_t)sge;
				/*
				 * The chain element occupies this loop slot
				 * rather than a cookie, so back j up: the
				 * current cookie is emitted in the next frame.
				 */
				j--;
				chainflags = (
				    MPI2_SGE_FLAGS_CHAIN_ELEMENT |
				    MPI2_SGE_FLAGS_SYSTEM_ADDRESS |
				    MPI2_SGE_FLAGS_64_BIT_ADDRESSING);
				ddi_put8(p->m_acc_hdl,
				    &sgechain->Flags, chainflags);
				/*
				 * k is the frame counter and (k + 1)
				 * is the number of the next frame.
				 * Note that frames are in contiguous
				 * memory space.
				 */
				nframe_phys_addr = p->m_phys_addr +
				    (mpt->m_req_frame_size * k);
				ddi_put32(p->m_acc_hdl,
				    &sgechain->Address.Low,
				    nframe_phys_addr);
				ddi_put32(p->m_acc_hdl,
				    &sgechain->Address.High,
				    nframe_phys_addr >> 32);

				/*
				 * If there are more than 2 frames left
				 * we have to set the next chain offset to
				 * the location of the chain element
				 * in the next frame and fill in the
				 * length of the next chain
				 */
				if ((frames - k) >= 2) {
					ddi_put8(p->m_acc_hdl,
					    &sgechain->NextChainOffset,
					    (sgemax *
					    sizeof (MPI2_SGE_SIMPLE64))
					    >> 2);
					ddi_put16(p->m_acc_hdl,
					    &sgechain->Length,
					    mpt->m_req_frame_size /
					    sizeof (MPI2_SGE_SIMPLE64) *
					    sizeof (MPI2_SGE_SIMPLE64));
				} else {
					/*
					 * This is the last frame. Set
					 * the NextChainOffset to 0 and
					 * Length is the total size of
					 * all remaining simple elements
					 */
					ddi_put8(p->m_acc_hdl,
					    &sgechain->NextChainOffset,
					    0);
					ddi_put16(p->m_acc_hdl,
					    &sgechain->Length,
					    (cookiec - j) *
					    sizeof (MPI2_SGE_SIMPLE64));
				}

				/* Jump to the next frame */
				sge = (pMpi2SGESimple64_t)
				    ((char *)p->m_frames_addr +
				    (int)mpt->m_req_frame_size * k);

				continue;
			}

			ddi_put32(p->m_acc_hdl,
			    &sge->Address.Low,
			    dmap->addr.address64.Low);
			ddi_put32(p->m_acc_hdl,
			    &sge->Address.High,
			    dmap->addr.address64.High);
			ddi_put32(p->m_acc_hdl,
			    &sge->FlagsLength, dmap->count);
			flags = ddi_get32(p->m_acc_hdl,
			    &sge->FlagsLength);
			flags |= ((uint32_t)(
			    MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
			    MPI2_SGE_FLAGS_SYSTEM_ADDRESS |
			    MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
			    MPI2_SGE_FLAGS_SHIFT);

			/*
			 * If we are at the end of the frame and
			 * there is another frame to fill in
			 * we set the last simple element as last
			 * element
			 */
			if ((l == sgemax) && (k != frames)) {
				flags |= ((uint32_t)
				    (MPI2_SGE_FLAGS_LAST_ELEMENT) <<
				    MPI2_SGE_FLAGS_SHIFT);
			}

			/*
			 * If this is the final cookie we
			 * indicate it by setting the flags
			 */
			if (j == i) {
				flags |= ((uint32_t)
				    (MPI2_SGE_FLAGS_LAST_ELEMENT |
				    MPI2_SGE_FLAGS_END_OF_BUFFER |
				    MPI2_SGE_FLAGS_END_OF_LIST) <<
				    MPI2_SGE_FLAGS_SHIFT);
			}
			if (cmd->cmd_flags & CFLAG_DMASEND) {
				flags |=
				    (MPI2_SGE_FLAGS_HOST_TO_IOC <<
				    MPI2_SGE_FLAGS_SHIFT);
			} else {
				flags |=
				    (MPI2_SGE_FLAGS_IOC_TO_HOST <<
				    MPI2_SGE_FLAGS_SHIFT);
			}
			ddi_put32(p->m_acc_hdl,
			    &sge->FlagsLength, flags);
			dmap++;
			sge++;
		}
	}

	/*
	 * Sync DMA with the chain buffers that were just created
	 */
	(void) ddi_dma_sync(p->m_dma_hdl, 0, 0, DDI_DMA_SYNC_FORDEV);
}
4750
4751static void
4752mptsas_ieee_sge_mainframe(mptsas_cmd_t *cmd, pMpi2SCSIIORequest_t frame,
4753    ddi_acc_handle_t acc_hdl, uint_t cookiec, uint8_t end_flag)
4754{
4755	pMpi2IeeeSgeSimple64_t	ieeesge;
4756	mptti_t			*dmap;
4757	uint8_t			flags;
4758
4759	dmap = cmd->cmd_sg;
4760
4761	NDBG1(("mptsas_ieee_sge_mainframe: cookiec=%d, %s", cookiec,
4762	    cmd->cmd_flags & CFLAG_DMASEND?"Out":"In"));
4763
4764	ieeesge = (pMpi2IeeeSgeSimple64_t)(&frame->SGL);
4765	while (cookiec--) {
4766		ddi_put32(acc_hdl,
4767		    &ieeesge->Address.Low, dmap->addr.address64.Low);
4768		ddi_put32(acc_hdl,
4769		    &ieeesge->Address.High, dmap->addr.address64.High);
4770		ddi_put32(acc_hdl, &ieeesge->Length,
4771		    dmap->count);
4772		NDBG1(("mptsas_ieee_sge_mainframe: len=%d", dmap->count));
4773		flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
4774		    MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
4775
4776		/*
4777		 * If this is the last cookie, we set the flags
4778		 * to indicate so
4779		 */
4780		if (cookiec == 0) {
4781			flags |= end_flag;
4782		}
4783
4784		ddi_put8(acc_hdl, &ieeesge->Flags, flags);
4785		dmap++;
4786		ieeesge++;
4787	}
4788}
4789
/*
 * Build a chained MPI2.5 IEEE SGL for a command whose cookie count
 * exceeds what fits in the main request frame.  Overflow SGEs go into
 * the pre-allocated extra-frame buffer; cmd->cmd_extra_frames must
 * already be attached by the caller.
 */
static void
mptsas_ieee_sge_chain(mptsas_t *mpt, mptsas_cmd_t *cmd,
    pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl)
{
	pMpi2IeeeSgeSimple64_t	ieeesge;
	pMpi25IeeeSgeChain64_t	ieeesgechain;
	uint64_t		nframe_phys_addr;
	uint_t			cookiec;
	mptti_t			*dmap;
	uint8_t			flags;

	/*
	 * Save the number of entries in the DMA
	 * Scatter/Gather list
	 */
	cookiec = cmd->cmd_cookiec;

	NDBG1(("mptsas_ieee_sge_chain: cookiec=%d", cookiec));

	/*
	 * Hereby we start to deal with multiple frames.
	 * The process is as follows:
	 * 1. Determine how many frames are needed for SGL element
	 *    storage; Note that all frames are stored in contiguous
	 *    memory space and in 64-bit DMA mode each element is
	 *    4 double-words (16 bytes) long.
	 * 2. Fill up the main frame. We need to do this separately
	 *    since it contains the SCSI IO request header and needs
	 *    dedicated processing. Note that the last 4 double-words
	 *    of the SCSI IO header is for SGL element storage
	 *    (MPI2_SGE_IO_UNION).
	 * 3. Fill the chain element in the main frame, so the DMA
	 *    engine can use the following frames.
	 * 4. Enter a loop to fill the remaining frames. Note that the
	 *    last frame contains no chain element.  The remaining
	 *    frames go into the mpt SGL buffer allocated on the fly,
	 *    not immediately following the main message frame, as in
	 *    Gen1.
	 * Restrictions:
	 *    For 64-bit DMA, the simple element and chain element
	 *    are both of 4 double-words (16 bytes) in size, even
	 *    though all frames are stored in the first 4G of mem
	 *    range and the higher 32-bits of the address are always 0.
	 */
	int			i, j, k, l, frames, sgemax;
	int			temp;
	uint8_t			chainflags;
	uint32_t		chainlength;
	mptsas_cache_frames_t	*p;

	/*
	 * Sgemax is the number of SGE's that will fit
	 * each extra frame and frames is total
	 * number of frames we'll need.  1 sge entry per
	 * frame is reserved for the chain element thus the -1 below.
	 */
	sgemax = ((mpt->m_req_frame_size / sizeof (MPI2_IEEE_SGE_SIMPLE64))
	    - 1);
	temp = (cookiec - (MPTSAS_MAX_FRAME_SGES64(mpt) - 1)) / sgemax;

	/*
	 * A little check to see if we need to round up the number
	 * of frames we need
	 */
	if ((cookiec - (MPTSAS_MAX_FRAME_SGES64(mpt) - 1)) - (temp *
	    sgemax) > 1) {
		frames = (temp + 1);
	} else {
		frames = temp;
	}
	NDBG1(("mptsas_ieee_sge_chain: temp=%d, frames=%d", temp, frames));
	dmap = cmd->cmd_sg;
	ieeesge = (pMpi2IeeeSgeSimple64_t)(&frame->SGL);

	/*
	 * First fill in the main frame
	 */
	j = MPTSAS_MAX_FRAME_SGES64(mpt) - 1;
	mptsas_ieee_sge_mainframe(cmd, frame, acc_hdl, j, 0);
	dmap += j;
	ieeesge += j;
	j++;

	/*
	 * Fill in the chain element in the main frame.
	 * About calculation on ChainOffset:
	 * 1. Struct msg_scsi_io_request has 4 double-words (16 bytes)
	 *    in the end reserved for SGL element storage
	 *    (MPI2_SGE_IO_UNION); we should count it in our
	 *    calculation.  See its definition in the header file.
	 * 2. Constant j is the counter of the current SGL element
	 *    that will be processed, and (j - 1) is the number of
	 *    SGL elements that have been processed (stored in the
	 *    main frame).
	 * 3. ChainOffset value should be in units of quad-words (16
	 *    bytes) so the last value should be divided by 16.
	 */
	ddi_put8(acc_hdl, &frame->ChainOffset,
	    (sizeof (MPI2_SCSI_IO_REQUEST) -
	    sizeof (MPI2_SGE_IO_UNION) +
	    (j - 1) * sizeof (MPI2_IEEE_SGE_SIMPLE64)) >> 4);
	ieeesgechain = (pMpi25IeeeSgeChain64_t)ieeesge;
	chainflags = (MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT |
	    MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
	ddi_put8(acc_hdl, &ieeesgechain->Flags, chainflags);

	/*
	 * The size of the next frame is the accurate size of space
	 * (in bytes) used to store the SGL elements. j is the counter
	 * of SGL elements. (j - 1) is the number of SGL elements that
	 * have been processed (stored in frames).
	 */
	if (frames >= 2) {
		ASSERT(mpt->m_req_frame_size >=
		    sizeof (MPI2_IEEE_SGE_SIMPLE64));
		chainlength = mpt->m_req_frame_size /
		    sizeof (MPI2_IEEE_SGE_SIMPLE64) *
		    sizeof (MPI2_IEEE_SGE_SIMPLE64);
	} else {
		chainlength = ((cookiec - (j - 1)) *
		    sizeof (MPI2_IEEE_SGE_SIMPLE64));
	}

	/* Extra frames must have been attached by the caller. */
	p = cmd->cmd_extra_frames;

	ddi_put32(acc_hdl, &ieeesgechain->Length, chainlength);
	ddi_put32(acc_hdl, &ieeesgechain->Address.Low, p->m_phys_addr);
	ddi_put32(acc_hdl, &ieeesgechain->Address.High, p->m_phys_addr >> 32);

	/*
	 * If there are more than 2 frames left we have to
	 * fill in the next chain offset to the location of
	 * the chain element in the next frame.
	 * sgemax is the number of simple elements in an extra
	 * frame. Note that the value NextChainOffset should be
	 * in double-words (4 bytes).
	 */
	if (frames >= 2) {
		ddi_put8(acc_hdl, &ieeesgechain->NextChainOffset,
		    (sgemax * sizeof (MPI2_IEEE_SGE_SIMPLE64)) >> 4);
	} else {
		ddi_put8(acc_hdl, &ieeesgechain->NextChainOffset, 0);
	}

	/*
	 * Jump to next frame;
	 * Starting here, chain buffers go into the per command SGL.
	 * This buffer is allocated when chain buffers are needed.
	 */
	ieeesge = (pMpi2IeeeSgeSimple64_t)p->m_frames_addr;
	i = cookiec;

	/*
	 * Start filling in frames with SGE's.  If we
	 * reach the end of frame and still have SGE's
	 * to fill we need to add a chain element and
	 * use another frame.  j will be our counter
	 * for what cookie we are at and i will be
	 * the total cookiec. k is the current frame
	 */
	for (k = 1; k <= frames; k++) {
		for (l = 1; (l <= (sgemax + 1)) && (j <= i); j++, l++) {

			/*
			 * If we have reached the end of frame
			 * and we have more SGE's to fill in
			 * we have to fill the final entry
			 * with a chain element and then
			 * continue to the next frame
			 */
			if ((l == (sgemax + 1)) && (k != frames)) {
				ieeesgechain = (pMpi25IeeeSgeChain64_t)ieeesge;
				/*
				 * The chain element occupies this loop slot
				 * rather than a cookie, so back j up: the
				 * current cookie is emitted in the next frame.
				 */
				j--;
				chainflags =
				    MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT |
				    MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR;
				ddi_put8(p->m_acc_hdl,
				    &ieeesgechain->Flags, chainflags);
				/*
				 * k is the frame counter and (k + 1)
				 * is the number of the next frame.
				 * Note that frames are in contiguous
				 * memory space.
				 */
				nframe_phys_addr = p->m_phys_addr +
				    (mpt->m_req_frame_size * k);
				ddi_put32(p->m_acc_hdl,
				    &ieeesgechain->Address.Low,
				    nframe_phys_addr);
				ddi_put32(p->m_acc_hdl,
				    &ieeesgechain->Address.High,
				    nframe_phys_addr >> 32);

				/*
				 * If there are more than 2 frames left
				 * we have to set the next chain offset to
				 * the location of the chain element
				 * in the next frame and fill in the
				 * length of the next chain
				 */
				if ((frames - k) >= 2) {
					ddi_put8(p->m_acc_hdl,
					    &ieeesgechain->NextChainOffset,
					    (sgemax *
					    sizeof (MPI2_IEEE_SGE_SIMPLE64))
					    >> 4);
					ASSERT(mpt->m_req_frame_size >=
					    sizeof (MPI2_IEEE_SGE_SIMPLE64));
					ddi_put32(p->m_acc_hdl,
					    &ieeesgechain->Length,
					    mpt->m_req_frame_size /
					    sizeof (MPI2_IEEE_SGE_SIMPLE64) *
					    sizeof (MPI2_IEEE_SGE_SIMPLE64));
				} else {
					/*
					 * This is the last frame. Set
					 * the NextChainOffset to 0 and
					 * Length is the total size of
					 * all remaining simple elements
					 */
					ddi_put8(p->m_acc_hdl,
					    &ieeesgechain->NextChainOffset,
					    0);
					ddi_put32(p->m_acc_hdl,
					    &ieeesgechain->Length,
					    (cookiec - j) *
					    sizeof (MPI2_IEEE_SGE_SIMPLE64));
				}

				/* Jump to the next frame */
				ieeesge = (pMpi2IeeeSgeSimple64_t)
				    ((char *)p->m_frames_addr +
				    (int)mpt->m_req_frame_size * k);

				continue;
			}

			ddi_put32(p->m_acc_hdl,
			    &ieeesge->Address.Low,
			    dmap->addr.address64.Low);
			ddi_put32(p->m_acc_hdl,
			    &ieeesge->Address.High,
			    dmap->addr.address64.High);
			ddi_put32(p->m_acc_hdl,
			    &ieeesge->Length, dmap->count);
			flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
			    MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);

			/*
			 * If we are at the end of the frame and
			 * there is another frame to fill in
			 * do we need to do anything?
			 * if ((l == sgemax) && (k != frames)) {
			 * }
			 */

			/*
			 * If this is the final cookie set end of list.
			 */
			if (j == i) {
				flags |= MPI25_IEEE_SGE_FLAGS_END_OF_LIST;
			}

			ddi_put8(p->m_acc_hdl, &ieeesge->Flags, flags);
			dmap++;
			ieeesge++;
		}
	}

	/*
	 * Sync DMA with the chain buffers that were just created
	 */
	(void) ddi_dma_sync(p->m_dma_hdl, 0, 0, DDI_DMA_SYNC_FORDEV);
}
5064
5065static void
5066mptsas_sge_setup(mptsas_t *mpt, mptsas_cmd_t *cmd, uint32_t *control,
5067    pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl)
5068{
5069	ASSERT(cmd->cmd_flags & CFLAG_DMAVALID);
5070
5071	NDBG1(("mptsas_sge_setup: cookiec=%d", cmd->cmd_cookiec));
5072
5073	/*
5074	 * Set read/write bit in control.
5075	 */
5076	if (cmd->cmd_flags & CFLAG_DMASEND) {
5077		*control |= MPI2_SCSIIO_CONTROL_WRITE;
5078	} else {
5079		*control |= MPI2_SCSIIO_CONTROL_READ;
5080	}
5081
5082	ddi_put32(acc_hdl, &frame->DataLength, cmd->cmd_dmacount);
5083
5084	/*
5085	 * We have 4 cases here.  First where we can fit all the
5086	 * SG elements into the main frame, and the case
5087	 * where we can't. The SG element is also different when using
5088	 * MPI2.5 interface.
5089	 * If we have more cookies than we can attach to a frame
5090	 * we will need to use a chain element to point
5091	 * a location of memory where the rest of the S/G
5092	 * elements reside.
5093	 */
5094	if (cmd->cmd_cookiec <= MPTSAS_MAX_FRAME_SGES64(mpt)) {
5095		if (mpt->m_MPI25) {
5096			mptsas_ieee_sge_mainframe(cmd, frame, acc_hdl,
5097			    cmd->cmd_cookiec,
5098			    MPI25_IEEE_SGE_FLAGS_END_OF_LIST);
5099		} else {
5100			mptsas_sge_mainframe(cmd, frame, acc_hdl,
5101			    cmd->cmd_cookiec,
5102			    ((uint32_t)(MPI2_SGE_FLAGS_LAST_ELEMENT
5103			    | MPI2_SGE_FLAGS_END_OF_BUFFER
5104			    | MPI2_SGE_FLAGS_END_OF_LIST) <<
5105			    MPI2_SGE_FLAGS_SHIFT));
5106		}
5107	} else {
5108		if (mpt->m_MPI25) {
5109			mptsas_ieee_sge_chain(mpt, cmd, frame, acc_hdl);
5110		} else {
5111			mptsas_sge_chain(mpt, cmd, frame, acc_hdl);
5112		}
5113	}
5114}
5115
5116/*
5117 * Interrupt handling
5118 * Utility routine.  Poll for status of a command sent to HBA
5119 * without interrupts (a FLAG_NOINTR command).
5120 */
5121int
5122mptsas_poll(mptsas_t *mpt, mptsas_cmd_t *poll_cmd, int polltime)
5123{
5124	int	rval = TRUE;
5125
5126	NDBG5(("mptsas_poll: cmd=0x%p", (void *)poll_cmd));
5127
5128	if ((poll_cmd->cmd_flags & CFLAG_TM_CMD) == 0) {
5129		mptsas_restart_hba(mpt);
5130	}
5131
5132	/*
5133	 * Wait, using drv_usecwait(), long enough for the command to
5134	 * reasonably return from the target if the target isn't
5135	 * "dead".  A polled command may well be sent from scsi_poll, and
5136	 * there are retries built in to scsi_poll if the transport
5137	 * accepted the packet (TRAN_ACCEPT).  scsi_poll waits 1 second
5138	 * and retries the transport up to scsi_poll_busycnt times
5139	 * (currently 60) if
5140	 * 1. pkt_reason is CMD_INCOMPLETE and pkt_state is 0, or
5141	 * 2. pkt_reason is CMD_CMPLT and *pkt_scbp has STATUS_BUSY
5142	 *
5143	 * limit the waiting to avoid a hang in the event that the
5144	 * cmd never gets started but we are still receiving interrupts
5145	 */
5146	while (!(poll_cmd->cmd_flags & CFLAG_FINISHED)) {
5147		if (mptsas_wait_intr(mpt, polltime) == FALSE) {
5148			NDBG5(("mptsas_poll: command incomplete"));
5149			rval = FALSE;
5150			break;
5151		}
5152	}
5153
5154	if (rval == FALSE) {
5155
5156		/*
5157		 * this isn't supposed to happen, the hba must be wedged
5158		 * Mark this cmd as a timeout.
5159		 */
5160		mptsas_set_pkt_reason(mpt, poll_cmd, CMD_TIMEOUT,
5161		    (STAT_TIMEOUT|STAT_ABORTED));
5162
5163		if (poll_cmd->cmd_queued == FALSE) {
5164
5165			NDBG5(("mptsas_poll: not on waitq"));
5166
5167			poll_cmd->cmd_pkt->pkt_state |=
5168			    (STATE_GOT_BUS|STATE_GOT_TARGET|STATE_SENT_CMD);
5169		} else {
5170
5171			/* find and remove it from the waitq */
5172			NDBG5(("mptsas_poll: delete from waitq"));
5173			mptsas_waitq_delete(mpt, poll_cmd);
5174		}
5175
5176	}
5177	mptsas_fma_check(mpt, poll_cmd);
5178	NDBG5(("mptsas_poll: done"));
5179	return (rval);
5180}
5181
5182/*
5183 * Used for polling cmds and TM function
5184 */
static int
mptsas_wait_intr(mptsas_t *mpt, int polltime)
{
	int				cnt;
	pMpi2ReplyDescriptorsUnion_t	reply_desc_union;
	uint32_t			int_mask;

	NDBG5(("mptsas_wait_intr"));

	/* Flag that replies are being consumed by polling, not the ISR. */
	mpt->m_polled_intr = 1;

	/*
	 * Get the current interrupt mask and disable interrupts.  When
	 * re-enabling ints, set mask to saved value.
	 */
	int_mask = ddi_get32(mpt->m_datap, &mpt->m_reg->HostInterruptMask);
	MPTSAS_DISABLE_INTR(mpt);

	/*
	 * Poll the reply post queue for up to roughly "polltime"
	 * milliseconds: each pass that finds no valid reply descriptor
	 * waits 1000us before trying again.
	 */
	for (cnt = 0; cnt < polltime; cnt++) {
		(void) ddi_dma_sync(mpt->m_dma_post_queue_hdl, 0, 0,
		    DDI_DMA_SYNC_FORCPU);

		reply_desc_union = (pMpi2ReplyDescriptorsUnion_t)
		    MPTSAS_GET_NEXT_REPLY(mpt, mpt->m_post_index);

		/* An all-ones descriptor means the slot is still empty. */
		if (ddi_get32(mpt->m_acc_post_queue_hdl,
		    &reply_desc_union->Words.Low) == 0xFFFFFFFF ||
		    ddi_get32(mpt->m_acc_post_queue_hdl,
		    &reply_desc_union->Words.High) == 0xFFFFFFFF) {
			drv_usecwait(1000);
			continue;
		}

		/*
		 * The reply is valid, process it according to its
		 * type.
		 */
		mptsas_process_intr(mpt, reply_desc_union);

		/* Advance the post index, wrapping at the queue depth. */
		if (++mpt->m_post_index == mpt->m_post_queue_depth) {
			mpt->m_post_index = 0;
		}

		/*
		 * Update the global reply index
		 */
		ddi_put32(mpt->m_datap,
		    &mpt->m_reg->ReplyPostHostIndex, mpt->m_post_index);
		mpt->m_polled_intr = 0;

		/*
		 * Re-enable interrupts and quit.
		 */
		ddi_put32(mpt->m_datap, &mpt->m_reg->HostInterruptMask,
		    int_mask);
		return (TRUE);

	}

	/*
	 * Clear polling flag, re-enable interrupts and quit.
	 */
	mpt->m_polled_intr = 0;
	ddi_put32(mpt->m_datap, &mpt->m_reg->HostInterruptMask, int_mask);
	return (FALSE);
}
5254
/*
 * Complete a command whose reply descriptor indicates unqualified SCSI IO
 * success: no reply frame is involved, so only the slot bookkeeping and
 * packet state need updating.
 */
static void
mptsas_handle_scsi_io_success(mptsas_t *mpt,
    pMpi2ReplyDescriptorsUnion_t reply_desc)
{
	pMpi2SCSIIOSuccessReplyDescriptor_t	scsi_io_success;
	uint16_t				SMID;
	mptsas_slots_t				*slots = mpt->m_active;
	mptsas_cmd_t				*cmd = NULL;
	struct scsi_pkt				*pkt;

	ASSERT(mutex_owned(&mpt->m_mutex));

	scsi_io_success = (pMpi2SCSIIOSuccessReplyDescriptor_t)reply_desc;
	SMID = ddi_get16(mpt->m_acc_post_queue_hdl, &scsi_io_success->SMID);

	/*
	 * This is a success reply so just complete the IO.  First, do a sanity
	 * check on the SMID.  The final slot is used for TM requests, which
	 * would not come into this reply handler.
	 */
	if ((SMID == 0) || (SMID > slots->m_n_normal)) {
		mptsas_log(mpt, CE_WARN, "?Received invalid SMID of %d\n",
		    SMID);
		ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
		return;
	}

	cmd = slots->m_slot[SMID];

	/*
	 * print warning and return if the slot is empty
	 */
	if (cmd == NULL) {
		mptsas_log(mpt, CE_WARN, "?NULL command for successful SCSI IO "
		    "in slot %d", SMID);
		return;
	}

	/* Success: mark the full transport state and zero residual. */
	pkt = CMD2PKT(cmd);
	pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD |
	    STATE_GOT_STATUS);
	if (cmd->cmd_flags & CFLAG_DMAVALID) {
		pkt->pkt_state |= STATE_XFERRED_DATA;
	}
	pkt->pkt_resid = 0;

	/* Passthrough commands are waited on; wake the waiter instead. */
	if (cmd->cmd_flags & CFLAG_PASSTHRU) {
		cmd->cmd_flags |= CFLAG_FINISHED;
		cv_broadcast(&mpt->m_passthru_cv);
		return;
	} else {
		mptsas_remove_cmd(mpt, cmd);
	}

	if (cmd->cmd_flags & CFLAG_RETRY) {
		/*
		 * The target returned QFULL or busy, do not add this
		 * pkt to the doneq since the hba will retry
		 * this cmd.
		 *
		 * The pkt has already been resubmitted in
		 * mptsas_handle_qfull() or in mptsas_check_scsi_io_error().
		 * Remove this cmd_flag here.
		 */
		cmd->cmd_flags &= ~CFLAG_RETRY;
	} else {
		mptsas_doneq_add(mpt, cmd);
	}
}
5324
5325static void
5326mptsas_handle_address_reply(mptsas_t *mpt,
5327    pMpi2ReplyDescriptorsUnion_t reply_desc)
5328{
5329	pMpi2AddressReplyDescriptor_t	address_reply;
5330	pMPI2DefaultReply_t		reply;
5331	mptsas_fw_diagnostic_buffer_t	*pBuffer;
5332	uint32_t			reply_addr, reply_frame_dma_baseaddr;
5333	uint16_t			SMID, iocstatus;
5334	mptsas_slots_t			*slots = mpt->m_active;
5335	mptsas_cmd_t			*cmd = NULL;
5336	uint8_t				function, buffer_type;
5337	m_replyh_arg_t			*args;
5338	int				reply_frame_no;
5339
5340	ASSERT(mutex_owned(&mpt->m_mutex));
5341
5342	address_reply = (pMpi2AddressReplyDescriptor_t)reply_desc;
5343	reply_addr = ddi_get32(mpt->m_acc_post_queue_hdl,
5344	    &address_reply->ReplyFrameAddress);
5345	SMID = ddi_get16(mpt->m_acc_post_queue_hdl, &address_reply->SMID);
5346
5347	/*
5348	 * If reply frame is not in the proper range we should ignore this
5349	 * message and exit the interrupt handler.
5350	 */
5351	reply_frame_dma_baseaddr = mpt->m_reply_frame_dma_addr & 0xffffffffu;
5352	if ((reply_addr < reply_frame_dma_baseaddr) ||
5353	    (reply_addr >= (reply_frame_dma_baseaddr +
5354	    (mpt->m_reply_frame_size * mpt->m_max_replies))) ||
5355	    ((reply_addr - reply_frame_dma_baseaddr) %
5356	    mpt->m_reply_frame_size != 0)) {
5357		mptsas_log(mpt, CE_WARN, "?Received invalid reply frame "
5358		    "address 0x%x\n", reply_addr);
5359		ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
5360		return;
5361	}
5362
5363	(void) ddi_dma_sync(mpt->m_dma_reply_frame_hdl, 0, 0,
5364	    DDI_DMA_SYNC_FORCPU);
5365	reply = (pMPI2DefaultReply_t)(mpt->m_reply_frame + (reply_addr -
5366	    reply_frame_dma_baseaddr));
5367	function = ddi_get8(mpt->m_acc_reply_frame_hdl, &reply->Function);
5368
5369	NDBG31(("mptsas_handle_address_reply: function 0x%x, reply_addr=0x%x",
5370	    function, reply_addr));
5371
5372	/*
5373	 * don't get slot information and command for events since these values
5374	 * don't exist
5375	 */
5376	if ((function != MPI2_FUNCTION_EVENT_NOTIFICATION) &&
5377	    (function != MPI2_FUNCTION_DIAG_BUFFER_POST)) {
5378		/*
5379		 * This could be a TM reply, which use the last allocated SMID,
5380		 * so allow for that.
5381		 */
5382		if ((SMID == 0) || (SMID > (slots->m_n_normal + 1))) {
5383			mptsas_log(mpt, CE_WARN, "?Received invalid SMID of "
5384			    "%d\n", SMID);
5385			ddi_fm_service_impact(mpt->m_dip,
5386			    DDI_SERVICE_UNAFFECTED);
5387			return;
5388		}
5389
5390		cmd = slots->m_slot[SMID];
5391
5392		/*
5393		 * print warning and return if the slot is empty
5394		 */
5395		if (cmd == NULL) {
5396			mptsas_log(mpt, CE_WARN, "?NULL command for address "
5397			    "reply in slot %d", SMID);
5398			return;
5399		}
5400		if ((cmd->cmd_flags &
5401		    (CFLAG_PASSTHRU | CFLAG_CONFIG | CFLAG_FW_DIAG))) {
5402			cmd->cmd_rfm = reply_addr;
5403			cmd->cmd_flags |= CFLAG_FINISHED;
5404			cv_broadcast(&mpt->m_passthru_cv);
5405			cv_broadcast(&mpt->m_config_cv);
5406			cv_broadcast(&mpt->m_fw_diag_cv);
5407			return;
5408		} else if (!(cmd->cmd_flags & CFLAG_FW_CMD)) {
5409			mptsas_remove_cmd(mpt, cmd);
5410		}
5411		NDBG31(("\t\tmptsas_process_intr: slot=%d", SMID));
5412	}
5413	/*
5414	 * Depending on the function, we need to handle
5415	 * the reply frame (and cmd) differently.
5416	 */
5417	switch (function) {
5418	case MPI2_FUNCTION_SCSI_IO_REQUEST:
5419		mptsas_check_scsi_io_error(mpt, (pMpi2SCSIIOReply_t)reply, cmd);
5420		break;
5421	case MPI2_FUNCTION_SCSI_TASK_MGMT:
5422		cmd->cmd_rfm = reply_addr;
5423		mptsas_check_task_mgt(mpt, (pMpi2SCSIManagementReply_t)reply,
5424		    cmd);
5425		break;
5426	case MPI2_FUNCTION_FW_DOWNLOAD:
5427		cmd->cmd_flags |= CFLAG_FINISHED;
5428		cv_signal(&mpt->m_fw_cv);
5429		break;
5430	case MPI2_FUNCTION_EVENT_NOTIFICATION:
5431		reply_frame_no = (reply_addr - reply_frame_dma_baseaddr) /
5432		    mpt->m_reply_frame_size;
5433		args = &mpt->m_replyh_args[reply_frame_no];
5434		args->mpt = (void *)mpt;
5435		args->rfm = reply_addr;
5436
5437		/*
5438		 * Record the event if its type is enabled in
5439		 * this mpt instance by ioctl.
5440		 */
5441		mptsas_record_event(args);
5442
5443		/*
5444		 * Handle time critical events
5445		 * NOT_RESPONDING/ADDED only now
5446		 */
5447		if (mptsas_handle_event_sync(args) == DDI_SUCCESS) {
5448			/*
5449			 * Would not return main process,
5450			 * just let taskq resolve ack action
5451			 * and ack would be sent in taskq thread
5452			 */
5453			NDBG20(("send mptsas_handle_event_sync success"));
5454		}
5455
5456		if (mpt->m_in_reset) {
5457			NDBG20(("dropping event received during reset"));
5458			return;
5459		}
5460
5461		if ((ddi_taskq_dispatch(mpt->m_event_taskq, mptsas_handle_event,
5462		    (void *)args, DDI_NOSLEEP)) != DDI_SUCCESS) {
5463			mptsas_log(mpt, CE_WARN, "No memory available"
5464			"for dispatch taskq");
5465			/*
5466			 * Return the reply frame to the free queue.
5467			 */
5468			ddi_put32(mpt->m_acc_free_queue_hdl,
5469			    &((uint32_t *)(void *)
5470			    mpt->m_free_queue)[mpt->m_free_index], reply_addr);
5471			(void) ddi_dma_sync(mpt->m_dma_free_queue_hdl, 0, 0,
5472			    DDI_DMA_SYNC_FORDEV);
5473			if (++mpt->m_free_index == mpt->m_free_queue_depth) {
5474				mpt->m_free_index = 0;
5475			}
5476
5477			ddi_put32(mpt->m_datap,
5478			    &mpt->m_reg->ReplyFreeHostIndex, mpt->m_free_index);
5479		}
5480		return;
5481	case MPI2_FUNCTION_DIAG_BUFFER_POST:
5482		/*
5483		 * If SMID is 0, this implies that the reply is due to a
5484		 * release function with a status that the buffer has been
5485		 * released.  Set the buffer flags accordingly.
5486		 */
5487		if (SMID == 0) {
5488			iocstatus = ddi_get16(mpt->m_acc_reply_frame_hdl,
5489			    &reply->IOCStatus);
5490			buffer_type = ddi_get8(mpt->m_acc_reply_frame_hdl,
5491			    &(((pMpi2DiagBufferPostReply_t)reply)->BufferType));
5492			if (iocstatus == MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED) {
5493				pBuffer =
5494				    &mpt->m_fw_diag_buffer_list[buffer_type];
5495				pBuffer->valid_data = TRUE;
5496				pBuffer->owned_by_firmware = FALSE;
5497				pBuffer->immediate = FALSE;
5498			}
5499		} else {
5500			/*
5501			 * Normal handling of diag post reply with SMID.
5502			 */
5503			cmd = slots->m_slot[SMID];
5504
5505			/*
5506			 * print warning and return if the slot is empty
5507			 */
5508			if (cmd == NULL) {
5509				mptsas_log(mpt, CE_WARN, "?NULL command for "
5510				    "address reply in slot %d", SMID);
5511				return;
5512			}
5513			cmd->cmd_rfm = reply_addr;
5514			cmd->cmd_flags |= CFLAG_FINISHED;
5515			cv_broadcast(&mpt->m_fw_diag_cv);
5516		}
5517		return;
5518	default:
5519		mptsas_log(mpt, CE_WARN, "Unknown function 0x%x ", function);
5520		break;
5521	}
5522
5523	/*
5524	 * Return the reply frame to the free queue.
5525	 */
5526	ddi_put32(mpt->m_acc_free_queue_hdl,
5527	    &((uint32_t *)(void *)mpt->m_free_queue)[mpt->m_free_index],
5528	    reply_addr);
5529	(void) ddi_dma_sync(mpt->m_dma_free_queue_hdl, 0, 0,
5530	    DDI_DMA_SYNC_FORDEV);
5531	if (++mpt->m_free_index == mpt->m_free_queue_depth) {
5532		mpt->m_free_index = 0;
5533	}
5534	ddi_put32(mpt->m_datap, &mpt->m_reg->ReplyFreeHostIndex,
5535	    mpt->m_free_index);
5536
5537	if (cmd->cmd_flags & CFLAG_FW_CMD)
5538		return;
5539
5540	if (cmd->cmd_flags & CFLAG_RETRY) {
5541		/*
5542		 * The target returned QFULL or busy, do not add this
5543		 * pkt to the doneq since the hba will retry
5544		 * this cmd.
5545		 *
5546		 * The pkt has already been resubmitted in
5547		 * mptsas_handle_qfull() or in mptsas_check_scsi_io_error().
5548		 * Remove this cmd_flag here.
5549		 */
5550		cmd->cmd_flags &= ~CFLAG_RETRY;
5551	} else {
5552		mptsas_doneq_add(mpt, cmd);
5553	}
5554}
5555
#ifdef MPTSAS_DEBUG
/*
 * Debug aid: holds a copy of the most recently captured request-sense
 * data (see the MPTSAS_DEBUG bcopy in mptsas_check_scsi_io_error) so it
 * can be inspected with a debugger.
 */
static uint8_t mptsas_last_sense[256];
#endif
5559
5560static void
5561mptsas_check_scsi_io_error(mptsas_t *mpt, pMpi2SCSIIOReply_t reply,
5562    mptsas_cmd_t *cmd)
5563{
5564	uint8_t			scsi_status, scsi_state;
5565	uint16_t		ioc_status, cmd_rqs_len;
5566	uint32_t		xferred, sensecount, responsedata, loginfo = 0;
5567	struct scsi_pkt		*pkt;
5568	struct scsi_arq_status	*arqstat;
5569	mptsas_target_t		*ptgt = cmd->cmd_tgt_addr;
5570	uint8_t			*sensedata = NULL;
5571	uint64_t		sas_wwn;
5572	uint8_t			phy;
5573	char			wwn_str[MPTSAS_WWN_STRLEN];
5574
5575	scsi_status = ddi_get8(mpt->m_acc_reply_frame_hdl, &reply->SCSIStatus);
5576	ioc_status = ddi_get16(mpt->m_acc_reply_frame_hdl, &reply->IOCStatus);
5577	scsi_state = ddi_get8(mpt->m_acc_reply_frame_hdl, &reply->SCSIState);
5578	xferred = ddi_get32(mpt->m_acc_reply_frame_hdl, &reply->TransferCount);
5579	sensecount = ddi_get32(mpt->m_acc_reply_frame_hdl, &reply->SenseCount);
5580	responsedata = ddi_get32(mpt->m_acc_reply_frame_hdl,
5581	    &reply->ResponseInfo);
5582
5583	if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) {
5584		sas_wwn = ptgt->m_addr.mta_wwn;
5585		phy = ptgt->m_phynum;
5586		if (sas_wwn == 0) {
5587			(void) sprintf(wwn_str, "p%x", phy);
5588		} else {
5589			(void) sprintf(wwn_str, "w%016"PRIx64, sas_wwn);
5590		}
5591		loginfo = ddi_get32(mpt->m_acc_reply_frame_hdl,
5592		    &reply->IOCLogInfo);
5593		mptsas_log(mpt, CE_NOTE,
5594		    "?Log info 0x%x received for target %d %s.\n"
5595		    "\tscsi_status=0x%x, ioc_status=0x%x, scsi_state=0x%x",
5596		    loginfo, Tgt(cmd), wwn_str, scsi_status, ioc_status,
5597		    scsi_state);
5598	}
5599
5600	NDBG31(("\t\tscsi_status=0x%x, ioc_status=0x%x, scsi_state=0x%x",
5601	    scsi_status, ioc_status, scsi_state));
5602
5603	pkt = CMD2PKT(cmd);
5604	*(pkt->pkt_scbp) = scsi_status;
5605
5606	if (loginfo == 0x31170000) {
5607		/*
5608		 * if loginfo PL_LOGINFO_CODE_IO_DEVICE_MISSING_DELAY_RETRY
5609		 * 0x31170000 comes, that means the device missing delay
5610		 * is in progressing, the command need retry later.
5611		 */
5612		*(pkt->pkt_scbp) = STATUS_BUSY;
5613		return;
5614	}
5615
5616	if ((scsi_state & MPI2_SCSI_STATE_NO_SCSI_STATUS) &&
5617	    ((ioc_status & MPI2_IOCSTATUS_MASK) ==
5618	    MPI2_IOCSTATUS_SCSI_DEVICE_NOT_THERE)) {
5619		pkt->pkt_reason = CMD_INCOMPLETE;
5620		pkt->pkt_state |= STATE_GOT_BUS;
5621		if (ptgt->m_reset_delay == 0) {
5622			mptsas_set_throttle(mpt, ptgt,
5623			    DRAIN_THROTTLE);
5624		}
5625		return;
5626	}
5627
5628	if (scsi_state & MPI2_SCSI_STATE_RESPONSE_INFO_VALID) {
5629		responsedata &= 0x000000FF;
5630		if (responsedata & MPTSAS_SCSI_RESPONSE_CODE_TLR_OFF) {
5631			mptsas_log(mpt, CE_NOTE, "Do not support the TLR\n");
5632			pkt->pkt_reason = CMD_TLR_OFF;
5633			return;
5634		}
5635	}
5636
5637
5638	switch (scsi_status) {
5639	case MPI2_SCSI_STATUS_CHECK_CONDITION:
5640		pkt->pkt_resid = (cmd->cmd_dmacount - xferred);
5641		arqstat = (void*)(pkt->pkt_scbp);
5642		arqstat->sts_rqpkt_status = *((struct scsi_status *)
5643		    (pkt->pkt_scbp));
5644		pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |
5645		    STATE_SENT_CMD | STATE_GOT_STATUS | STATE_ARQ_DONE);
5646		if (cmd->cmd_flags & CFLAG_XARQ) {
5647			pkt->pkt_state |= STATE_XARQ_DONE;
5648		}
5649		if (pkt->pkt_resid != cmd->cmd_dmacount) {
5650			pkt->pkt_state |= STATE_XFERRED_DATA;
5651		}
5652		arqstat->sts_rqpkt_reason = pkt->pkt_reason;
5653		arqstat->sts_rqpkt_state  = pkt->pkt_state;
5654		arqstat->sts_rqpkt_state |= STATE_XFERRED_DATA;
5655		arqstat->sts_rqpkt_statistics = pkt->pkt_statistics;
5656		sensedata = (uint8_t *)&arqstat->sts_sensedata;
5657		cmd_rqs_len = cmd->cmd_extrqslen ?
5658		    cmd->cmd_extrqslen : cmd->cmd_rqslen;
5659		(void) ddi_dma_sync(mpt->m_dma_req_sense_hdl, 0, 0,
5660		    DDI_DMA_SYNC_FORKERNEL);
5661#ifdef MPTSAS_DEBUG
5662		bcopy(cmd->cmd_arq_buf, mptsas_last_sense,
5663		    ((cmd_rqs_len >= sizeof (mptsas_last_sense)) ?
5664		    sizeof (mptsas_last_sense):cmd_rqs_len));
5665#endif
5666		bcopy((uchar_t *)cmd->cmd_arq_buf, sensedata,
5667		    ((cmd_rqs_len >= sensecount) ? sensecount :
5668		    cmd_rqs_len));
5669		arqstat->sts_rqpkt_resid = (cmd_rqs_len - sensecount);
5670		cmd->cmd_flags |= CFLAG_CMDARQ;
5671		/*
5672		 * Set proper status for pkt if autosense was valid
5673		 */
5674		if (scsi_state & MPI2_SCSI_STATE_AUTOSENSE_VALID) {
5675			struct scsi_status zero_status = { 0 };
5676			arqstat->sts_rqpkt_status = zero_status;
5677		}
5678
5679		/*
5680		 * ASC=0x47 is parity error
5681		 * ASC=0x48 is initiator detected error received
5682		 */
5683		if ((scsi_sense_key(sensedata) == KEY_ABORTED_COMMAND) &&
5684		    ((scsi_sense_asc(sensedata) == 0x47) ||
5685		    (scsi_sense_asc(sensedata) == 0x48))) {
5686			mptsas_log(mpt, CE_NOTE, "Aborted_command!");
5687		}
5688
5689		/*
5690		 * ASC/ASCQ=0x3F/0x0E means report_luns data changed
5691		 * ASC/ASCQ=0x25/0x00 means invalid lun
5692		 */
5693		if (((scsi_sense_key(sensedata) == KEY_UNIT_ATTENTION) &&
5694		    (scsi_sense_asc(sensedata) == 0x3F) &&
5695		    (scsi_sense_ascq(sensedata) == 0x0E)) ||
5696		    ((scsi_sense_key(sensedata) == KEY_ILLEGAL_REQUEST) &&
5697		    (scsi_sense_asc(sensedata) == 0x25) &&
5698		    (scsi_sense_ascq(sensedata) == 0x00))) {
5699			mptsas_topo_change_list_t *topo_node = NULL;
5700
5701			topo_node = kmem_zalloc(
5702			    sizeof (mptsas_topo_change_list_t),
5703			    KM_NOSLEEP);
5704			if (topo_node == NULL) {
5705				mptsas_log(mpt, CE_NOTE, "No memory"
5706				    "resource for handle SAS dynamic"
5707				    "reconfigure.\n");
5708				break;
5709			}
5710			topo_node->mpt = mpt;
5711			topo_node->event = MPTSAS_DR_EVENT_RECONFIG_TARGET;
5712			topo_node->un.phymask = ptgt->m_addr.mta_phymask;
5713			topo_node->devhdl = ptgt->m_devhdl;
5714			topo_node->object = (void *)ptgt;
5715			topo_node->flags = MPTSAS_TOPO_FLAG_LUN_ASSOCIATED;
5716
5717			if ((ddi_taskq_dispatch(mpt->m_dr_taskq,
5718			    mptsas_handle_dr,
5719			    (void *)topo_node,
5720			    DDI_NOSLEEP)) != DDI_SUCCESS) {
5721				kmem_free(topo_node,
5722				    sizeof (mptsas_topo_change_list_t));
5723				mptsas_log(mpt, CE_NOTE, "mptsas start taskq"
5724				    "for handle SAS dynamic reconfigure"
5725				    "failed. \n");
5726			}
5727		}
5728		break;
5729	case MPI2_SCSI_STATUS_GOOD:
5730		switch (ioc_status & MPI2_IOCSTATUS_MASK) {
5731		case MPI2_IOCSTATUS_SCSI_DEVICE_NOT_THERE:
5732			pkt->pkt_reason = CMD_DEV_GONE;
5733			pkt->pkt_state |= STATE_GOT_BUS;
5734			if (ptgt->m_reset_delay == 0) {
5735				mptsas_set_throttle(mpt, ptgt, DRAIN_THROTTLE);
5736			}
5737			NDBG31(("lost disk for target%d, command:%x",
5738			    Tgt(cmd), pkt->pkt_cdbp[0]));
5739			break;
5740		case MPI2_IOCSTATUS_SCSI_DATA_OVERRUN:
5741			NDBG31(("data overrun: xferred=%d", xferred));
5742			NDBG31(("dmacount=%d", cmd->cmd_dmacount));
5743			pkt->pkt_reason = CMD_DATA_OVR;
5744			pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET
5745			    | STATE_SENT_CMD | STATE_GOT_STATUS
5746			    | STATE_XFERRED_DATA);
5747			pkt->pkt_resid = 0;
5748			break;
5749		case MPI2_IOCSTATUS_SCSI_RESIDUAL_MISMATCH:
5750		case MPI2_IOCSTATUS_SCSI_DATA_UNDERRUN:
5751			NDBG31(("data underrun: xferred=%d", xferred));
5752			NDBG31(("dmacount=%d", cmd->cmd_dmacount));
5753			pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET
5754			    | STATE_SENT_CMD | STATE_GOT_STATUS);
5755			pkt->pkt_resid = (cmd->cmd_dmacount - xferred);
5756			if (pkt->pkt_resid != cmd->cmd_dmacount) {
5757				pkt->pkt_state |= STATE_XFERRED_DATA;
5758			}
5759			break;
5760		case MPI2_IOCSTATUS_SCSI_TASK_TERMINATED:
5761			if (cmd->cmd_active_expiration <= gethrtime()) {
5762				/*
5763				 * When timeout requested, propagate
5764				 * proper reason and statistics to
5765				 * target drivers.
5766				 */
5767				mptsas_set_pkt_reason(mpt, cmd, CMD_TIMEOUT,
5768				    STAT_BUS_RESET | STAT_TIMEOUT);
5769			} else {
5770				mptsas_set_pkt_reason(mpt, cmd, CMD_RESET,
5771				    STAT_BUS_RESET);
5772			}
5773			break;
5774		case MPI2_IOCSTATUS_SCSI_IOC_TERMINATED:
5775		case MPI2_IOCSTATUS_SCSI_EXT_TERMINATED:
5776			mptsas_set_pkt_reason(mpt,
5777			    cmd, CMD_RESET, STAT_DEV_RESET);
5778			break;
5779		case MPI2_IOCSTATUS_SCSI_IO_DATA_ERROR:
5780		case MPI2_IOCSTATUS_SCSI_PROTOCOL_ERROR:
5781			pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET);
5782			mptsas_set_pkt_reason(mpt,
5783			    cmd, CMD_TERMINATED, STAT_TERMINATED);
5784			break;
5785		case MPI2_IOCSTATUS_INSUFFICIENT_RESOURCES:
5786		case MPI2_IOCSTATUS_BUSY:
5787			/*
5788			 * set throttles to drain
5789			 */
5790			for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
5791			    ptgt = refhash_next(mpt->m_targets, ptgt)) {
5792				mptsas_set_throttle(mpt, ptgt, DRAIN_THROTTLE);
5793			}
5794
5795			/*
5796			 * retry command
5797			 */
5798			cmd->cmd_flags |= CFLAG_RETRY;
5799			cmd->cmd_pkt_flags |= FLAG_HEAD;
5800
5801			(void) mptsas_accept_pkt(mpt, cmd);
5802			break;
5803		default:
5804			mptsas_log(mpt, CE_WARN,
5805			    "unknown ioc_status = %x\n", ioc_status);
5806			mptsas_log(mpt, CE_CONT, "scsi_state = %x, transfer "
5807			    "count = %x, scsi_status = %x", scsi_state,
5808			    xferred, scsi_status);
5809			break;
5810		}
5811		break;
5812	case MPI2_SCSI_STATUS_TASK_SET_FULL:
5813		mptsas_handle_qfull(mpt, cmd);
5814		break;
5815	case MPI2_SCSI_STATUS_BUSY:
5816		NDBG31(("scsi_status busy received"));
5817		break;
5818	case MPI2_SCSI_STATUS_RESERVATION_CONFLICT:
5819		NDBG31(("scsi_status reservation conflict received"));
5820		break;
5821	default:
5822		mptsas_log(mpt, CE_WARN, "scsi_status=%x, ioc_status=%x\n",
5823		    scsi_status, ioc_status);
5824		mptsas_log(mpt, CE_WARN,
5825		    "mptsas_process_intr: invalid scsi status\n");
5826		break;
5827	}
5828}
5829
5830static void
5831mptsas_check_task_mgt(mptsas_t *mpt, pMpi2SCSIManagementReply_t reply,
5832    mptsas_cmd_t *cmd)
5833{
5834	uint8_t		task_type;
5835	uint16_t	ioc_status;
5836	uint32_t	log_info;
5837	uint16_t	dev_handle;
5838	struct scsi_pkt *pkt = CMD2PKT(cmd);
5839
5840	task_type = ddi_get8(mpt->m_acc_reply_frame_hdl, &reply->TaskType);
5841	ioc_status = ddi_get16(mpt->m_acc_reply_frame_hdl, &reply->IOCStatus);
5842	log_info = ddi_get32(mpt->m_acc_reply_frame_hdl, &reply->IOCLogInfo);
5843	dev_handle = ddi_get16(mpt->m_acc_reply_frame_hdl, &reply->DevHandle);
5844
5845	if (ioc_status != MPI2_IOCSTATUS_SUCCESS) {
5846		mptsas_log(mpt, CE_WARN, "mptsas_check_task_mgt: Task 0x%x "
5847		    "failed. IOCStatus=0x%x IOCLogInfo=0x%x target=%d\n",
5848		    task_type, ioc_status, log_info, dev_handle);
5849		pkt->pkt_reason = CMD_INCOMPLETE;
5850		return;
5851	}
5852
5853	switch (task_type) {
5854	case MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK:
5855	case MPI2_SCSITASKMGMT_TASKTYPE_CLEAR_TASK_SET:
5856	case MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK:
5857	case MPI2_SCSITASKMGMT_TASKTYPE_CLR_ACA:
5858	case MPI2_SCSITASKMGMT_TASKTYPE_QRY_TASK_SET:
5859	case MPI2_SCSITASKMGMT_TASKTYPE_QRY_UNIT_ATTENTION:
5860		break;
5861	case MPI2_SCSITASKMGMT_TASKTYPE_ABRT_TASK_SET:
5862	case MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET:
5863	case MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET:
5864		/*
5865		 * Check for invalid DevHandle of 0 in case application
5866		 * sends bad command.  DevHandle of 0 could cause problems.
5867		 */
5868		if (dev_handle == 0) {
5869			mptsas_log(mpt, CE_WARN, "!Can't flush target with"
5870			    " DevHandle of 0.");
5871		} else {
5872			mptsas_flush_target(mpt, dev_handle, Lun(cmd),
5873			    task_type);
5874		}
5875		break;
5876	default:
5877		mptsas_log(mpt, CE_WARN, "Unknown task management type %d.",
5878		    task_type);
5879		mptsas_log(mpt, CE_WARN, "ioc status = %x", ioc_status);
5880		break;
5881	}
5882}
5883
/*
 * Body of a done-queue helper thread.  Each thread drains its own
 * per-thread list (m_doneq_thread_id[t]) while the ACTIVE flag is set,
 * running packet completion callbacks with the per-list mutex dropped.
 */
static void
mptsas_doneq_thread(mptsas_doneq_thread_arg_t *arg)
{
	mptsas_t			*mpt = arg->mpt;
	uint64_t			t = arg->t;
	mptsas_cmd_t			*cmd;
	struct scsi_pkt			*pkt;
	mptsas_doneq_thread_list_t	*item = &mpt->m_doneq_thread_id[t];

	mutex_enter(&item->mutex);
	while (item->flag & MPTSAS_DONEQ_THREAD_ACTIVE) {
		/* Sleep until work arrives (or the flag is changed). */
		if (!item->doneq) {
			cv_wait(&item->cv, &item->mutex);
		}
		pkt = NULL;
		if ((cmd = mptsas_doneq_thread_rm(mpt, t)) != NULL) {
			cmd->cmd_flags |= CFLAG_COMPLETED;
			pkt = CMD2PKT(cmd);
		}
		/* Drop the list lock while running the completion callback. */
		mutex_exit(&item->mutex);
		if (pkt) {
			mptsas_pkt_comp(pkt, cmd);
		}
		mutex_enter(&item->mutex);
	}
	mutex_exit(&item->mutex);
	/* Account for this thread's exit and wake any waiter. */
	mutex_enter(&mpt->m_doneq_mutex);
	mpt->m_doneq_thread_n--;
	cv_broadcast(&mpt->m_doneq_thread_cv);
	mutex_exit(&mpt->m_doneq_mutex);
}
5915
5916
5917/*
5918 * mpt interrupt handler.
5919 */
5920static uint_t
5921mptsas_intr(caddr_t arg1, caddr_t arg2)
5922{
5923	mptsas_t			*mpt = (void *)arg1;
5924	pMpi2ReplyDescriptorsUnion_t	reply_desc_union;
5925	uchar_t				did_reply = FALSE;
5926
5927	NDBG1(("mptsas_intr: arg1 0x%p arg2 0x%p", (void *)arg1, (void *)arg2));
5928
5929	mutex_enter(&mpt->m_mutex);
5930
5931	/*
5932	 * If interrupts are shared by two channels then check whether this
5933	 * interrupt is genuinely for this channel by making sure first the
5934	 * chip is in high power state.
5935	 */
5936	if ((mpt->m_options & MPTSAS_OPT_PM) &&
5937	    (mpt->m_power_level != PM_LEVEL_D0)) {
5938		mutex_exit(&mpt->m_mutex);
5939		return (DDI_INTR_UNCLAIMED);
5940	}
5941
5942	/*
5943	 * If polling, interrupt was triggered by some shared interrupt because
5944	 * IOC interrupts are disabled during polling, so polling routine will
5945	 * handle any replies.  Considering this, if polling is happening,
5946	 * return with interrupt unclaimed.
5947	 */
5948	if (mpt->m_polled_intr) {
5949		mutex_exit(&mpt->m_mutex);
5950		mptsas_log(mpt, CE_WARN, "mpt_sas: Unclaimed interrupt");
5951		return (DDI_INTR_UNCLAIMED);
5952	}
5953
5954	/*
5955	 * Read the istat register.
5956	 */
5957	if ((INTPENDING(mpt)) != 0) {
5958		/*
5959		 * read fifo until empty.
5960		 */
5961#ifndef __lock_lint
5962		_NOTE(CONSTCOND)
5963#endif
5964		while (TRUE) {
5965			(void) ddi_dma_sync(mpt->m_dma_post_queue_hdl, 0, 0,
5966			    DDI_DMA_SYNC_FORCPU);
5967			reply_desc_union = (pMpi2ReplyDescriptorsUnion_t)
5968			    MPTSAS_GET_NEXT_REPLY(mpt, mpt->m_post_index);
5969
5970			if (ddi_get32(mpt->m_acc_post_queue_hdl,
5971			    &reply_desc_union->Words.Low) == 0xFFFFFFFF ||
5972			    ddi_get32(mpt->m_acc_post_queue_hdl,
5973			    &reply_desc_union->Words.High) == 0xFFFFFFFF) {
5974				break;
5975			}
5976
5977			/*
5978			 * The reply is valid, process it according to its
5979			 * type.  Also, set a flag for updating the reply index
5980			 * after they've all been processed.
5981			 */
5982			did_reply = TRUE;
5983
5984			mptsas_process_intr(mpt, reply_desc_union);
5985
5986			/*
5987			 * Increment post index and roll over if needed.
5988			 */
5989			if (++mpt->m_post_index == mpt->m_post_queue_depth) {
5990				mpt->m_post_index = 0;
5991			}
5992		}
5993
5994		/*
5995		 * Update the global reply index if at least one reply was
5996		 * processed.
5997		 */
5998		if (did_reply) {
5999			ddi_put32(mpt->m_datap,
6000			    &mpt->m_reg->ReplyPostHostIndex, mpt->m_post_index);
6001		}
6002	} else {
6003		mutex_exit(&mpt->m_mutex);
6004		return (DDI_INTR_UNCLAIMED);
6005	}
6006	NDBG1(("mptsas_intr complete"));
6007
6008	/*
6009	 * If no helper threads are created, process the doneq in ISR. If
6010	 * helpers are created, use the doneq length as a metric to measure the
6011	 * load on the interrupt CPU. If it is long enough, which indicates the
6012	 * load is heavy, then we deliver the IO completions to the helpers.
6013	 * This measurement has some limitations, although it is simple and
6014	 * straightforward and works well for most of the cases at present.
6015	 */
6016	if (!mpt->m_doneq_thread_n ||
6017	    (mpt->m_doneq_len <= mpt->m_doneq_length_threshold)) {
6018		mptsas_doneq_empty(mpt);
6019	} else {
6020		mptsas_deliver_doneq_thread(mpt);
6021	}
6022
6023	/*
6024	 * If there are queued cmd, start them now.
6025	 */
6026	if (mpt->m_waitq != NULL) {
6027		mptsas_restart_waitq(mpt);
6028	}
6029
6030	mutex_exit(&mpt->m_mutex);
6031	return (DDI_INTR_CLAIMED);
6032}
6033
/*
 * Dispatch a single reply descriptor by its type, then clear the
 * descriptor slot (write all-ones) so it can be reused and sync it
 * back to the device.
 */
static void
mptsas_process_intr(mptsas_t *mpt,
    pMpi2ReplyDescriptorsUnion_t reply_desc_union)
{
	uint8_t	reply_type;

	ASSERT(mutex_owned(&mpt->m_mutex));

	/*
	 * The reply is valid; process it according to its type.
	 * The caller advances the post index after each reply.
	 */
	reply_type = ddi_get8(mpt->m_acc_post_queue_hdl,
	    &reply_desc_union->Default.ReplyFlags);
	reply_type &= MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
	if (reply_type == MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS ||
	    reply_type == MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS) {
		mptsas_handle_scsi_io_success(mpt, reply_desc_union);
	} else if (reply_type == MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY) {
		mptsas_handle_address_reply(mpt, reply_desc_union);
	} else {
		mptsas_log(mpt, CE_WARN, "?Bad reply type %x", reply_type);
		ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
	}

	/*
	 * Clear the reply descriptor for re-use and increment
	 * index.
	 */
	ddi_put64(mpt->m_acc_post_queue_hdl,
	    &((uint64_t *)(void *)mpt->m_post_queue)[mpt->m_post_index],
	    0xFFFFFFFFFFFFFFFF);
	(void) ddi_dma_sync(mpt->m_dma_post_queue_hdl, 0, 0,
	    DDI_DMA_SYNC_FORDEV);
}
6070
6071/*
6072 * handle qfull condition
6073 */
6074static void
6075mptsas_handle_qfull(mptsas_t *mpt, mptsas_cmd_t *cmd)
6076{
6077	mptsas_target_t	*ptgt = cmd->cmd_tgt_addr;
6078
6079	if ((++cmd->cmd_qfull_retries > ptgt->m_qfull_retries) ||
6080	    (ptgt->m_qfull_retries == 0)) {
6081		/*
6082		 * We have exhausted the retries on QFULL, or,
6083		 * the target driver has indicated that it
6084		 * wants to handle QFULL itself by setting
6085		 * qfull-retries capability to 0. In either case
6086		 * we want the target driver's QFULL handling
6087		 * to kick in. We do this by having pkt_reason
6088		 * as CMD_CMPLT and pkt_scbp as STATUS_QFULL.
6089		 */
6090		mptsas_set_throttle(mpt, ptgt, DRAIN_THROTTLE);
6091	} else {
6092		if (ptgt->m_reset_delay == 0) {
6093			ptgt->m_t_throttle =
6094			    max((ptgt->m_t_ncmds - 2), 0);
6095		}
6096
6097		cmd->cmd_pkt_flags |= FLAG_HEAD;
6098		cmd->cmd_flags &= ~(CFLAG_TRANFLAG);
6099		cmd->cmd_flags |= CFLAG_RETRY;
6100
6101		(void) mptsas_accept_pkt(mpt, cmd);
6102
6103		/*
6104		 * when target gives queue full status with no commands
6105		 * outstanding (m_t_ncmds == 0), throttle is set to 0
6106		 * (HOLD_THROTTLE), and the queue full handling start
6107		 * (see psarc/1994/313); if there are commands outstanding,
6108		 * throttle is set to (m_t_ncmds - 2)
6109		 */
6110		if (ptgt->m_t_throttle == HOLD_THROTTLE) {
6111			/*
6112			 * By setting throttle to QFULL_THROTTLE, we
6113			 * avoid submitting new commands and in
6114			 * mptsas_restart_cmd find out slots which need
6115			 * their throttles to be cleared.
6116			 */
6117			mptsas_set_throttle(mpt, ptgt, QFULL_THROTTLE);
6118			if (mpt->m_restart_cmd_timeid == 0) {
6119				mpt->m_restart_cmd_timeid =
6120				    timeout(mptsas_restart_cmd, mpt,
6121				    ptgt->m_qfull_retry_interval);
6122			}
6123		}
6124	}
6125}
6126
6127mptsas_phymask_t
6128mptsas_physport_to_phymask(mptsas_t *mpt, uint8_t physport)
6129{
6130	mptsas_phymask_t	phy_mask = 0;
6131	uint8_t			i = 0;
6132
6133	NDBG20(("mptsas%d physport_to_phymask enter", mpt->m_instance));
6134
6135	ASSERT(mutex_owned(&mpt->m_mutex));
6136
6137	/*
6138	 * If physport is 0xFF, this is a RAID volume.  Use phymask of 0.
6139	 */
6140	if (physport == 0xFF) {
6141		return (0);
6142	}
6143
6144	for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
6145		if (mpt->m_phy_info[i].attached_devhdl &&
6146		    (mpt->m_phy_info[i].phy_mask != 0) &&
6147		    (mpt->m_phy_info[i].port_num == physport)) {
6148			phy_mask = mpt->m_phy_info[i].phy_mask;
6149			break;
6150		}
6151	}
6152	NDBG20(("mptsas%d physport_to_phymask:physport :%x phymask :%x, ",
6153	    mpt->m_instance, physport, phy_mask));
6154	return (phy_mask);
6155}
6156
6157/*
6158 * mpt free device handle after device gone, by use of passthrough
6159 */
6160static int
6161mptsas_free_devhdl(mptsas_t *mpt, uint16_t devhdl)
6162{
6163	Mpi2SasIoUnitControlRequest_t	req;
6164	Mpi2SasIoUnitControlReply_t	rep;
6165	int				ret;
6166
6167	ASSERT(mutex_owned(&mpt->m_mutex));
6168
6169	/*
6170	 * Need to compose a SAS IO Unit Control request message
6171	 * and call mptsas_do_passthru() function
6172	 */
6173	bzero(&req, sizeof (req));
6174	bzero(&rep, sizeof (rep));
6175
6176	req.Function = MPI2_FUNCTION_SAS_IO_UNIT_CONTROL;
6177	req.Operation = MPI2_SAS_OP_REMOVE_DEVICE;
6178	req.DevHandle = LE_16(devhdl);
6179
6180	ret = mptsas_do_passthru(mpt, (uint8_t *)&req, (uint8_t *)&rep, NULL,
6181	    sizeof (req), sizeof (rep), 0, MPTSAS_PASS_THRU_DIRECTION_NONE,
6182	    NULL, 0, 60, FKIOCTL);
6183	if (ret != 0) {
6184		cmn_err(CE_WARN, "mptsas_free_devhdl: passthru SAS IO Unit "
6185		    "Control error %d", ret);
6186		return (DDI_FAILURE);
6187	}
6188
6189	/* do passthrough success, check the ioc status */
6190	if (LE_16(rep.IOCStatus) != MPI2_IOCSTATUS_SUCCESS) {
6191		cmn_err(CE_WARN, "mptsas_free_devhdl: passthru SAS IO Unit "
6192		    "Control IOCStatus %d", LE_16(rep.IOCStatus));
6193		return (DDI_FAILURE);
6194	}
6195
6196	return (DDI_SUCCESS);
6197}
6198
6199/*
6200 * We have a SATA target that has changed, which means the "bridge-port"
6201 * property must be updated to reflect the SAS WWN of the new attachment point.
6202 * This may change if a SATA device changes which bay, and therefore phy, it is
6203 * plugged into. This SATA device may be a multipath virtual device or may be a
6204 * physical device. We have to handle both cases.
6205 */
6206static boolean_t
6207mptsas_update_sata_bridge(mptsas_t *mpt, dev_info_t *parent,
6208    mptsas_target_t *ptgt)
6209{
6210	int			rval;
6211	uint16_t		dev_hdl;
6212	uint16_t		pdev_hdl;
6213	uint64_t		dev_sas_wwn;
6214	uint8_t			physport;
6215	uint8_t			phy_id;
6216	uint32_t		page_address;
6217	uint16_t		bay_num, enclosure, io_flags;
6218	uint32_t		dev_info;
6219	char			uabuf[SCSI_WWN_BUFLEN];
6220	dev_info_t		*dip;
6221	mdi_pathinfo_t		*pip;
6222
6223	mutex_enter(&mpt->m_mutex);
6224	page_address = (MPI2_SAS_DEVICE_PGAD_FORM_HANDLE &
6225	    MPI2_SAS_DEVICE_PGAD_FORM_MASK) | (uint32_t)ptgt->m_devhdl;
6226	rval = mptsas_get_sas_device_page0(mpt, page_address, &dev_hdl,
6227	    &dev_sas_wwn, &dev_info, &physport, &phy_id, &pdev_hdl, &bay_num,
6228	    &enclosure, &io_flags);
6229	mutex_exit(&mpt->m_mutex);
6230	if (rval != DDI_SUCCESS) {
6231		mptsas_log(mpt, CE_WARN, "unable to get SAS page 0 for "
6232		    "handle %d", page_address);
6233		return (B_FALSE);
6234	}
6235
6236	if (scsi_wwn_to_wwnstr(dev_sas_wwn, 1, uabuf) == NULL) {
6237		mptsas_log(mpt, CE_WARN,
6238		    "mptsas unable to format SATA bridge WWN");
6239		return (B_FALSE);
6240	}
6241
6242	if (mpt->m_mpxio_enable == TRUE && (pip = mptsas_find_path_addr(parent,
6243	    ptgt->