/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * I/O Controller functions for the Solaris COMSTAR SCSI RDMA Protocol
 * Target (SRPT) port provider.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/sdt.h>

#include "srp.h"
#include "srpt_impl.h"
#include "srpt_ioc.h"
#include "srpt_stp.h"
#include "srpt_ch.h"
#include "srpt_common.h"

/*
 * srpt_ioc_srq_size - Tunable parameter that specifies the number
 * of receive WQ entries that can be posted to the IOC shared
 * receive queue.
 */
uint32_t		srpt_ioc_srq_size = SRPT_DEFAULT_IOC_SRQ_SIZE;
extern uint16_t		srpt_send_msg_depth;
extern uint32_t		srpt_iu_size;
extern boolean_t	srpt_enable_by_default;

/* IOC profile capabilities mask must be big-endian */
typedef struct srpt_ioc_opcap_bits_s {
#if	defined(_BIT_FIELDS_LTOH)
	uint8_t		af:1,
			at:1,
			wf:1,
			wt:1,
			rf:1,
			rt:1,
			sf:1,
			st:1;
#elif	defined(_BIT_FIELDS_HTOL)
	uint8_t		st:1,
			sf:1,
			rt:1,
			rf:1,
			wt:1,
			wf:1,
			at:1,
			af:1;
#else
#error	One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
#endif
} srpt_ioc_opcap_bits_t;

typedef union {
	srpt_ioc_opcap_bits_t	bits;
	uint8_t			mask;
} srpt_ioc_opcap_mask_t;
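
/*
 * Under either bit-field layout the 'st' flag lands in the most
 * significant bit of the mask byte.  For example, the mask built in
 * srpt_ioc_init_profile() below (st, sf, rf and wf set) reads back
 * through the 'mask' member as 0xd4 on both layouts.
 */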

/*
 * vmem arena variables - values derived from iSER
 */
#define	SRPT_MR_QUANTSIZE	0x400			/* 1K */
#define	SRPT_MIN_CHUNKSIZE	0x100000		/* 1MB */

/* use less memory on 32-bit kernels as it's much more constrained */
#ifdef _LP64
#define	SRPT_BUF_MR_CHUNKSIZE	0x1000000		/* 16MB */
#define	SRPT_BUF_POOL_MAX	0x40000000		/* 1GB */
#else
#define	SRPT_BUF_MR_CHUNKSIZE	0x400000		/* 4MB */
#define	SRPT_BUF_POOL_MAX	0x4000000		/* 64MB */
#endif
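
/*
 * With these values a 64-bit kernel starts each IOC's data buffer pool
 * with a single 16MB registered chunk and grows it, a chunk at a time,
 * up to 1GB; allocations are carved from the arena in 1K quanta.  See
 * srpt_vmem_create() and srpt_vmem_alloc() below.
 */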

static ibt_mr_flags_t	srpt_dbuf_mr_flags =
    IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_ENABLE_REMOTE_WRITE |
    IBT_MR_ENABLE_REMOTE_READ;

void srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event);

static struct ibt_clnt_modinfo_s srpt_ibt_modinfo = {
	IBTI_V_CURR,
	IBT_STORAGE_DEV,
	srpt_ioc_ib_async_hdlr,
	NULL,
	"srpt"
};

static srpt_ioc_t *srpt_ioc_init(ib_guid_t guid);
static void srpt_ioc_fini(srpt_ioc_t *ioc);
static boolean_t srpt_check_hca_cfg_enabled(ib_guid_t hca_guid);

static srpt_vmem_pool_t *srpt_vmem_create(const char *name, srpt_ioc_t *ioc,
    ib_memlen_t chunksize, uint64_t maxsize, ibt_mr_flags_t flags);
static void *srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size);
static int srpt_vmem_mr_compare(const void *a, const void *b);
static srpt_mr_t *srpt_vmem_chunk_alloc(srpt_vmem_pool_t *ioc,
    ib_memlen_t chunksize);
static void srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool);
static void srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size);
static srpt_mr_t *srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr,
    ib_memlen_t len);
static void srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr);
static void srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr);
static int srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr);

/*
 * srpt_ioc_attach() - I/O Controller attach
 *
 * Attach to IBTF and initialize I/O controllers. The srpt_ctxt->sc_rwlock
 * should be held outside of this call.
 */
int
srpt_ioc_attach()
{
	int		status;
	int		hca_cnt;
	int		hca_ndx;
	ib_guid_t	*guid;

	ASSERT(srpt_ctxt != NULL);

	/*
	 * Attach to IBTF and initialize a list of IB devices.  Each
	 * HCA will be represented by an I/O Controller.
	 */
	status = ibt_attach(&srpt_ibt_modinfo, srpt_ctxt->sc_dip,
	    srpt_ctxt,  &srpt_ctxt->sc_ibt_hdl);
	if (status != DDI_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_attach, ibt_attach failed (0x%x)",
		    status);
		return (DDI_FAILURE);
	}

	hca_cnt = ibt_get_hca_list(&guid);
	if (hca_cnt < 1) {
		/*
		 * not a fatal error.  Service will be up and
		 * waiting for ATTACH events.
		 */
		SRPT_DPRINTF_L2("ioc_attach, no HCA found");
		return (DDI_SUCCESS);
	}

	for (hca_ndx = 0; hca_ndx < hca_cnt; hca_ndx++) {
		SRPT_DPRINTF_L2("ioc_attach, attaching HCA %016llx",
		    (u_longlong_t)guid[hca_ndx]);
		srpt_ioc_attach_hca(guid[hca_ndx], B_FALSE);
	}

	ibt_free_hca_list(guid, hca_cnt);
	SRPT_DPRINTF_L3("ioc_attach, added %d I/O Controller(s)",
	    srpt_ctxt->sc_num_iocs);
	return (DDI_SUCCESS);
}

/*
 * Initialize I/O Controllers.  srpt_ctxt->sc_rwlock must be locked by the
 * caller.
 *
 * 'checked' indicates no need to look up the HCA in the HCA configuration
 * list.
 */
void
srpt_ioc_attach_hca(ib_guid_t hca_guid, boolean_t checked)
{
	boolean_t	enable_hca = B_TRUE;
	srpt_ioc_t	*ioc;

	if (!checked) {
		enable_hca = srpt_check_hca_cfg_enabled(hca_guid);

		if (!enable_hca) {
			/* nothing to do */
			SRPT_DPRINTF_L2(
			    "ioc_attach_hca, HCA %016llx disabled "
			    "by srpt config",
			    (u_longlong_t)hca_guid);
			return;
		}
	}

	SRPT_DPRINTF_L2("ioc_attach_hca, adding I/O"
	    " Controller (%016llx)", (u_longlong_t)hca_guid);

	ioc = srpt_ioc_init(hca_guid);
	if (ioc == NULL) {
		/*
		 * IOC already exists or an error occurred.  Already
		 * logged by srpt_ioc_init()
		 */
		return;
	}

	/*
	 * Create the COMSTAR SRP Target for this IOC.  If this fails,
	 * remove the IOC.
	 */
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
	ioc->ioc_tgt_port = srpt_stp_alloc_port(ioc, ioc->ioc_guid);
	if (ioc->ioc_tgt_port == NULL) {
		SRPT_DPRINTF_L1("ioc_attach_hca: alloc SCSI"
		    " Target Port error on GUID(%016llx)",
		    (u_longlong_t)ioc->ioc_guid);
		rw_exit(&ioc->ioc_rwlock);
		srpt_ioc_fini(ioc);
		return;
	}
	rw_exit(&ioc->ioc_rwlock);

	/*
	 * New HCA added with default SCSI Target Port, SRP service
	 * will be started when SCSI Target Port is brought
	 * on-line by STMF.
	 */
	list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);
	SRPT_DPRINTF_L2("ioc_attach_hca, I/O Controller ibt HCA hdl (%p)",
	    (void *)ioc->ioc_ibt_hdl);

	srpt_ctxt->sc_num_iocs++;
}

/*
 * srpt_check_hca_cfg_enabled()
 *
 * Function to check the configuration for the enabled status of a given
 * HCA.  Returns B_TRUE if SRPT services should be activated for this HCA,
 * B_FALSE if it should be disabled.
 */
static boolean_t
srpt_check_hca_cfg_enabled(ib_guid_t hca_guid)
{
	int		status;
	char		buf[32];
	nvlist_t	*hcanv;
	boolean_t	enable_hca;

	enable_hca = srpt_enable_by_default;

	SRPT_FORMAT_HCAKEY(buf, sizeof (buf), (u_longlong_t)hca_guid);

	if (srpt_ctxt->sc_cfg_hca_nv != NULL) {
		status = nvlist_lookup_nvlist(srpt_ctxt->sc_cfg_hca_nv,
		    buf, &hcanv);
		if (status == 0) {
			SRPT_DPRINTF_L3("check_hca_cfg, found guid %s",  buf);
			(void) nvlist_lookup_boolean_value(hcanv,
			    SRPT_PROP_ENABLED, &enable_hca);
		} else {
			SRPT_DPRINTF_L3("check_hca_cfg, did not find guid %s",
			    buf);
		}
	}

	return (enable_hca);
}

/*
 * srpt_ioc_update()
 *
 * Using the configuration nvlist, enables or disables SRP services on
 * the provided HCAs.  srpt_ctxt->sc_rwlock should be held outside of
 * this call.
 */
void
srpt_ioc_update(void)
{
	boolean_t	enabled;
	nvpair_t	*nvp = NULL;
	uint64_t	hca_guid;
	nvlist_t	*nvl;
	nvlist_t	*cfg = srpt_ctxt->sc_cfg_hca_nv;

	if (cfg == NULL) {
		SRPT_DPRINTF_L2("ioc_update, no configuration data");
		return;
	}

	while ((nvp = nvlist_next_nvpair(cfg, nvp)) != NULL) {
		enabled = srpt_enable_by_default;

		if ((nvpair_value_nvlist(nvp, &nvl)) != 0) {
			SRPT_DPRINTF_L2("ioc_update, did not find an nvlist");
			continue;
		}

		if ((nvlist_lookup_uint64(nvl, SRPT_PROP_GUID, &hca_guid))
		    != 0) {
			SRPT_DPRINTF_L2("ioc_update, did not find a guid");
			continue;
		}

		(void) nvlist_lookup_boolean_value(nvl, SRPT_PROP_ENABLED,
		    &enabled);

		if (enabled) {
			SRPT_DPRINTF_L2("ioc_update, enabling guid %016llx",
			    (u_longlong_t)hca_guid);
			srpt_ioc_attach_hca(hca_guid, B_TRUE);
		} else {
			SRPT_DPRINTF_L2("ioc_update, disabling guid %016llx",
			    (u_longlong_t)hca_guid);
			srpt_ioc_detach_hca(hca_guid);
		}
	}
}

/*
 * srpt_ioc_detach() - I/O Controller detach
 *
 * srpt_ctxt->sc_rwlock should be held outside of this call.
 */
void
srpt_ioc_detach()
{
	srpt_ioc_t	*ioc;

	/*
	 * All SRP targets must be destroyed before calling this
	 * function.
	 */
	while ((ioc = list_head(&srpt_ctxt->sc_ioc_list)) != NULL) {
		SRPT_DPRINTF_L2("ioc_detach, removing I/O Controller(%p)"
		    " (%016llx), ibt_hdl(%p)",
		    (void *)ioc,
		    ioc ? (u_longlong_t)ioc->ioc_guid : 0x0ll,
		    (void *)ioc->ioc_ibt_hdl);

		list_remove(&srpt_ctxt->sc_ioc_list, ioc);
		srpt_ioc_fini(ioc);
		srpt_ctxt->sc_num_iocs--;
	}

	srpt_ctxt->sc_ibt_hdl = NULL;
}

/*
 * srpt_ioc_detach_hca()
 *
 * Stop SRP Target services on this HCA.
 *
 * Note that this is not entirely symmetric with srpt_ioc_attach_hca()
 * in that we don't need to check the configuration to know whether to
 * disable an HCA.  We get here either because the IB framework has told
 * us the HCA has been detached, or because the administrator has explicitly
 * disabled this HCA.
 *
 * Must be called with srpt_ctxt->sc_rwlock locked as RW_WRITER.
 */
void
srpt_ioc_detach_hca(ib_guid_t hca_guid)
{
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt;
	stmf_status_t		stmf_status = STMF_SUCCESS;

	ioc = srpt_ioc_get_locked(hca_guid);
	if (ioc == NULL) {
		/* doesn't exist, nothing to do */
		return;
	}

	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
	tgt = ioc->ioc_tgt_port;

	if (tgt != NULL) {
		stmf_status = srpt_stp_destroy_port(tgt);
		if (stmf_status == STMF_SUCCESS) {
			ioc->ioc_tgt_port = NULL;
			(void) srpt_stp_free_port(tgt);
		}
	}

	rw_exit(&ioc->ioc_rwlock);

	if (stmf_status != STMF_SUCCESS) {
		/* should never happen */
		return;
	}

	list_remove(&srpt_ctxt->sc_ioc_list, ioc);
	srpt_ctxt->sc_num_iocs--;

	srpt_ioc_fini(ioc);
	SRPT_DPRINTF_L2("ioc_detach_hca, HCA %016llx detached",
	    (u_longlong_t)hca_guid);
}

/*
 * srpt_ioc_init() - I/O Controller initialization
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static srpt_ioc_t *
srpt_ioc_init(ib_guid_t guid)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;
	ibt_hca_attr_t		hca_attr;
	uint_t			iu_ndx;
	uint_t			err_ndx;
	ibt_mr_attr_t		mr_attr;
	ibt_mr_desc_t		mr_desc;
	srpt_iu_t		*iu;
	ibt_srq_sizes_t		srq_attr;
	char			namebuf[32];
	size_t			iu_offset;
	uint_t			srq_sz;

	status = ibt_query_hca_byguid(guid, &hca_attr);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, HCA query error (%d)",
		    status);
		return (NULL);
	}

	ioc = srpt_ioc_get_locked(guid);
	if (ioc != NULL) {
		SRPT_DPRINTF_L2("ioc_init, HCA already exists");
		return (NULL);
	}

	ioc = kmem_zalloc(sizeof (srpt_ioc_t), KM_SLEEP);

	rw_init(&ioc->ioc_rwlock, NULL, RW_DRIVER, NULL);
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);

	bcopy(&hca_attr, &ioc->ioc_attr, sizeof (ibt_hca_attr_t));

	SRPT_DPRINTF_L2("ioc_init, HCA max mr=%d, mrlen=%lld",
	    hca_attr.hca_max_memr, (u_longlong_t)hca_attr.hca_max_memr_len);
	ioc->ioc_guid   = guid;

	status = ibt_open_hca(srpt_ctxt->sc_ibt_hdl, guid, &ioc->ioc_ibt_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT open failed (%d)", status);
		goto hca_open_err;
	}

	status = ibt_alloc_pd(ioc->ioc_ibt_hdl, IBT_PD_NO_FLAGS,
	    &ioc->ioc_pd_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT create PD failed (%d)", status);
		goto pd_alloc_err;
	}

	/*
	 * We require hardware support for SRQs.  We use a common SRQ to
	 * reduce channel memory consumption.
	 */
	if ((ioc->ioc_attr.hca_flags & IBT_HCA_SRQ) == 0) {
		SRPT_DPRINTF_L0(
		    "ioc_init, no SRQ capability, HCA not supported");
		goto srq_alloc_err;
	}

	SRPT_DPRINTF_L3("ioc_init, Using shared receive queues, max srq work"
	    " queue size(%d), def size = %d", ioc->ioc_attr.hca_max_srqs_sz,
	    srpt_ioc_srq_size);
	srq_sz = srq_attr.srq_wr_sz = min(srpt_ioc_srq_size,
	    ioc->ioc_attr.hca_max_srqs_sz) - 1;
	srq_attr.srq_sgl_sz = 1;

	status = ibt_alloc_srq(ioc->ioc_ibt_hdl, IBT_SRQ_NO_FLAGS,
	    ioc->ioc_pd_hdl, &srq_attr, &ioc->ioc_srq_hdl,
	    &ioc->ioc_srq_attr);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT create SRQ failed(%d)", status);
		goto srq_alloc_err;
	}

	SRPT_DPRINTF_L2("ioc_init, Using SRQ size(%d), MAX SG size(%d)",
	    srq_sz, 1);

	ibt_set_srq_private(ioc->ioc_srq_hdl, ioc);

	/*
	 * Allocate a pool of SRP IU message buffers and post them to
	 * the I/O Controller SRQ.  We let the SRQ manage the free IU
	 * messages.
	 */
	ioc->ioc_num_iu_entries = srq_sz;

	ioc->ioc_iu_pool = kmem_zalloc(sizeof (srpt_iu_t) *
	    ioc->ioc_num_iu_entries, KM_SLEEP);

	ioc->ioc_iu_bufs = kmem_alloc(srpt_iu_size *
	    ioc->ioc_num_iu_entries, KM_SLEEP);

	if ((ioc->ioc_iu_pool == NULL) || (ioc->ioc_iu_bufs == NULL)) {
		SRPT_DPRINTF_L1("ioc_init, failed to allocate SRQ IUs");
		goto srq_iu_alloc_err;
	}

	mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)ioc->ioc_iu_bufs;
	mr_attr.mr_len   = srpt_iu_size * ioc->ioc_num_iu_entries;
	mr_attr.mr_as    = NULL;
	mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;

	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
	    &mr_attr, &ioc->ioc_iu_mr_hdl, &mr_desc);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IU buffer pool MR err(%d)",
		    status);
		goto srq_iu_alloc_err;
	}

	for (iu_ndx = 0, iu = ioc->ioc_iu_pool; iu_ndx <
	    ioc->ioc_num_iu_entries; iu_ndx++, iu++) {

		iu_offset = (iu_ndx * srpt_iu_size);
		iu->iu_buf = (void *)((uintptr_t)ioc->ioc_iu_bufs + iu_offset);

		mutex_init(&iu->iu_lock, NULL, MUTEX_DRIVER, NULL);

		iu->iu_sge.ds_va  = mr_desc.md_vaddr + iu_offset;
		iu->iu_sge.ds_key = mr_desc.md_lkey;
		iu->iu_sge.ds_len = srpt_iu_size;
		iu->iu_ioc	  = ioc;
		iu->iu_pool_ndx   = iu_ndx;

		status = srpt_ioc_post_recv_iu(ioc, &ioc->ioc_iu_pool[iu_ndx]);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, SRQ IU post err(%d)",
			    status);
			goto srq_iu_post_err;
		}
	}

	/*
	 * Initialize the dbuf vmem arena
	 */
	(void) snprintf(namebuf, sizeof (namebuf),
	    "srpt_buf_pool_%16llX", (u_longlong_t)guid);
	ioc->ioc_dbuf_pool = srpt_vmem_create(namebuf, ioc,
	    SRPT_BUF_MR_CHUNKSIZE, SRPT_BUF_POOL_MAX, srpt_dbuf_mr_flags);

	if (ioc->ioc_dbuf_pool == NULL) {
		goto stmf_db_alloc_err;
	}

	/*
	 * Allocate the I/O Controller STMF data buffer allocator.  The
	 * data store will span all targets associated with this IOC.
	 */
	ioc->ioc_stmf_ds = stmf_alloc(STMF_STRUCT_DBUF_STORE, 0, 0);
	if (ioc->ioc_stmf_ds == NULL) {
		SRPT_DPRINTF_L1("ioc_init, STMF DBUF alloc failure for IOC");
		goto stmf_db_alloc_err;
	}
	ioc->ioc_stmf_ds->ds_alloc_data_buf = &srpt_ioc_ds_alloc_dbuf;
	ioc->ioc_stmf_ds->ds_free_data_buf  = &srpt_ioc_ds_free_dbuf;
	ioc->ioc_stmf_ds->ds_port_private   = ioc;

	rw_exit(&ioc->ioc_rwlock);
	return (ioc);

stmf_db_alloc_err:
	if (ioc->ioc_dbuf_pool != NULL) {
		srpt_vmem_destroy(ioc->ioc_dbuf_pool);
	}

srq_iu_post_err:
	if (ioc->ioc_iu_mr_hdl != NULL) {
		status = ibt_deregister_mr(ioc->ioc_ibt_hdl,
		    ioc->ioc_iu_mr_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error deregistering"
			    " memory region (%d)", status);
		}
	}
	for (err_ndx = 0, iu = ioc->ioc_iu_pool; err_ndx < iu_ndx;
	    err_ndx++, iu++) {
		mutex_destroy(&iu->iu_lock);
	}

srq_iu_alloc_err:
	if (ioc->ioc_iu_bufs != NULL) {
		kmem_free(ioc->ioc_iu_bufs, srpt_iu_size *
		    ioc->ioc_num_iu_entries);
	}
	if (ioc->ioc_iu_pool != NULL) {
		kmem_free(ioc->ioc_iu_pool,
		    sizeof (srpt_iu_t) * ioc->ioc_num_iu_entries);
	}
	if (ioc->ioc_srq_hdl != NULL) {
		status = ibt_free_srq(ioc->ioc_srq_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error freeing SRQ (%d)",
			    status);
		}

	}

srq_alloc_err:
	status = ibt_free_pd(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, free PD error (%d)", status);
	}

pd_alloc_err:
	status = ibt_close_hca(ioc->ioc_ibt_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, close ioc error (%d)", status);
	}

hca_open_err:
	rw_exit(&ioc->ioc_rwlock);
	rw_destroy(&ioc->ioc_rwlock);
	kmem_free(ioc, sizeof (*ioc));
	return (NULL);
}

/*
 * srpt_ioc_fini() - I/O Controller Cleanup
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static void
srpt_ioc_fini(srpt_ioc_t *ioc)
{
	int		status;
	int		ndx;

	/*
	 * Note driver flows will have already taken all SRP
	 * services running on the I/O Controller off-line.
	 */
	ASSERT(ioc->ioc_tgt_port == NULL);
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
	if (ioc->ioc_ibt_hdl != NULL) {
		if (ioc->ioc_stmf_ds != NULL) {
			stmf_free(ioc->ioc_stmf_ds);
		}

		if (ioc->ioc_srq_hdl != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing SRQ");
			status = ibt_free_srq(ioc->ioc_srq_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free SRQ"
				    " error (%d)", status);
			}
		}

		if (ioc->ioc_iu_mr_hdl != NULL) {
			status = ibt_deregister_mr(
			    ioc->ioc_ibt_hdl, ioc->ioc_iu_mr_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, error deregistering"
				    " memory region (%d)", status);
			}
		}

		if (ioc->ioc_iu_bufs != NULL) {
			kmem_free(ioc->ioc_iu_bufs, srpt_iu_size *
			    ioc->ioc_num_iu_entries);
		}

		if (ioc->ioc_iu_pool != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing IU entries");
			for (ndx = 0; ndx < ioc->ioc_num_iu_entries; ndx++) {
				mutex_destroy(&ioc->ioc_iu_pool[ndx].iu_lock);
			}

			SRPT_DPRINTF_L4("ioc_fini, free IU pool struct");
			kmem_free(ioc->ioc_iu_pool,
			    sizeof (srpt_iu_t) * (ioc->ioc_num_iu_entries));
			ioc->ioc_iu_pool = NULL;
			ioc->ioc_num_iu_entries = 0;
		}

		if (ioc->ioc_dbuf_pool != NULL) {
			srpt_vmem_destroy(ioc->ioc_dbuf_pool);
		}

		if (ioc->ioc_pd_hdl != NULL) {
			status = ibt_free_pd(ioc->ioc_ibt_hdl,
			    ioc->ioc_pd_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free PD"
				    " error (%d)", status);
			}
		}

		status = ibt_close_hca(ioc->ioc_ibt_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_fini, close ioc error (%d)", status);
		}
	}
	rw_exit(&ioc->ioc_rwlock);
	rw_destroy(&ioc->ioc_rwlock);
	kmem_free(ioc, sizeof (srpt_ioc_t));
}

/*
 * srpt_ioc_port_active() - I/O Controller port active
 */
static void
srpt_ioc_port_active(ibt_async_event_t *event)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt = NULL;
	boolean_t		online_target = B_FALSE;
	stmf_change_status_t	cstatus;

	ASSERT(event != NULL);

	SRPT_DPRINTF_L3("ioc_port_active event handler, invoked");

	/*
	 * Find the HCA in question and if the HCA has completed
	 * initialization, and the SRP Target service for the
	 * I/O Controller exists, then bind this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller not"
		    " active");
		return;
	}

	tgt = ioc->ioc_tgt_port;
	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller target"
		    " undefined");
		return;
	}

	/*
	 * We take the target lock here to serialize this operation
	 * with any STMF initiated target state transitions.  If
	 * SRP is off-line then the service handle is NULL.
	 */
	mutex_enter(&tgt->tp_lock);

	if (tgt->tp_ibt_svc_hdl != NULL) {
		status = srpt_ioc_svc_bind(tgt, event->ev_port);
		if ((status != IBT_SUCCESS) &&
		    (status != IBT_HCA_PORT_NOT_ACTIVE)) {
			SRPT_DPRINTF_L1("ioc_port_active, bind failed (%d)",
			    status);
		}
	} else {
		/* if we were offline because of no ports, try onlining now */
		if ((tgt->tp_num_active_ports == 0) &&
		    (tgt->tp_requested_state != tgt->tp_state) &&
		    (tgt->tp_requested_state == SRPT_TGT_STATE_ONLINE)) {
			online_target = B_TRUE;
			cstatus.st_completion_status = STMF_SUCCESS;
			cstatus.st_additional_info = "port active";
		}
	}

	mutex_exit(&tgt->tp_lock);

	if (online_target) {
		stmf_status_t	ret;

		ret = stmf_ctl(STMF_CMD_LPORT_ONLINE, tgt->tp_lport, &cstatus);

		if (ret == STMF_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
			    "target %016llx online requested", event->ev_port,
			    (u_longlong_t)ioc->ioc_guid);
		} else if (ret != STMF_ALREADY) {
			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
			    "target %016llx failed online request: %d",
			    event->ev_port, (u_longlong_t)ioc->ioc_guid,
			    (int)ret);
		}
	}
}

/*
 * srpt_ioc_port_down()
 */
static void
srpt_ioc_port_down(ibt_async_event_t *event)
{
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt;
	srpt_channel_t		*ch;
	srpt_channel_t		*next_ch;
	boolean_t		offline_target = B_FALSE;
	stmf_change_status_t	cstatus;

	SRPT_DPRINTF_L3("ioc_port_down event handler, invoked");

	/*
	 * Find the HCA in question and if the HCA has completed
	 * initialization, and the SRP Target service for the
	 * I/O Controller exists, then log out initiators
	 * through this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller not"
		    " active");
		return;
	}

	/*
	 * We only have one target now, but we could go through all
	 * SCSI target ports if more are added.
	 */
	tgt = ioc->ioc_tgt_port;
	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller target"
		    " undefined");
		return;
	}
	mutex_enter(&tgt->tp_lock);

	/*
	 * For all channels logged in through this port, initiate a
	 * disconnect.
	 */
	mutex_enter(&tgt->tp_ch_list_lock);
	ch = list_head(&tgt->tp_ch_list);
	while (ch != NULL) {
		next_ch = list_next(&tgt->tp_ch_list, ch);
		if (ch->ch_session && (ch->ch_session->ss_hw_port ==
		    event->ev_port)) {
			srpt_ch_disconnect(ch);
		}
		ch = next_ch;
	}
	mutex_exit(&tgt->tp_ch_list_lock);

	tgt->tp_num_active_ports--;

	/* if we have no active ports, take the target offline */
	if ((tgt->tp_num_active_ports == 0) &&
	    (tgt->tp_state == SRPT_TGT_STATE_ONLINE)) {
		cstatus.st_completion_status = STMF_SUCCESS;
		cstatus.st_additional_info = "no ports active";
		offline_target = B_TRUE;
	}

	mutex_exit(&tgt->tp_lock);

	if (offline_target) {
		stmf_status_t	ret;

		ret = stmf_ctl(STMF_CMD_LPORT_OFFLINE, tgt->tp_lport, &cstatus);

		if (ret == STMF_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
			    "%016llx offline requested", event->ev_port,
			    (u_longlong_t)ioc->ioc_guid);
		} else if (ret != STMF_ALREADY) {
			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
			    "%016llx failed offline request: %d",
			    event->ev_port,
			    (u_longlong_t)ioc->ioc_guid, (int)ret);
		}
	}
}

/*
 * srpt_ioc_ib_async_hdlr - I/O Controller IB asynchronous events
 */
/* ARGSUSED */
void
srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event)
{
	srpt_channel_t		*ch;

	switch (code) {
	case IBT_EVENT_PORT_UP:
		srpt_ioc_port_active(event);
		break;

	case IBT_ERROR_PORT_DOWN:
		srpt_ioc_port_down(event);
		break;

	case IBT_HCA_ATTACH_EVENT:
		SRPT_DPRINTF_L2(
		    "ib_async_hdlr, received attach event for HCA 0x%016llx",
		    (u_longlong_t)event->ev_hca_guid);

		rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER);
		srpt_ioc_attach_hca(event->ev_hca_guid, B_FALSE);
		rw_exit(&srpt_ctxt->sc_rwlock);

		break;

	case IBT_HCA_DETACH_EVENT:
		SRPT_DPRINTF_L1(
		    "ioc_iob_async_hdlr, received HCA_DETACH_EVENT for "
		    "HCA 0x%016llx",
		    (u_longlong_t)event->ev_hca_guid);

		rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER);
		srpt_ioc_detach_hca(event->ev_hca_guid);
		rw_exit(&srpt_ctxt->sc_rwlock);

		break;

	case IBT_EVENT_EMPTY_CHAN:
		/* Channel in ERROR state is now empty */
		ch = (srpt_channel_t *)ibt_get_chan_private(event->ev_chan_hdl);
		SRPT_DPRINTF_L3(
		    "ioc_iob_async_hdlr, received empty channel error on %p",
		    (void *)ch);
		break;

	default:
		SRPT_DPRINTF_L2("ioc_ib_async_hdlr, event not "
		    "handled (%d)", code);
		break;
	}
}

/*
 * srpt_ioc_svc_bind()
 */
ibt_status_t
srpt_ioc_svc_bind(srpt_target_port_t *tgt, uint_t portnum)
{
	ibt_status_t		status;
	srpt_hw_port_t		*port;
	ibt_hca_portinfo_t	*portinfo;
	uint_t			qportinfo_sz;
	uint_t			qportnum;
	ib_gid_t		new_gid;
	srpt_ioc_t		*ioc;
	srpt_session_t		sess;

	ASSERT(tgt != NULL);
	ASSERT(tgt->tp_ioc != NULL);
	ioc = tgt->tp_ioc;

	if (tgt->tp_ibt_svc_hdl == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_bind, NULL SCSI target port"
		    " service");
		return (IBT_INVALID_PARAM);
	}

	if (portnum == 0 || portnum > tgt->tp_nports) {
		SRPT_DPRINTF_L2("ioc_svc_bind, bad port (%d)", portnum);
		return (IBT_INVALID_PARAM);
	}
	status = ibt_query_hca_ports(ioc->ioc_ibt_hdl, portnum,
	    &portinfo, &qportnum, &qportinfo_sz);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_svc_bind, query port %d error (%d)",
		    portnum, status);
		return (IBT_INVALID_PARAM);
	}

	ASSERT(portinfo != NULL);

	/*
	 * If port is not active do nothing, caller should attempt to bind
	 * after the port goes active.
	 */
	if (portinfo->p_linkstate != IBT_PORT_ACTIVE) {
		SRPT_DPRINTF_L2("ioc_svc_bind, port %d not in active state",
		    portnum);
		ibt_free_portinfo(portinfo, qportinfo_sz);
		return (IBT_HCA_PORT_NOT_ACTIVE);
	}

	port    = &tgt->tp_hw_port[portnum-1];
	new_gid = portinfo->p_sgid_tbl[0];
	ibt_free_portinfo(portinfo, qportinfo_sz);

	/*
	 * If previously bound and the port GID has changed,
	 * unbind the old GID.
	 */
	if (port->hwp_bind_hdl != NULL) {
		if (new_gid.gid_guid != port->hwp_gid.gid_guid ||
		    new_gid.gid_prefix != port->hwp_gid.gid_prefix) {
			SRPT_DPRINTF_L2("ioc_svc_bind, unregister current"
			    " bind");
			(void) ibt_unbind_service(tgt->tp_ibt_svc_hdl,
			    port->hwp_bind_hdl);
			port->hwp_bind_hdl = NULL;
		} else {
			SRPT_DPRINTF_L2("ioc_svc_bind, port %d already bound",
			    portnum);
		}
	}

	/* bind the new port GID */
	if (port->hwp_bind_hdl == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_bind, bind service, %016llx:%016llx",
		    (u_longlong_t)new_gid.gid_prefix,
		    (u_longlong_t)new_gid.gid_guid);

		/*
		 * Pass SCSI Target Port as CM private data, the target will
		 * always exist while this service is bound.
		 */
		status = ibt_bind_service(tgt->tp_ibt_svc_hdl, new_gid, NULL,
		    tgt, &port->hwp_bind_hdl);
		if (status != IBT_SUCCESS && status != IBT_CM_SERVICE_EXISTS) {
			SRPT_DPRINTF_L1("ioc_svc_bind, bind error (%d)",
			    status);
			return (status);
		}
		port->hwp_gid.gid_prefix = new_gid.gid_prefix;
		port->hwp_gid.gid_guid = new_gid.gid_guid;
	}

	/* port is now active */
	tgt->tp_num_active_ports++;

	/* setting up a transient structure for the dtrace probe. */
	bzero(&sess, sizeof (srpt_session_t));
	ALIAS_STR(sess.ss_t_gid, new_gid.gid_prefix, new_gid.gid_guid);
	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

	DTRACE_SRP_1(service__up, srpt_session_t, &sess);

	return (IBT_SUCCESS);
}

/*
 * srpt_ioc_svc_unbind()
 */
void
srpt_ioc_svc_unbind(srpt_target_port_t *tgt, uint_t portnum)
{
	srpt_hw_port_t		*port;
	srpt_session_t		sess;
	ibt_status_t		ret;

	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, SCSI target does not exist");
		return;
	}

	if (portnum == 0 || portnum > tgt->tp_nports) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, bad port (%d)", portnum);
		return;
	}
	port = &tgt->tp_hw_port[portnum-1];

	/* setting up a transient structure for the dtrace probe. */
	bzero(&sess, sizeof (srpt_session_t));
	ALIAS_STR(sess.ss_t_gid, port->hwp_gid.gid_prefix,
	    port->hwp_gid.gid_guid);
	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

	DTRACE_SRP_1(service__down, srpt_session_t, &sess);

	if (tgt->tp_ibt_svc_hdl != NULL && port->hwp_bind_hdl != NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, unregister current bind");
		ret = ibt_unbind_service(tgt->tp_ibt_svc_hdl,
		    port->hwp_bind_hdl);
		if (ret != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_svc_unbind, unregister port %d failed: %d",
			    portnum, ret);
		} else {
			port->hwp_bind_hdl = NULL;
			port->hwp_gid.gid_prefix = 0;
			port->hwp_gid.gid_guid = 0;
		}
	}
}

/*
 * srpt_ioc_svc_unbind_all()
 */
void
srpt_ioc_svc_unbind_all(srpt_target_port_t *tgt)
{
	uint_t		portnum;

	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind_all, NULL SCSI target port"
		    " specified");
		return;
	}
	for (portnum = 1; portnum <= tgt->tp_nports; portnum++) {
		srpt_ioc_svc_unbind(tgt, portnum);
	}
}

/*
 * srpt_ioc_get_locked()
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
srpt_ioc_t *
srpt_ioc_get_locked(ib_guid_t guid)
{
	srpt_ioc_t	*ioc;

	ioc = list_head(&srpt_ctxt->sc_ioc_list);
	while (ioc != NULL) {
		if (ioc->ioc_guid == guid) {
			break;
		}
		ioc = list_next(&srpt_ctxt->sc_ioc_list, ioc);
	}
	return (ioc);
}

/*
 * srpt_ioc_get()
 */
srpt_ioc_t *
srpt_ioc_get(ib_guid_t guid)
{
	srpt_ioc_t	*ioc;

	rw_enter(&srpt_ctxt->sc_rwlock, RW_READER);
	ioc = srpt_ioc_get_locked(guid);
	rw_exit(&srpt_ctxt->sc_rwlock);
	return (ioc);
}

/*
 * srpt_ioc_post_recv_iu()
 */
ibt_status_t
srpt_ioc_post_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
	ibt_status_t		status;
	ibt_recv_wr_t		wr;
	uint_t			posted;

	ASSERT(ioc != NULL);
	ASSERT(iu != NULL);

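	/*
	 * Use the IU pointer itself as the work request ID so the
	 * receive completion handler can recover the IU directly from
	 * the completion entry.
	 */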
	wr.wr_id  = (ibt_wrid_t)(uintptr_t)iu;
	wr.wr_nds = 1;
	wr.wr_sgl = &iu->iu_sge;
	posted    = 0;

	status = ibt_post_srq(ioc->ioc_srq_hdl, &wr, 1, &posted);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ioc_post_recv_iu, post error (%d)",
		    status);
	}
	return (status);
}

/*
 * srpt_ioc_repost_recv_iu()
 */
void
srpt_ioc_repost_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
	srpt_channel_t		*ch;
	ibt_status_t		status;

	ASSERT(iu != NULL);
	ASSERT(mutex_owned(&iu->iu_lock));

	/*
	 * Some additional sanity checks while in debug state, all STMF
	 * related task activities should be complete prior to returning
	 * this IU to the available pool.
	 */
	ASSERT(iu->iu_stmf_task == NULL);
	ASSERT(iu->iu_sq_posted_cnt == 0);

	ch = iu->iu_ch;
	iu->iu_ch = NULL;
	iu->iu_num_rdescs = 0;
	iu->iu_rdescs = NULL;
	iu->iu_tot_xfer_len = 0;
	iu->iu_tag = 0;
	iu->iu_flags = 0;
	iu->iu_sq_posted_cnt = 0;

	status = srpt_ioc_post_recv_iu(ioc, iu);

	if (status != IBT_SUCCESS) {
		/*
		 * Very bad, we should initiate a shutdown of the I/O
		 * Controller here, off-lining any targets associated
		 * with this I/O Controller (and therefore disconnecting
		 * any logins that remain).
		 *
		 * In practice this should never happen so we put
		 * the code near the bottom of the implementation list.
		 */
		SRPT_DPRINTF_L0("ioc_repost_recv_iu, error RX IU (%d)",
		    status);
		ASSERT(0);
	} else if (ch != NULL) {
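		/*
		 * Return a receive credit to the initiator.  The
		 * accumulated delta is advertised in the REQUEST LIMIT
		 * DELTA field of subsequent SRP responses sent on this
		 * channel, replenishing the initiator's send credits.
		 */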
		atomic_inc_32(&ch->ch_req_lim_delta);
	}
}

/*
 * srpt_ioc_init_profile()
 *
 * SRP I/O Controller serialization lock must be held when this
 * routine is invoked.
 */
void
srpt_ioc_init_profile(srpt_ioc_t *ioc)
{
	srpt_ioc_opcap_mask_t		capmask = {0};

	ASSERT(ioc != NULL);

	ioc->ioc_profile.ioc_guid = h2b64(ioc->ioc_guid);
	(void) memcpy(ioc->ioc_profile.ioc_id_string,
	    "Solaris SRP Target 0.9a", 23);
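	/*
	 * Only the 23 significant bytes are copied above; the
	 * containing srpt_ioc_t was kmem_zalloc()ed, so the remainder
	 * of the ID string field stays zero-filled.
	 */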

	/*
	 * Note vendor ID and subsystem ID are 24 bit values.  Low order
	 * 8 bits in vendor ID field is slot and is initialized to zero.
	 * Low order 8 bits of subsystem ID is a reserved field and
	 * initialized to zero.
	 */
	ioc->ioc_profile.ioc_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_deviceid =
	    h2b32((uint32_t)ioc->ioc_attr.hca_device_id);
	ioc->ioc_profile.ioc_device_ver =
	    h2b16((uint16_t)ioc->ioc_attr.hca_version_id);
	ioc->ioc_profile.ioc_subsys_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_subsys_id = h2b32(0);
	ioc->ioc_profile.ioc_io_class = h2b16(SRP_REV_16A_IO_CLASS);
	ioc->ioc_profile.ioc_io_subclass = h2b16(SRP_IO_SUBCLASS);
	ioc->ioc_profile.ioc_protocol = h2b16(SRP_PROTOCOL);
	ioc->ioc_profile.ioc_protocol_ver = h2b16(SRP_PROTOCOL_VERSION);
	ioc->ioc_profile.ioc_send_msg_qdepth = h2b16(srpt_send_msg_depth);
	ioc->ioc_profile.ioc_rdma_read_qdepth =
	    ioc->ioc_attr.hca_max_rdma_out_chan;
	ioc->ioc_profile.ioc_send_msg_sz = h2b32(srpt_iu_size);
	ioc->ioc_profile.ioc_rdma_xfer_sz = h2b32(SRPT_DEFAULT_MAX_RDMA_SIZE);

	capmask.bits.st = 1;	/* Messages can be sent to IOC */
	capmask.bits.sf = 1;	/* Messages can be sent from IOC */
	capmask.bits.rf = 1;	/* RDMA Reads can be sent from IOC */
	capmask.bits.wf = 1;	/* RDMA Writes can be sent from IOC */
	ioc->ioc_profile.ioc_ctrl_opcap_mask = capmask.mask;

	/*
	 * We currently only have one target, but if we had a list we would
	 * go through that list and only count those that are ONLINE when
	 * setting the services count and entries.
	 */
	if (ioc->ioc_tgt_port->tp_srp_enabled) {
		ioc->ioc_profile.ioc_service_entries = 1;
		ioc->ioc_svc.srv_id = h2b64(ioc->ioc_guid);
		(void) snprintf((char *)ioc->ioc_svc.srv_name,
		    IB_DM_MAX_SVC_NAME_LEN, "SRP.T10:%016llx",
		    (u_longlong_t)ioc->ioc_guid);
	} else {
		ioc->ioc_profile.ioc_service_entries = 0;
		ioc->ioc_svc.srv_id = 0;
	}
}

/*
 * srpt_ioc_ds_alloc_dbuf()
 */
/* ARGSUSED */
stmf_data_buf_t *
srpt_ioc_ds_alloc_dbuf(struct scsi_task *task, uint32_t size,
	uint32_t *pminsize, uint32_t flags)
{
	srpt_iu_t		*iu;
	srpt_ioc_t		*ioc;
	srpt_ds_dbuf_t		*dbuf;
	stmf_data_buf_t		*stmf_dbuf;
	void			*buf;
	srpt_mr_t		mr;

	ASSERT(task != NULL);
	iu  = task->task_port_private;
	ioc = iu->iu_ioc;

	SRPT_DPRINTF_L4("ioc_ds_alloc_dbuf, invoked ioc(%p)"
	    " size(%d), flags(%x)",
	    (void *)ioc, size, flags);

	buf = srpt_vmem_alloc(ioc->ioc_dbuf_pool, size);
	if (buf == NULL) {
		return (NULL);
	}

	if (srpt_vmem_mr(ioc->ioc_dbuf_pool, buf, size, &mr) != 0) {
		goto stmf_alloc_err;
	}

	stmf_dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, sizeof (srpt_ds_dbuf_t),
	    0);
	if (stmf_dbuf == NULL) {
		SRPT_DPRINTF_L2("ioc_ds_alloc_dbuf, stmf_alloc failed");
		goto stmf_alloc_err;
	}

	dbuf = stmf_dbuf->db_port_private;
	dbuf->db_stmf_buf = stmf_dbuf;
	dbuf->db_mr_hdl = mr.mr_hdl;
	dbuf->db_ioc = ioc;
	dbuf->db_sge.ds_va = mr.mr_va;
	dbuf->db_sge.ds_key = mr.mr_lkey;
	dbuf->db_sge.ds_len = size;

	stmf_dbuf->db_buf_size = size;
	stmf_dbuf->db_data_size = size;
	stmf_dbuf->db_relative_offset = 0;
	stmf_dbuf->db_flags = 0;
	stmf_dbuf->db_xfer_status = 0;
	stmf_dbuf->db_sglist_length = 1;
	stmf_dbuf->db_sglist[0].seg_addr = buf;
	stmf_dbuf->db_sglist[0].seg_length = size;

	return (stmf_dbuf);

buf_mr_err:
	stmf_free(stmf_dbuf);

stmf_alloc_err:
	srpt_vmem_free(ioc->ioc_dbuf_pool, buf, size);

	return (NULL);
}

void
srpt_ioc_ds_free_dbuf(struct stmf_dbuf_store *ds,
	stmf_data_buf_t *dbuf)
{
	srpt_ioc_t	*ioc;

	SRPT_DPRINTF_L4("ioc_ds_free_dbuf, invoked buf (%p)",
	    (void *)dbuf);
	ioc = ds->ds_port_private;

	srpt_vmem_free(ioc->ioc_dbuf_pool, dbuf->db_sglist[0].seg_addr,
	    dbuf->db_buf_size);
	stmf_free(dbuf);
}

/* Memory arena routines */

static srpt_vmem_pool_t *
srpt_vmem_create(const char *name, srpt_ioc_t *ioc, ib_memlen_t chunksize,
    uint64_t maxsize, ibt_mr_flags_t flags)
{
	srpt_mr_t		*chunk;
	srpt_vmem_pool_t	*result;

	ASSERT(chunksize <= maxsize);

	result = kmem_zalloc(sizeof (srpt_vmem_pool_t), KM_SLEEP);

	result->svp_ioc = ioc;
	result->svp_chunksize = chunksize;
	result->svp_max_size = maxsize;
	result->svp_flags = flags;

	rw_init(&result->svp_lock, NULL, RW_DRIVER, NULL);
	avl_create(&result->svp_mr_list, srpt_vmem_mr_compare,
	    sizeof (srpt_mr_t), offsetof(srpt_mr_t, mr_avl));

	chunk = srpt_vmem_chunk_alloc(result, chunksize);

	avl_add(&result->svp_mr_list, chunk);
	result->svp_total_size = chunksize;

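	/*
	 * Create the arena spanning the initial chunk with no backing
	 * source functions; the pool grows only through explicit
	 * vmem_add() calls in srpt_vmem_alloc().
	 */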
	result->svp_vmem = vmem_create(name,
	    (void*)(uintptr_t)chunk->mr_va,
	    (size_t)chunk->mr_len, SRPT_MR_QUANTSIZE,
	    NULL, NULL, NULL, 0, VM_SLEEP);

	return (result);
}

static void
srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool)
{
	srpt_mr_t		*chunk;
	srpt_mr_t		*next;

	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	vmem_destroy(vm_pool->svp_vmem);

	chunk = avl_first(&vm_pool->svp_mr_list);

	while (chunk != NULL) {
		next = AVL_NEXT(&vm_pool->svp_mr_list, chunk);
		avl_remove(&vm_pool->svp_mr_list, chunk);
		srpt_vmem_chunk_free(vm_pool, chunk);
		chunk = next;
	}

	avl_destroy(&vm_pool->svp_mr_list);

	rw_exit(&vm_pool->svp_lock);
	rw_destroy(&vm_pool->svp_lock);

	kmem_free(vm_pool, sizeof (srpt_vmem_pool_t));
}

static void *
srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size)
{
	void		*result;
	srpt_mr_t	*next;
	ib_memlen_t	chunklen;

	ASSERT(vm_pool != NULL);

	result = vmem_alloc(vm_pool->svp_vmem, size,
	    VM_NOSLEEP | VM_FIRSTFIT);

	if (result != NULL) {
		/* memory successfully allocated */
		return (result);
	}

	/* need more vmem */
	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	chunklen = vm_pool->svp_chunksize;

	if (vm_pool->svp_total_size >= vm_pool->svp_max_size) {
		/* no more room to alloc */
		rw_exit(&vm_pool->svp_lock);
		return (NULL);
	}

	if ((vm_pool->svp_total_size + chunklen) > vm_pool->svp_max_size) {
		chunklen = vm_pool->svp_max_size - vm_pool->svp_total_size;
	}

	next = srpt_vmem_chunk_alloc(vm_pool, chunklen);
	if (next != NULL) {
		/*
		 * Note that the size of the chunk we got
		 * may not be the size we requested.  Use the
		 * length returned in the chunk itself.
		 */
		if (vmem_add(vm_pool->svp_vmem, (void*)(uintptr_t)next->mr_va,
		    next->mr_len, VM_NOSLEEP) == NULL) {
			srpt_vmem_chunk_free(vm_pool, next);
			SRPT_DPRINTF_L2("vmem_add failed");
		} else {
			vm_pool->svp_total_size += next->mr_len;
			avl_add(&vm_pool->svp_mr_list, next);
		}
	}

	rw_exit(&vm_pool->svp_lock);

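	/*
	 * Retry now that the arena may have grown.  This can still fail
	 * if the pool could not be extended or if another thread
	 * consumed the newly added chunk first.
	 */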
	result = vmem_alloc(vm_pool->svp_vmem, size, VM_NOSLEEP | VM_FIRSTFIT);

	return (result);
}

static void
srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size)
{
	vmem_free(vm_pool->svp_vmem, vaddr, size);
}

static int
srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr)
{
	avl_index_t		where;
	ib_vaddr_t		mrva = (ib_vaddr_t)(uintptr_t)vaddr;
	srpt_mr_t		chunk;
	srpt_mr_t		*nearest;
	ib_vaddr_t		chunk_end;
	int			status = DDI_FAILURE;

	rw_enter(&vm_pool->svp_lock, RW_READER);

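	/*
	 * Locate the registered chunk with the highest base address at
	 * or below vaddr: avl_find() catches an exact base match,
	 * avl_nearest(AVL_BEFORE) the more common interior case.
	 */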
	chunk.mr_va = mrva;
	nearest = avl_find(&vm_pool->svp_mr_list, &chunk, &where);

	if (nearest == NULL) {
		nearest = avl_nearest(&vm_pool->svp_mr_list, where,
		    AVL_BEFORE);
	}

	if (nearest != NULL) {
		/* Verify this chunk contains the specified address range */
		ASSERT(nearest->mr_va <= mrva);

		chunk_end = nearest->mr_va + nearest->mr_len;
		if (chunk_end >= mrva + size) {
			mr->mr_hdl = nearest->mr_hdl;
			mr->mr_va = mrva;
			mr->mr_len = size;
			mr->mr_lkey = nearest->mr_lkey;
			mr->mr_rkey = nearest->mr_rkey;
			status = DDI_SUCCESS;
		}
	}

	rw_exit(&vm_pool->svp_lock);
	return (status);
}

static srpt_mr_t *
srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool, ib_memlen_t chunksize)
{
	void			*chunk = NULL;
	srpt_mr_t		*result = NULL;

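	/*
	 * Back off under memory pressure: halve the request until the
	 * allocation succeeds or the size would drop below
	 * SRPT_MIN_CHUNKSIZE.
	 */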
	while ((chunk == NULL) && (chunksize >= SRPT_MIN_CHUNKSIZE)) {
		chunk = kmem_alloc(chunksize, KM_NOSLEEP);
		if (chunk == NULL) {
			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
			    "failed to alloc chunk of %d, trying %d",
			    (int)chunksize, (int)chunksize/2);
			chunksize /= 2;
		}
	}

	if (chunk != NULL) {
		result = srpt_reg_mem(vm_pool, (ib_vaddr_t)(uintptr_t)chunk,
		    chunksize);
		if (result == NULL) {
			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
			    "chunk registration failed");
			kmem_free(chunk, chunksize);
		}
	}

	return (result);
}

static void
srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr)
{
	void			*chunk = (void *)(uintptr_t)mr->mr_va;
	ib_memlen_t		chunksize = mr->mr_len;

	srpt_dereg_mem(vm_pool->svp_ioc, mr);
	kmem_free(chunk, chunksize);
}

static srpt_mr_t *
srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr, ib_memlen_t len)
{
	srpt_mr_t		*result = NULL;
	ibt_mr_attr_t		mr_attr;
	ibt_mr_desc_t		mr_desc;
	ibt_status_t		status;
	srpt_ioc_t		*ioc = vm_pool->svp_ioc;

	result = kmem_zalloc(sizeof (srpt_mr_t), KM_NOSLEEP);
	if (result == NULL) {
		SRPT_DPRINTF_L2("srpt_reg_mem: failed to allocate");
		return (NULL);
	}

	bzero(&mr_attr, sizeof (ibt_mr_attr_t));
	bzero(&mr_desc, sizeof (ibt_mr_desc_t));

	mr_attr.mr_vaddr = vaddr;
	mr_attr.mr_len = len;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = vm_pool->svp_flags;

	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
	    &mr_attr, &result->mr_hdl, &mr_desc);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("srpt_reg_mem: ibt_register_mr "
		    "failed %d", status);
		kmem_free(result, sizeof (srpt_mr_t));
		return (NULL);
	}

	result->mr_va = mr_attr.mr_vaddr;
	result->mr_len = mr_attr.mr_len;
	result->mr_lkey = mr_desc.md_lkey;
	result->mr_rkey = mr_desc.md_rkey;

	return (result);
}

static void
srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr)
{
	ibt_status_t		status;

	status = ibt_deregister_mr(ioc->ioc_ibt_hdl, mr->mr_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("srpt_dereg_mem, error deregistering MR (%d)",
		    status);
	}
	kmem_free(mr, sizeof (srpt_mr_t));
}

static int
srpt_vmem_mr_compare(const void *a, const void *b)
{
	srpt_mr_t		*mr1 = (srpt_mr_t *)a;
	srpt_mr_t		*mr2 = (srpt_mr_t *)b;

	/* sort and match by virtual address */
	if (mr1->mr_va < mr2->mr_va) {
		return (-1);
	} else if (mr1->mr_va > mr2->mr_va) {
		return (1);
	}

	return (0);
}