xref: /illumos-gate/usr/src/uts/common/io/ib/clients/iser/iser_idm.c (revision 6fd984eca41d3425a2dc1f01db27c96a33046265)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/ddi.h>
27 #include <sys/sunddi.h>
28 
29 #include <sys/socket.h>		/* networking stuff */
30 #include <sys/sysmacros.h>	/* offsetof */
31 
32 #include <sys/ib/clients/iser/iser.h>
33 #include <sys/ib/clients/iser/iser_idm.h>
34 
35 /*
36  * iSER transport routines
37  *
 * All transport functions except iser_tgt_svc_create() are called through
 * the ops vector; iser_tgt_svc_create() is called from the async handler
 * in addition to being called by the ULP.
41  */
42 
43 static void iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu);
44 
45 static idm_status_t iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
46 static idm_status_t iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
47 static idm_status_t iser_tgt_enable_datamover(idm_conn_t *ic);
48 static idm_status_t iser_ini_enable_datamover(idm_conn_t *ic);
49 static void iser_notice_key_values(struct idm_conn_s *ic,
50     nvlist_t *negotiated_nvl);
51 static kv_status_t iser_declare_key_values(struct idm_conn_s *ic,
52     nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
53 static idm_status_t iser_free_task_rsrcs(idm_task_t *idt);
54 static kv_status_t iser_negotiate_key_values(idm_conn_t *ic,
55     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
56 static kv_status_t iser_handle_numerical(nvpair_t *nvp, uint64_t value,
57     const idm_kv_xlate_t *ikvx, uint64_t min_value, uint64_t max_value,
58     uint64_t iser_max_value, nvlist_t *request_nvl, nvlist_t *response_nvl,
59     nvlist_t *negotiated_nvl);
60 static kv_status_t iser_handle_boolean(nvpair_t *nvp, boolean_t value,
61     const idm_kv_xlate_t *ikvx, boolean_t iser_value, nvlist_t *request_nvl,
62     nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
63 static kv_status_t iser_handle_digest(nvpair_t *choices,
64     const idm_kv_xlate_t *ikvx, nvlist_t *request_nvl, nvlist_t *response_nvl,
65     nvlist_t *negotiated_nvl);
66 static kv_status_t iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx,
67     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
68 static kv_status_t iser_process_request_nvlist(nvlist_t *request_nvl,
69     nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
70 static boolean_t iser_conn_is_capable(idm_conn_req_t *ic,
71     idm_transport_caps_t *caps);
72 static idm_status_t iser_buf_alloc(idm_buf_t *idb, uint64_t buflen);
73 static idm_status_t iser_buf_setup(idm_buf_t *idb);
74 static void iser_buf_teardown(idm_buf_t *idb);
75 static void iser_buf_free(idm_buf_t *idb);
76 static void iser_tgt_svc_destroy(struct idm_svc_s *is);
77 static idm_status_t iser_tgt_svc_online(struct idm_svc_s *is);
78 static void iser_tgt_svc_offline(struct idm_svc_s *is);
79 static idm_status_t iser_tgt_conn_connect(struct idm_conn_s *ic);
80 static idm_status_t iser_ini_conn_create(idm_conn_req_t *cr,
81     struct idm_conn_s *ic);
82 static void iser_conn_destroy(struct idm_conn_s *ic);
83 static idm_status_t iser_ini_conn_connect(struct idm_conn_s *ic);
84 static void iser_conn_disconnect(struct idm_conn_s *ic);
85 
86 /*
87  * iSER IDM transport operations
88  */
89 idm_transport_ops_t iser_transport_ops = {
90 	&iser_pdu_tx,			/* it_tx_pdu */
91 	&iser_buf_tx_to_ini,		/* it_buf_tx_to_ini */
92 	&iser_buf_rx_from_ini,		/* it_buf_rx_from_ini */
93 	NULL,				/* it_rx_datain */
94 	NULL,				/* it_rx_rtt */
95 	NULL,				/* it_rx_dataout */
96 	NULL,				/* it_alloc_conn_rsrc */
97 	NULL,				/* it_free_conn_rsrc */
98 	&iser_tgt_enable_datamover,	/* it_tgt_enable_datamover */
99 	&iser_ini_enable_datamover,	/* it_ini_enable_datamover */
100 	NULL,				/* it_conn_terminate */
101 	&iser_free_task_rsrcs,		/* it_free_task_rsrc */
102 	&iser_negotiate_key_values,	/* it_negotiate_key_values */
103 	&iser_notice_key_values,	/* it_notice_key_values */
104 	&iser_conn_is_capable,		/* it_conn_is_capable */
105 	&iser_buf_alloc,		/* it_buf_alloc */
106 	&iser_buf_free,			/* it_buf_free */
107 	&iser_buf_setup,		/* it_buf_setup */
108 	&iser_buf_teardown,		/* it_buf_teardown */
109 	&iser_tgt_svc_create,		/* it_tgt_svc_create */
110 	&iser_tgt_svc_destroy,		/* it_tgt_svc_destroy */
111 	&iser_tgt_svc_online,		/* it_tgt_svc_online */
112 	&iser_tgt_svc_offline,		/* it_tgt_svc_offline */
113 	&iser_conn_destroy,		/* it_tgt_conn_destroy */
114 	&iser_tgt_conn_connect,		/* it_tgt_conn_connect */
115 	&iser_conn_disconnect,		/* it_tgt_conn_disconnect */
116 	&iser_ini_conn_create,		/* it_ini_conn_create */
117 	&iser_conn_destroy,		/* it_ini_conn_destroy */
118 	&iser_ini_conn_connect,		/* it_ini_conn_connect */
119 	&iser_conn_disconnect,		/* it_ini_conn_disconnect */
120 	&iser_declare_key_values	/* it_declare_key_values */
121 };
122 
123 /*
124  * iSER IDM transport capabilities
125  */
126 idm_transport_caps_t iser_transport_caps = {
127 	0		/* flags */
128 };
129 
130 int
131 iser_idm_register()
132 {
133 	idm_transport_attr_t	attr;
134 	idm_status_t		status;
135 
136 	attr.type	= IDM_TRANSPORT_TYPE_ISER;
137 	attr.it_ops	= &iser_transport_ops;
138 	attr.it_caps	= &iser_transport_caps;
139 
140 	status = idm_transport_register(&attr);
141 	if (status != IDM_STATUS_SUCCESS) {
142 		ISER_LOG(CE_WARN, "Failed to register iSER transport with IDM");
143 		return (DDI_FAILURE);
144 	}
145 
146 	ISER_LOG(CE_NOTE, "Registered iSER transport with IDM");
147 
148 	return (DDI_SUCCESS);
149 }
150 
151 /*
152  * iser_ini_conn_create()
153  * Allocate an iSER initiator connection context
154  */
155 static idm_status_t
156 iser_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
157 {
158 	iser_chan_t	*iser_chan = NULL;
159 	iser_conn_t	*iser_conn;
160 
161 	/* Allocate and set up a connection handle */
162 	iser_conn = kmem_zalloc(sizeof (iser_conn_t), KM_SLEEP);
163 	mutex_init(&iser_conn->ic_lock, NULL, MUTEX_DRIVER, NULL);
164 
165 	/* Allocate and open a channel to the target node */
166 	iser_chan = iser_channel_alloc(NULL, &cr->cr_ini_dst_addr);
167 	if (iser_chan == NULL) {
168 		ISER_LOG(CE_WARN, "iser: failed to allocate channel");
169 		mutex_destroy(&iser_conn->ic_lock);
170 		kmem_free(iser_conn, sizeof (iser_conn_t));
171 		return (IDM_STATUS_FAIL);
172 	}
173 
174 	/*
175 	 * The local IP and remote IP are filled in iser_channel_alloc. The
176 	 * remote port needs to be filled in from idm_conn_req_t. The local
177 	 * port is irrelevant. Internal representation of the port in the
178 	 * IDM sockaddr structure is in network byte order. IBT expects the
179 	 * port in host byte order.
180 	 */
181 	switch (cr->cr_ini_dst_addr.sin.sa_family) {
182 	case AF_INET:
183 		iser_chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin4.sin_port);
184 		break;
185 	case AF_INET6:
186 		iser_chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin6.sin6_port);
187 		break;
188 	default:
189 		iser_chan->ic_rport = ISCSI_LISTEN_PORT;
190 	}
191 	iser_chan->ic_lport = 0;
192 
193 	cv_init(&iser_conn->ic_stage_cv, NULL, CV_DEFAULT, NULL);
194 	iser_conn->ic_type = ISER_CONN_TYPE_INI;
195 	iser_conn->ic_stage = ISER_CONN_STAGE_ALLOCATED;
196 	iser_conn->ic_chan = iser_chan;
197 	iser_conn->ic_idmc = ic;
198 
199 	/*
200 	 * Set a pointer to the iser_conn in the iser_chan for easy
201 	 * access during CM event handling
202 	 */
203 	iser_chan->ic_conn = iser_conn;
204 
205 	/* Set the iSER conn handle in the IDM conn private handle */
206 	ic->ic_transport_private = (void *)iser_conn;
207 
208 	/* Set the transport header length */
209 	ic->ic_transport_hdrlen = ISER_HEADER_LENGTH;
210 
211 	return (IDM_STATUS_SUCCESS);
212 }
213 
214 /*
215  * iser_internal_conn_destroy()
216  * Tear down iSER-specific connection resources. This is used below
217  * in iser_conn_destroy(), but also from the CM code when we may have
218  * some of the connection established, but not fully connected.
219  */
220 void
221 iser_internal_conn_destroy(iser_conn_t *ic)
222 {
223 	mutex_enter(&ic->ic_lock);
224 	iser_channel_free(ic->ic_chan);
225 	if ((ic->ic_type == ISER_CONN_TYPE_TGT) &&
226 	    (ic->ic_stage == ISER_CONN_STAGE_ALLOCATED)) {
227 		/*
228 		 * This is a target connection that has yet to be
229 		 * established. Free our reference on the target
230 		 * service handle.
231 		 */
232 		iser_tgt_svc_rele(ic->ic_idms->is_iser_svc);
233 	}
234 	cv_destroy(&ic->ic_stage_cv);
235 	mutex_exit(&ic->ic_lock);
236 	mutex_destroy(&ic->ic_lock);
237 	kmem_free(ic, sizeof (iser_conn_t));
238 }
239 
240 /*
241  * iser_conn_destroy()
242  * Tear down an initiator or target connection.
243  */
244 static void
245 iser_conn_destroy(idm_conn_t *ic)
246 {
247 	iser_conn_t	*iser_conn;
248 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
249 
250 	iser_internal_conn_destroy(iser_conn);
251 	ic->ic_transport_private = NULL;
252 }
253 
254 /*
255  * iser_ini_conn_connect()
256  * Establish the connection referred to by the handle previously allocated via
257  * iser_ini_conn_create().
258  */
259 static idm_status_t
260 iser_ini_conn_connect(idm_conn_t *ic)
261 {
262 	iser_conn_t		*iser_conn;
263 	iser_status_t		status;
264 
265 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
266 
267 	status = iser_channel_open(iser_conn->ic_chan);
268 	if (status != ISER_STATUS_SUCCESS) {
269 		ISER_LOG(CE_WARN, "iser: failed to open channel");
270 		return (IDM_STATUS_FAIL);
271 	}
272 
273 	/*
274 	 * Set the local and remote addresses in the idm conn handle.
275 	 */
276 	iser_ib_conv_ibtaddr2sockaddr(&ic->ic_laddr,
277 	    &iser_conn->ic_chan->ic_localip, iser_conn->ic_chan->ic_lport);
278 	iser_ib_conv_ibtaddr2sockaddr(&ic->ic_raddr,
279 	    &iser_conn->ic_chan->ic_remoteip, iser_conn->ic_chan->ic_rport);
280 
281 	mutex_enter(&iser_conn->ic_lock);
282 	/* Hold a reference on the IDM connection handle */
283 	idm_conn_hold(ic);
284 	iser_conn->ic_stage = ISER_CONN_STAGE_IC_CONNECTED;
285 	mutex_exit(&iser_conn->ic_lock);
286 
287 	return (IDM_STATUS_SUCCESS);
288 }
289 
290 /*
291  * iser_conn_disconnect()
292  * Shutdown this iSER connection
293  */
294 static void
295 iser_conn_disconnect(idm_conn_t *ic)
296 {
297 	iser_conn_t	*iser_conn;
298 
299 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
300 
301 	mutex_enter(&iser_conn->ic_lock);
302 	iser_conn->ic_stage = ISER_CONN_STAGE_CLOSING;
303 	mutex_exit(&iser_conn->ic_lock);
304 
305 	/* Close the channel */
306 	iser_channel_close(iser_conn->ic_chan);
307 
308 	/* Free our reference held on the IDM conn handle, and set CLOSED */
309 	mutex_enter(&iser_conn->ic_lock);
310 	idm_conn_rele(iser_conn->ic_idmc);
311 	iser_conn->ic_stage = ISER_CONN_STAGE_CLOSED;
312 	mutex_exit(&iser_conn->ic_lock);
313 }
314 
315 /*
316  * iser_tgt_svc_create()
317  * Establish the CM service for inbound iSER service requests on the port
318  * indicated by sr->sr_port.
319  * idm_svc_req_t contains the service parameters.
320  */
321 idm_status_t
322 iser_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
323 {
324 	iser_svc_t		*iser_svc;
325 
326 	int			rc;
327 
328 	iser_svc = kmem_zalloc(sizeof (iser_svc_t), KM_SLEEP);
329 	is->is_iser_svc = (void *)iser_svc;
330 
331 	idm_refcnt_init(&iser_svc->is_refcnt, iser_svc);
332 
333 	list_create(&iser_svc->is_sbindlist, sizeof (iser_sbind_t),
334 	    offsetof(iser_sbind_t, is_list_node));
335 	iser_svc->is_svcid = ibt_get_ip_sid(IPPROTO_TCP, sr->sr_port);
336 
337 	/*
338 	 * Register an iSER target service for the requested port
339 	 * and set the iser_svc structure in the idm_svc handle.
340 	 */
341 	rc = iser_register_service(is);
342 	if (rc != DDI_SUCCESS) {
343 		ISER_LOG(CE_NOTE, "iser_tgt_svc_create: iser_register_service "
344 		    "failed on port (%d): rc (0x%x)", sr->sr_port, rc);
345 		(void) ibt_release_ip_sid(iser_svc->is_svcid);
346 		list_destroy(&iser_svc->is_sbindlist);
347 		idm_refcnt_destroy(&iser_svc->is_refcnt);
348 		kmem_free(iser_svc, sizeof (iser_svc_t));
349 		return (IDM_STATUS_FAIL);
350 	}
351 
352 	return (IDM_STATUS_SUCCESS);
353 }
354 
355 /* IDM refcnt utilities for the iSER service handle */
356 void
357 iser_tgt_svc_hold(iser_svc_t *is)
358 {
359 	idm_refcnt_hold(&is->is_refcnt);
360 }
361 
362 void
363 iser_tgt_svc_rele(iser_svc_t *is)
364 {
365 	idm_refcnt_rele(&is->is_refcnt);
366 }
367 
368 /*
369  * iser_tgt_svc_destroy()
370  * Teardown resources allocated in iser_tgt_svc_create()
371  */
372 static void
373 iser_tgt_svc_destroy(idm_svc_t *is)
374 {
375 	iser_svc_t	*iser_svc;
376 
377 	iser_svc = (iser_svc_t *)is->is_iser_svc;
378 
379 	/*
380 	 * Deregister the iSER target service on this port and free
381 	 * the iser_svc structure from the idm_svc handle.
382 	 */
383 	iser_deregister_service(is);
384 
385 	/* Wait for the iSER service handle's refcnt to zero */
386 	idm_refcnt_wait_ref(&iser_svc->is_refcnt);
387 
388 	list_destroy(&iser_svc->is_sbindlist);
389 
390 	idm_refcnt_destroy(&iser_svc->is_refcnt);
391 
392 	kmem_free(iser_svc, sizeof (iser_svc_t));
393 }
394 
395 /*
396  * iser_tgt_svc_online()
397  * Bind the CM service allocated via iser_tgt_svc_create().
398  */
399 static idm_status_t
400 iser_tgt_svc_online(idm_svc_t *is)
401 {
402 	iser_status_t	status;
403 
404 	mutex_enter(&is->is_mutex);
405 
406 	/*
407 	 * Pass the IDM service handle as the client private data for
408 	 * later use.
409 	 */
410 	status = iser_bind_service(is);
411 	if (status != ISER_STATUS_SUCCESS) {
412 		ISER_LOG(CE_NOTE, "iser_tgt_svc_online: failed bind service");
413 		mutex_exit(&is->is_mutex);
414 		return (IDM_STATUS_FAIL);
415 	}
416 
417 	mutex_exit(&is->is_mutex);
418 	return (IDM_STATUS_SUCCESS);
419 }
420 
421 /*
422  * iser_tgt_svc_offline
423  * Unbind the service on all available HCA ports.
424  */
425 static void
426 iser_tgt_svc_offline(idm_svc_t *is)
427 {
428 	mutex_enter(&is->is_mutex);
429 
430 	iser_unbind_service(is);
431 	mutex_exit(&is->is_mutex);
432 
433 }
434 
435 /*
436  * iser_tgt_conn_connect()
437  * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
438  * is invoked from the SM as a result of an inbound connection request.
439  */
440 /* ARGSUSED */
441 static idm_status_t
442 iser_tgt_conn_connect(idm_conn_t *ic)
443 {
444 	/* No action required */
445 	return (IDM_STATUS_SUCCESS);
446 }
447 
448 /*
449  * iser_tgt_enable_datamover() sets the transport private data on the
450  * idm_conn_t and move the conn stage to indicate logged in.
451  */
452 static idm_status_t
453 iser_tgt_enable_datamover(idm_conn_t *ic)
454 {
455 	iser_conn_t	*iser_conn;
456 
457 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
458 	mutex_enter(&iser_conn->ic_lock);
459 
460 	iser_conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN;
461 	mutex_exit(&iser_conn->ic_lock);
462 
463 	return (IDM_STATUS_SUCCESS);
464 }
465 
466 /*
467  * iser_ini_enable_datamover() is used by the iSCSI initator to request that a
468  * specified iSCSI connection be transitioned to iSER-assisted mode.
469  * In the case of iSER, the RDMA resources for a reliable connection have
470  * already been allocated at this time, and the 'RDMAExtensions' is set to 'Yes'
471  * so no further negotiations are required at this time.
472  * The initiator now sends the first iSER Message - 'Hello' to the target
473  * and waits for  the 'HelloReply' Message from the target before directing
474  * the initiator to go into the Full Feature Phase.
475  *
476  * No transport op is required on the target side.
477  */
478 static idm_status_t
479 iser_ini_enable_datamover(idm_conn_t *ic)
480 {
481 
482 	iser_conn_t	*iser_conn;
483 	clock_t		delay;
484 	int		status;
485 
486 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
487 
488 	mutex_enter(&iser_conn->ic_lock);
489 	iser_conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT;
490 	mutex_exit(&iser_conn->ic_lock);
491 
492 	/* Send the iSER Hello Message to the target */
493 	status = iser_xfer_hello_msg(iser_conn->ic_chan);
494 	if (status != ISER_STATUS_SUCCESS) {
495 
496 		mutex_enter(&iser_conn->ic_lock);
497 		iser_conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT_FAIL;
498 		mutex_exit(&iser_conn->ic_lock);
499 
500 		return (IDM_STATUS_FAIL);
501 	}
502 
503 	/*
504 	 * Acquire the iser_conn->ic_lock and wait for the iSER HelloReply
505 	 * Message from the target, i.e. iser_conn_stage_t to be set to
506 	 * ISER_CONN_STAGE_HELLOREPLY_RCV. If the handshake does not
507 	 * complete within a specified time period (.5s), then return failure.
508 	 *
509 	 */
510 	delay = ddi_get_lbolt() + drv_usectohz(500000);
511 
512 	mutex_enter(&iser_conn->ic_lock);
513 	while ((iser_conn->ic_stage != ISER_CONN_STAGE_HELLOREPLY_RCV) &&
514 	    (ddi_get_lbolt() < delay)) {
515 
516 		(void) cv_timedwait(&iser_conn->ic_stage_cv,
517 		    &iser_conn->ic_lock, delay);
518 	}
519 
520 	switch (iser_conn->ic_stage) {
521 	case ISER_CONN_STAGE_HELLOREPLY_RCV:
522 		iser_conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN;
523 		mutex_exit(&iser_conn->ic_lock);
524 		/*
525 		 * Return suceess to indicate that the initiator connection can
526 		 * go to the next phase - FFP
527 		 */
528 		return (IDM_STATUS_SUCCESS);
529 	default:
530 		iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_RCV_FAIL;
531 		mutex_exit(&iser_conn->ic_lock);
532 		return (IDM_STATUS_FAIL);
533 
534 	}
535 
536 	/* STATEMENT_NEVER_REACHED */
537 }
538 
539 /*
540  * iser_free_task_rsrcs()
541  * This routine does not currently need to do anything. It is used in
542  * the sockets transport to explicitly complete any buffers on the task,
543  * but we can rely on our RCaP layer to finish up it's work without any
544  * intervention.
545  */
546 /* ARGSUSED */
547 idm_status_t
548 iser_free_task_rsrcs(idm_task_t *idt)
549 {
550 	return (IDM_STATUS_SUCCESS);
551 }
552 
553 /*
554  * iser_negotiate_key_values() validates the key values for this connection
555  */
556 /* ARGSUSED */
557 static kv_status_t
558 iser_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
559     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
560 {
561 	kv_status_t		kvrc = KV_HANDLED;
562 
563 	/* Process the request nvlist */
564 	kvrc = iser_process_request_nvlist(request_nvl, response_nvl,
565 	    negotiated_nvl);
566 
567 	/* We must be using RDMA, so set the flag on the ic handle */
568 	ic->ic_rdma_extensions = B_TRUE;
569 
570 	return (kvrc);
571 }
572 
573 /* Process a list of key=value pairs from a login request */
574 static kv_status_t
575 iser_process_request_nvlist(nvlist_t *request_nvl, nvlist_t *response_nvl,
576     nvlist_t *negotiated_nvl)
577 {
578 	const idm_kv_xlate_t	*ikvx;
579 	char			*nvp_name;
580 	nvpair_t		*nvp;
581 	nvpair_t		*next_nvp;
582 	kv_status_t		kvrc = KV_HANDLED;
583 	boolean_t		transit = B_TRUE;
584 
585 	/* Process the list */
586 	nvp = nvlist_next_nvpair(request_nvl, NULL);
587 	while (nvp != NULL) {
588 		next_nvp = nvlist_next_nvpair(request_nvl, nvp);
589 
590 		nvp_name = nvpair_name(nvp);
591 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
592 
593 		kvrc = iser_handle_key(nvp, ikvx, request_nvl, response_nvl,
594 		    negotiated_nvl);
595 		if (kvrc != KV_HANDLED) {
596 			if (kvrc == KV_HANDLED_NO_TRANSIT) {
597 				/* we countered, clear the transit flag */
598 				transit = B_FALSE;
599 			} else {
600 				/* error, bail out */
601 				break;
602 			}
603 		}
604 
605 		nvp = next_nvp;
606 	}
607 	/*
608 	 * If the current kv_status_t indicates success, we've handled
609 	 * the entire list. Explicitly set kvrc to NO_TRANSIT if we've
610 	 * cleared the transit flag along the way.
611 	 */
612 	if ((kvrc == KV_HANDLED) && (transit == B_FALSE)) {
613 		kvrc = KV_HANDLED_NO_TRANSIT;
614 	}
615 
616 	return (kvrc);
617 }
618 
619 /* Handle a given list, boolean or numerical key=value pair */
620 static kv_status_t
621 iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx,
622     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
623 {
624 	kv_status_t		kvrc = KV_UNHANDLED;
625 	boolean_t		bool_val;
626 	uint64_t		num_val;
627 	int			nvrc;
628 
629 	/* Retrieve values for booleans and numericals */
630 	switch (ikvx->ik_key_id) {
631 		/* Booleans */
632 	case KI_RDMA_EXTENSIONS:
633 	case KI_IMMEDIATE_DATA:
634 	case KI_IFMARKER:
635 	case KI_OFMARKER:
636 		nvrc = nvpair_value_boolean_value(nvp, &bool_val);
637 		ASSERT(nvrc == 0);
638 		break;
639 		/* Numericals */
640 	case KI_INITIATOR_RECV_DATA_SEGMENT_LENGTH:
641 	case KI_TARGET_RECV_DATA_SEGMENT_LENGTH:
642 	case KI_MAX_OUTSTANDING_UNEXPECTED_PDUS:
643 		nvrc = nvpair_value_uint64(nvp, &num_val);
644 		ASSERT(nvrc == 0);
645 		break;
646 	default:
647 		break;
648 	}
649 
650 	/* Now handle the values according to the key name */
651 	switch (ikvx->ik_key_id) {
652 	case KI_HEADER_DIGEST:
653 	case KI_DATA_DIGEST:
654 		/* Ensure "None" */
655 		kvrc = iser_handle_digest(nvp, ikvx, request_nvl, response_nvl,
656 		    negotiated_nvl);
657 		break;
658 	case KI_RDMA_EXTENSIONS:
659 		/* Ensure "Yes" */
660 		kvrc = iser_handle_boolean(nvp, bool_val, ikvx, B_TRUE,
661 		    request_nvl, response_nvl, negotiated_nvl);
662 		break;
663 	case KI_TARGET_RECV_DATA_SEGMENT_LENGTH:
664 		/* Validate the proposed value */
665 		kvrc = iser_handle_numerical(nvp, num_val, ikvx,
666 		    ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MIN,
667 		    ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MAX,
668 		    ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX,
669 		    request_nvl, response_nvl, negotiated_nvl);
670 		break;
671 	case KI_INITIATOR_RECV_DATA_SEGMENT_LENGTH:
672 		/* Validate the proposed value */
673 		kvrc = iser_handle_numerical(nvp, num_val, ikvx,
674 		    ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MIN,
675 		    ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MAX,
676 		    ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX,
677 		    request_nvl, response_nvl, negotiated_nvl);
678 		break;
679 	case KI_IMMEDIATE_DATA:
680 	case KI_OFMARKER:
681 	case KI_IFMARKER:
682 		/* Ensure "No" */
683 		kvrc = iser_handle_boolean(nvp, bool_val, ikvx, B_FALSE,
684 		    request_nvl, response_nvl, negotiated_nvl);
685 		break;
686 	case KI_MAX_OUTSTANDING_UNEXPECTED_PDUS:
687 		/* Validate the proposed value */
688 		kvrc = iser_handle_numerical(nvp, num_val, ikvx,
689 		    ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MIN,
690 		    ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MAX,
691 		    ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_IMPL_MAX,
692 		    request_nvl, response_nvl, negotiated_nvl);
693 		break;
694 	default:
695 		/*
696 		 * All other keys, including invalid keys, will be
697 		 * handled at the client layer.
698 		 */
699 		kvrc = KV_HANDLED;
700 		break;
701 	}
702 
703 	return (kvrc);
704 }
705 
706 /* Ensure that "None" is an option in the digest list, and select it */
707 static kv_status_t
708 iser_handle_digest(nvpair_t *choices, const idm_kv_xlate_t *ikvx,
709     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
710 {
711 	kv_status_t		kvrc = KV_VALUE_ERROR;
712 	int			nvrc = 0;
713 	nvpair_t		*digest_choice;
714 	char			*digest_choice_string;
715 
716 	/*
717 	 * Loop through all digest choices.  We need to enforce no
718 	 * "None" for both header and data digest.  If we find our
719 	 * required value, add the value to our negotiated values list
720 	 * and respond with that value in the login response. If not,
721 	 * indicate a value error for the iSCSI layer to work with.
722 	 */
723 	digest_choice = idm_get_next_listvalue(choices, NULL);
724 	while (digest_choice != NULL) {
725 		nvrc = nvpair_value_string(digest_choice,
726 		    &digest_choice_string);
727 		ASSERT(nvrc == 0);
728 
729 		if (strcasecmp(digest_choice_string, "none") == 0) {
730 
731 			/* Add to negotiated values list */
732 			nvrc = nvlist_add_string(negotiated_nvl,
733 			    ikvx->ik_key_name, digest_choice_string);
734 			kvrc = idm_nvstat_to_kvstat(nvrc);
735 			if (nvrc == 0) {
736 				/* Add to login response list */
737 				nvrc = nvlist_add_string(response_nvl,
738 				    ikvx->ik_key_name, digest_choice_string);
739 				kvrc = idm_nvstat_to_kvstat(nvrc);
740 				/* Remove from the request (we've handled it) */
741 				(void) nvlist_remove_all(request_nvl,
742 				    ikvx->ik_key_name);
743 			}
744 			break;
745 		}
746 		digest_choice = idm_get_next_listvalue(choices,
747 		    digest_choice);
748 	}
749 
750 	return (kvrc);
751 }
752 
753 /* Validate a proposed boolean value, and set the alternate if necessary */
754 static kv_status_t
755 iser_handle_boolean(nvpair_t *nvp, boolean_t value, const idm_kv_xlate_t *ikvx,
756     boolean_t iser_value, nvlist_t *request_nvl, nvlist_t *response_nvl,
757     nvlist_t *negotiated_nvl)
758 {
759 	kv_status_t		kvrc;
760 	int			nvrc;
761 	boolean_t		respond;
762 
763 	if (value != iser_value) {
764 		/*
765 		 * Respond back to initiator with our value, and
766 		 * set the return value to unset the transit bit.
767 		 */
768 		value = iser_value;
769 		kvrc = KV_HANDLED_NO_TRANSIT;
770 		nvrc = 0;
771 		respond = B_TRUE;
772 
773 	} else {
774 		/* Add this to our negotiated values */
775 		nvrc = nvlist_add_nvpair(negotiated_nvl, nvp);
776 		/* Respond if this is not a declarative */
777 		respond = (ikvx->ik_declarative == B_FALSE);
778 	}
779 
780 	/* Response of Simple-value Negotiation */
781 	if (nvrc == 0 && respond) {
782 		nvrc = nvlist_add_boolean_value(response_nvl,
783 		    ikvx->ik_key_name, value);
784 		/* Remove from the request (we've handled it) */
785 		(void) nvlist_remove_all(request_nvl, ikvx->ik_key_name);
786 	}
787 
788 	if (kvrc == KV_HANDLED_NO_TRANSIT) {
789 		return (kvrc);
790 	}
791 
792 	return (idm_nvstat_to_kvstat(nvrc));
793 }
794 
795 /*
796  * Validate a proposed value against the iSER and/or iSCSI RFC's minimum and
797  * maximum values, and set an alternate, if necessary.  Note that the value
798  * 'iser_max_value" represents our implementation maximum (typically the max).
799  */
800 static kv_status_t
801 iser_handle_numerical(nvpair_t *nvp, uint64_t value, const idm_kv_xlate_t *ikvx,
802     uint64_t min_value, uint64_t max_value, uint64_t iser_max_value,
803     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
804 {
805 	kv_status_t		kvrc;
806 	int			nvrc;
807 	boolean_t		respond;
808 
809 	/* Validate against standard */
810 	if ((value < min_value) || (value > max_value)) {
811 		kvrc = KV_VALUE_ERROR;
812 	} else {
813 		if (value > iser_max_value) {
814 			/*
815 			 * Respond back to initiator with our value, and
816 			 * set the return value to unset the transit bit.
817 			 */
818 			value = iser_max_value;
819 			kvrc = KV_HANDLED_NO_TRANSIT;
820 			nvrc = 0;
821 			respond = B_TRUE;
822 		} else {
823 			/* Add this to our negotiated values */
824 			nvrc = nvlist_add_nvpair(negotiated_nvl, nvp);
825 			/* Respond if this is not a declarative */
826 			respond = (ikvx->ik_declarative == B_FALSE);
827 		}
828 
829 		/* Response of Simple-value Negotiation */
830 		if (nvrc == 0 && respond) {
831 			nvrc = nvlist_add_uint64(response_nvl,
832 			    ikvx->ik_key_name, value);
833 			/* Remove from the request (we've handled it) */
834 			(void) nvlist_remove_all(request_nvl,
835 			    ikvx->ik_key_name);
836 		}
837 	}
838 
839 	if (kvrc == KV_HANDLED_NO_TRANSIT) {
840 		return (kvrc);
841 	}
842 
843 	return (idm_nvstat_to_kvstat(nvrc));
844 }
845 
846 /*
847  * iser_declare_key_values() declares the declarative key values for
848  * this connection.
849  */
850 /* ARGSUSED */
851 static kv_status_t
852 iser_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
853     nvlist_t *outgoing_nvl)
854 {
855 	kv_status_t		kvrc;
856 	int			nvrc = 0;
857 	int			rc;
858 	uint64_t		uint64_val;
859 
860 	if ((rc = nvlist_lookup_uint64(config_nvl,
861 	    ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, &uint64_val)) != ENOENT) {
862 		ASSERT(rc == 0);
863 		if (outgoing_nvl) {
864 			nvrc = nvlist_add_uint64(outgoing_nvl,
865 			    ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, uint64_val);
866 		}
867 	}
868 	kvrc = idm_nvstat_to_kvstat(nvrc);
869 	return (kvrc);
870 }
871 
872 /*
873  * iser_notice_key_values() activates the negotiated key values for
874  * this connection.
875  */
876 static void
877 iser_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
878 {
879 	iser_conn_t		*iser_conn;
880 	boolean_t		boolean_val;
881 	uint64_t		uint64_val;
882 	int			nvrc;
883 
884 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
885 
886 	/*
887 	 * Validate the final negotiated operational parameters,
888 	 * and save a copy.
889 	 */
890 	if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl,
891 	    "HeaderDigest", &boolean_val)) != ENOENT) {
892 		ASSERT(nvrc == 0);
893 		iser_conn->ic_op_params.op_header_digest = boolean_val;
894 	}
895 
896 	if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl,
897 	    "DataDigest", &boolean_val)) != ENOENT) {
898 		ASSERT(nvrc == 0);
899 		iser_conn->ic_op_params.op_data_digest = boolean_val;
900 	}
901 
902 	if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl,
903 	    "RDMAExtensions", &boolean_val)) != ENOENT) {
904 		ASSERT(nvrc == 0);
905 		iser_conn->ic_op_params.op_rdma_extensions = boolean_val;
906 	}
907 
908 	if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl,
909 	    "OFMarker", &boolean_val)) != ENOENT) {
910 		ASSERT(nvrc == 0);
911 		iser_conn->ic_op_params.op_ofmarker = boolean_val;
912 	}
913 
914 	if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl,
915 	    "IFMarker", &boolean_val)) != ENOENT) {
916 		ASSERT(nvrc == 0);
917 		iser_conn->ic_op_params.op_ifmarker = boolean_val;
918 	}
919 
920 	if ((nvrc = nvlist_lookup_uint64(negotiated_nvl,
921 	    "TargetRecvDataSegmentLength", &uint64_val)) != ENOENT) {
922 		ASSERT(nvrc == 0);
923 		iser_conn->ic_op_params.op_target_recv_data_segment_length =
924 		    uint64_val;
925 	}
926 
927 	if ((nvrc = nvlist_lookup_uint64(negotiated_nvl,
928 	    "InitiatorRecvDataSegmentLength", &uint64_val)) != ENOENT) {
929 		ASSERT(nvrc == 0);
930 		iser_conn->ic_op_params.op_initiator_recv_data_segment_length =
931 		    uint64_val;
932 	}
933 
934 	if ((nvrc = nvlist_lookup_uint64(negotiated_nvl,
935 	    "MaxOutstandingUnexpectedPDUs", &uint64_val)) != ENOENT) {
936 		ASSERT(nvrc == 0);
937 		iser_conn->ic_op_params.op_max_outstanding_unexpected_pdus =
938 		    uint64_val;
939 	}
940 
941 	/* Test boolean values which are required by RFC 5046 */
942 #ifdef ISER_DEBUG
943 	ASSERT(iser_conn->ic_op_params.op_rdma_extensions == B_TRUE);
944 	ASSERT(iser_conn->ic_op_params.op_header_digest == B_FALSE);
945 	ASSERT(iser_conn->ic_op_params.op_data_digest == B_FALSE);
946 	ASSERT(iser_conn->ic_op_params.op_ofmarker == B_FALSE);
947 	ASSERT(iser_conn->ic_op_params.op_ifmarker == B_FALSE);
948 #endif
949 }
950 
951 
952 /*
953  * iser_conn_is_capable() verifies that the passed connection is provided
954  * for by an iSER-capable link.
955  * NOTE: When utilizing InfiniBand RC as an RCaP, this routine will check
956  * if the link is on IPoIB. This only indicates a chance that the link is
957  * on an RCaP, and thus iSER-capable, since we may be running on an IB-Eth
958  * gateway, or other IB but non-RCaP link. Rather than fully establishing the
959  * link to verify RCaP here, we instead will return B_TRUE
960  * indicating the link is iSER-capable, if the link is IPoIB. If then in
961  * iser_ini_conn_create() the link proves not be RCaP, IDM will fall back
962  * to using the IDM Sockets transport.
963  */
964 /* ARGSUSED */
965 static boolean_t
966 iser_conn_is_capable(idm_conn_req_t *cr, idm_transport_caps_t *caps)
967 {
968 	/* A NULL value for laddr indicates implicit source */
969 	return (iser_path_exists(NULL, &cr->cr_ini_dst_addr));
970 }
971 
972 /*
973  * iser_pdu_tx() transmits a Control PDU via the iSER channel. We pull the
974  * channel out of the idm_conn_t passed in, and pass it and the pdu to the
975  * iser_xfer routine.
976  */
977 static void
978 iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu)
979 {
980 	iser_conn_t	*iser_conn;
981 	iser_status_t	iser_status;
982 
983 	iser_conn = (iser_conn_t *)ic->ic_transport_private;
984 
985 	iser_status = iser_xfer_ctrlpdu(iser_conn->ic_chan, pdu);
986 	if (iser_status != ISER_STATUS_SUCCESS) {
987 		ISER_LOG(CE_WARN, "iser_pdu_tx: failed iser_xfer_ctrlpdu: "
988 		    "ic (0x%p) pdu (0x%p)", (void *) ic, (void *) pdu);
989 		/* Fail this PDU transmission */
990 		idm_pdu_complete(pdu, IDM_STATUS_FAIL);
991 	}
992 
993 	/*
994 	 * We successfully posted this PDU for transmission.
995 	 * The completion handler will invoke idm_pdu_complete()
996 	 * with the completion status. See iser_cq.c for more
997 	 * information.
998 	 */
999 }
1000 
1001 /*
1002  * iser_buf_tx_to_ini() transmits the data buffer encoded in idb to the
1003  * initiator to fulfill SCSI Read commands. An iser_xfer routine is invoked
1004  * to implement the RDMA operations.
1005  *
1006  * Caller holds idt->idt_mutex.
1007  */
1008 static idm_status_t
1009 iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
1010 {
1011 	iser_status_t	iser_status;
1012 	idm_status_t	idm_status = IDM_STATUS_SUCCESS;
1013 
1014 	ASSERT(mutex_owned(&idt->idt_mutex));
1015 
1016 	iser_status = iser_xfer_buf_to_ini(idt, idb);
1017 
1018 	if (iser_status != ISER_STATUS_SUCCESS) {
1019 		ISER_LOG(CE_WARN, "iser_buf_tx_to_ini: failed "
1020 		    "iser_xfer_buf_to_ini: idt (0x%p) idb (0x%p)",
1021 		    (void *) idt, (void *) idb);
1022 		idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1023 		return (IDM_STATUS_FAIL);
1024 	}
1025 
1026 	/*
1027 	 * iSCSIt's Data Completion Notify callback is invoked from
1028 	 * the Work Request Send completion Handler
1029 	 */
1030 
1031 	mutex_exit(&idt->idt_mutex);
1032 	return (idm_status);
1033 }
1034 
1035 /*
1036  * iser_buf_tx_from_ini() transmits data from the initiator into the buffer
1037  * in idb to fulfill SCSI Write commands. An iser_xfer routine is invoked
1038  * to implement the RDMA operations.
1039  *
1040  * Caller holds idt->idt_mutex.
1041  */
1042 static idm_status_t
1043 iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
1044 {
1045 	iser_status_t	iser_status;
1046 	idm_status_t	idm_status = IDM_STATUS_SUCCESS;
1047 
1048 	ASSERT(mutex_owned(&idt->idt_mutex));
1049 
1050 	iser_status = iser_xfer_buf_from_ini(idt, idb);
1051 
1052 	if (iser_status != ISER_STATUS_SUCCESS) {
1053 		ISER_LOG(CE_WARN, "iser_buf_rx_from_ini: failed "
1054 		    "iser_xfer_buf_from_ini: idt (0x%p) idb (0x%p)",
1055 		    (void *) idt, (void *) idb);
1056 		idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1057 		return (IDM_STATUS_FAIL);
1058 	}
1059 
1060 	/*
1061 	 * iSCSIt's Data Completion Notify callback is invoked from
1062 	 * the Work Request Send completion Handler
1063 	 */
1064 
1065 	mutex_exit(&idt->idt_mutex);
1066 	return (idm_status);
1067 }
1068 
1069 /*
1070  * iser_buf_alloc() allocates a buffer and registers it with the IBTF for
1071  * use with iSER. Each HCA has it's own kmem cache for establishing a pool
1072  * of registered buffers, when once initially allocated, will remain
1073  * registered with the HCA. This routine is invoked only on the target,
1074  * where we have the requirement to pre-allocate buffers for the upper layers.
1075  * Note: buflen is compared to ISER_DEFAULT_BUFLEN, and allocation is failed
1076  * if the requested buflen is larger than our default.
1077  */
1078 /* ARGSUSED */
1079 static idm_status_t
1080 iser_buf_alloc(idm_buf_t *idb, uint64_t buflen)
1081 {
1082 	iser_conn_t	*iser_conn;
1083 	iser_hca_t	*iser_hca;
1084 	iser_buf_t	*iser_buf;
1085 
1086 	if (buflen > ISER_DEFAULT_BUFLEN) {
1087 		return (IDM_STATUS_FAIL);
1088 	}
1089 
1090 	iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;
1091 	iser_hca = iser_conn->ic_chan->ic_hca;
1092 
1093 	/*
1094 	 * Allocate a buffer from this HCA's cache. Once initialized, these
1095 	 * will remain allocated and registered (see above).
1096 	 */
1097 	iser_buf = kmem_cache_alloc(iser_hca->iser_buf_cache, KM_NOSLEEP);
1098 	if (iser_buf == NULL) {
1099 		ISER_LOG(CE_NOTE, "iser_buf_alloc: alloc failed");
1100 		return (IDM_STATUS_FAIL);
1101 	}
1102 
1103 	/* Set the allocated data buffer pointer in the IDM buf handle */
1104 	idb->idb_buf = iser_buf->buf;
1105 
1106 	/* Set the private buf and reg handles in the IDM buf handle */
1107 	idb->idb_buf_private = (void *)iser_buf;
1108 	idb->idb_reg_private = (void *)iser_buf->iser_mr;
1109 
1110 	return (IDM_STATUS_SUCCESS);
1111 }
1112 
1113 /*
1114  * iser_buf_free() frees the buffer handle passed in. Note that the cached
1115  * kmem object has an HCA-registered buffer in it which will not be freed.
1116  * This allows us to build up a cache of pre-allocated and registered
1117  * buffers for use on the target.
1118  */
1119 static void
1120 iser_buf_free(idm_buf_t *buf)
1121 {
1122 	iser_buf_t	*iser_buf;
1123 
1124 	iser_buf = buf->idb_buf_private;
1125 	kmem_cache_free(iser_buf->cache, iser_buf);
1126 }
1127 
1128 /*
1129  * iser_buf_setup() is invoked on the initiator in order to register memory
1130  * on demand for use with the iSER layer.
1131  */
1132 static idm_status_t
1133 iser_buf_setup(idm_buf_t *idb)
1134 {
1135 	iser_conn_t	*iser_conn;
1136 	iser_chan_t	*iser_chan;
1137 	iser_hca_t	*iser_hca;
1138 	iser_buf_t	*iser_buf;
1139 	int		status;
1140 
1141 	ASSERT(idb->idb_buf != NULL);
1142 
1143 	iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;
1144 	ASSERT(iser_conn != NULL);
1145 
1146 	iser_hca = iser_conn->ic_chan->ic_hca;
1147 
1148 	iser_chan = iser_conn->ic_chan;
1149 	ASSERT(iser_chan != NULL);
1150 
1151 	/*
1152 	 * Memory registration is known to be slow, so for small
1153 	 * transfers, use pre-registered memory buffers and just
1154 	 * copy the data into/from them at the appropriate time
1155 	 */
1156 	if (idb->idb_buflen < ISER_BCOPY_THRESHOLD) {
1157 		iser_buf =
1158 		    kmem_cache_alloc(iser_hca->iser_buf_cache, KM_NOSLEEP);
1159 
1160 		if (iser_buf == NULL) {
1161 
1162 			/* Fail over to dynamic registration */
1163 			status = iser_reg_rdma_mem(iser_chan->ic_hca, idb);
1164 			idb->idb_bufalloc = B_FALSE;
1165 			return (status);
1166 		}
1167 
1168 		/*
1169 		 * Set the allocated data buffer pointer in the IDM buf handle
1170 		 * Data is to be copied from/to this buffer using bcopy
1171 		 */
1172 		idb->idb_bufptr = idb->idb_buf;
1173 		idb->idb_bufbcopy = B_TRUE;
1174 
1175 		idb->idb_buf = iser_buf->buf;
1176 
1177 		/* Set the private buf and reg handles in the IDM buf handle */
1178 		idb->idb_buf_private = (void *)iser_buf;
1179 		idb->idb_reg_private = (void *)iser_buf->iser_mr;
1180 
1181 		/* Ensure bufalloc'd flag is set */
1182 		idb->idb_bufalloc = B_TRUE;
1183 
1184 		return (IDM_STATUS_SUCCESS);
1185 
1186 	} else {
1187 
1188 		/* Dynamically register the memory passed in on the idb */
1189 		status = iser_reg_rdma_mem(iser_chan->ic_hca, idb);
1190 
1191 		/* Ensure bufalloc'd flag is unset */
1192 		idb->idb_bufalloc = B_FALSE;
1193 
1194 		return (status);
1195 	}
1196 }
1197 
1198 /*
1199  * iser_buf_teardown() is invoked on the initiator in order to register memory
1200  * on demand for use with the iSER layer.
1201  */
1202 static void
1203 iser_buf_teardown(idm_buf_t *idb)
1204 {
1205 	iser_conn_t	*iser_conn;
1206 
1207 	iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;
1208 
1209 	/* Deregister the memory passed in on the idb */
1210 	iser_dereg_rdma_mem(iser_conn->ic_chan->ic_hca, idb);
1211 }
1212