/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include /* networking stuff */ #include /* offsetof */ #include #include /* * iSER transport routines * * All transport functions except iser_tgt_svc_create() are called through * the ops vector, iser_tgt_svc_create() is called from the async handler * inaddition to being called by the ULP */ static void iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu); static idm_status_t iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb); static idm_status_t iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb); static idm_status_t iser_tgt_enable_datamover(idm_conn_t *ic); static idm_status_t iser_ini_enable_datamover(idm_conn_t *ic); static void iser_notice_key_values(struct idm_conn_s *ic, nvlist_t *negotiated_nvl); static kv_status_t iser_declare_key_values(struct idm_conn_s *ic, nvlist_t *config_nvl, nvlist_t *outgoing_nvl); static idm_status_t iser_free_task_rsrcs(idm_task_t *idt); static kv_status_t iser_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); static kv_status_t iser_handle_numerical(nvpair_t *nvp, uint64_t value, const 
idm_kv_xlate_t *ikvx, uint64_t min_value, uint64_t max_value, uint64_t iser_max_value, nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); static kv_status_t iser_handle_boolean(nvpair_t *nvp, boolean_t value, const idm_kv_xlate_t *ikvx, boolean_t iser_value, nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); static kv_status_t iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx, nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); static kv_status_t iser_process_request_nvlist(nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); static boolean_t iser_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps); static idm_status_t iser_buf_alloc(idm_buf_t *idb, uint64_t buflen); static idm_status_t iser_buf_setup(idm_buf_t *idb); static void iser_buf_teardown(idm_buf_t *idb); static void iser_buf_free(idm_buf_t *idb); static void iser_tgt_svc_destroy(struct idm_svc_s *is); static idm_status_t iser_tgt_svc_online(struct idm_svc_s *is); static void iser_tgt_svc_offline(struct idm_svc_s *is); static idm_status_t iser_tgt_conn_connect(struct idm_conn_s *ic); static idm_status_t iser_ini_conn_create(idm_conn_req_t *cr, struct idm_conn_s *ic); static void iser_conn_destroy(struct idm_conn_s *ic); static idm_status_t iser_ini_conn_connect(struct idm_conn_s *ic); static void iser_conn_disconnect(struct idm_conn_s *ic); /* * iSER IDM transport operations */ idm_transport_ops_t iser_transport_ops = { &iser_pdu_tx, /* it_tx_pdu */ &iser_buf_tx_to_ini, /* it_buf_tx_to_ini */ &iser_buf_rx_from_ini, /* it_buf_rx_from_ini */ NULL, /* it_rx_datain */ NULL, /* it_rx_rtt */ NULL, /* it_rx_dataout */ NULL, /* it_alloc_conn_rsrc */ NULL, /* it_free_conn_rsrc */ &iser_tgt_enable_datamover, /* it_tgt_enable_datamover */ &iser_ini_enable_datamover, /* it_ini_enable_datamover */ NULL, /* it_conn_terminate */ &iser_free_task_rsrcs, /* it_free_task_rsrc */ &iser_negotiate_key_values, /* 
it_negotiate_key_values */ &iser_notice_key_values, /* it_notice_key_values */ &iser_conn_is_capable, /* it_conn_is_capable */ &iser_buf_alloc, /* it_buf_alloc */ &iser_buf_free, /* it_buf_free */ &iser_buf_setup, /* it_buf_setup */ &iser_buf_teardown, /* it_buf_teardown */ &iser_tgt_svc_create, /* it_tgt_svc_create */ &iser_tgt_svc_destroy, /* it_tgt_svc_destroy */ &iser_tgt_svc_online, /* it_tgt_svc_online */ &iser_tgt_svc_offline, /* it_tgt_svc_offline */ &iser_conn_destroy, /* it_tgt_conn_destroy */ &iser_tgt_conn_connect, /* it_tgt_conn_connect */ &iser_conn_disconnect, /* it_tgt_conn_disconnect */ &iser_ini_conn_create, /* it_ini_conn_create */ &iser_conn_destroy, /* it_ini_conn_destroy */ &iser_ini_conn_connect, /* it_ini_conn_connect */ &iser_conn_disconnect, /* it_ini_conn_disconnect */ &iser_declare_key_values /* it_declare_key_values */ }; /* * iSER IDM transport capabilities */ idm_transport_caps_t iser_transport_caps = { 0 /* flags */ }; int iser_idm_register() { idm_transport_attr_t attr; idm_status_t status; attr.type = IDM_TRANSPORT_TYPE_ISER; attr.it_ops = &iser_transport_ops; attr.it_caps = &iser_transport_caps; status = idm_transport_register(&attr); if (status != IDM_STATUS_SUCCESS) { ISER_LOG(CE_WARN, "Failed to register iSER transport with IDM"); return (DDI_FAILURE); } ISER_LOG(CE_NOTE, "Registered iSER transport with IDM"); return (DDI_SUCCESS); } /* * iser_ini_conn_create() * Allocate an iSER initiator connection context */ static idm_status_t iser_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic) { iser_chan_t *iser_chan = NULL; iser_conn_t *iser_conn; /* Allocate and set up a connection handle */ iser_conn = kmem_zalloc(sizeof (iser_conn_t), KM_SLEEP); mutex_init(&iser_conn->ic_lock, NULL, MUTEX_DRIVER, NULL); /* Allocate and open a channel to the target node */ iser_chan = iser_channel_alloc(NULL, &cr->cr_ini_dst_addr); if (iser_chan == NULL) { ISER_LOG(CE_WARN, "iser: failed to allocate channel"); mutex_destroy(&iser_conn->ic_lock); 
kmem_free(iser_conn, sizeof (iser_conn_t)); return (IDM_STATUS_FAIL); } /* * The local IP and remote IP are filled in iser_channel_alloc. The * remote port needs to be filled in from idm_conn_req_t. The local * port is irrelevant. Internal representation of the port in the * IDM sockaddr structure is in network byte order. IBT expects the * port in host byte order. */ switch (cr->cr_ini_dst_addr.sin.sa_family) { case AF_INET: iser_chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin4.sin_port); break; case AF_INET6: iser_chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin6.sin6_port); break; default: iser_chan->ic_rport = ISCSI_LISTEN_PORT; } iser_chan->ic_lport = 0; cv_init(&iser_conn->ic_stage_cv, NULL, CV_DEFAULT, NULL); iser_conn->ic_type = ISER_CONN_TYPE_INI; iser_conn->ic_stage = ISER_CONN_STAGE_ALLOCATED; iser_conn->ic_chan = iser_chan; iser_conn->ic_idmc = ic; /* * Set a pointer to the iser_conn in the iser_chan for easy * access during CM event handling */ iser_chan->ic_conn = iser_conn; /* Set the iSER conn handle in the IDM conn private handle */ ic->ic_transport_private = (void *)iser_conn; /* Set the transport header length */ ic->ic_transport_hdrlen = ISER_HEADER_LENGTH; return (IDM_STATUS_SUCCESS); } /* * iser_internal_conn_destroy() * Tear down iSER-specific connection resources. This is used below * in iser_conn_destroy(), but also from the CM code when we may have * some of the connection established, but not fully connected. */ void iser_internal_conn_destroy(iser_conn_t *ic) { mutex_enter(&ic->ic_lock); iser_channel_free(ic->ic_chan); if ((ic->ic_type == ISER_CONN_TYPE_TGT) && (ic->ic_stage == ISER_CONN_STAGE_ALLOCATED)) { /* * This is a target connection that has yet to be * established. Free our reference on the target * service handle. 
*/ iser_tgt_svc_rele(ic->ic_idms->is_iser_svc); } cv_destroy(&ic->ic_stage_cv); mutex_exit(&ic->ic_lock); mutex_destroy(&ic->ic_lock); kmem_free(ic, sizeof (iser_conn_t)); } /* * iser_conn_destroy() * Tear down an initiator or target connection. */ static void iser_conn_destroy(idm_conn_t *ic) { iser_conn_t *iser_conn; iser_conn = (iser_conn_t *)ic->ic_transport_private; iser_internal_conn_destroy(iser_conn); ic->ic_transport_private = NULL; } /* * iser_ini_conn_connect() * Establish the connection referred to by the handle previously allocated via * iser_ini_conn_create(). */ static idm_status_t iser_ini_conn_connect(idm_conn_t *ic) { iser_conn_t *iser_conn; iser_status_t status; iser_conn = (iser_conn_t *)ic->ic_transport_private; status = iser_channel_open(iser_conn->ic_chan); if (status != ISER_STATUS_SUCCESS) { ISER_LOG(CE_WARN, "iser: failed to open channel"); return (IDM_STATUS_FAIL); } /* * Set the local and remote addresses in the idm conn handle. */ iser_ib_conv_ibtaddr2sockaddr(&ic->ic_laddr, &iser_conn->ic_chan->ic_localip, iser_conn->ic_chan->ic_lport); iser_ib_conv_ibtaddr2sockaddr(&ic->ic_raddr, &iser_conn->ic_chan->ic_remoteip, iser_conn->ic_chan->ic_rport); mutex_enter(&iser_conn->ic_lock); /* Hold a reference on the IDM connection handle */ idm_conn_hold(ic); iser_conn->ic_stage = ISER_CONN_STAGE_IC_CONNECTED; mutex_exit(&iser_conn->ic_lock); return (IDM_STATUS_SUCCESS); } /* * iser_conn_disconnect() * Shutdown this iSER connection */ static void iser_conn_disconnect(idm_conn_t *ic) { iser_conn_t *iser_conn; iser_conn = (iser_conn_t *)ic->ic_transport_private; mutex_enter(&iser_conn->ic_lock); iser_conn->ic_stage = ISER_CONN_STAGE_CLOSING; mutex_exit(&iser_conn->ic_lock); /* Close the channel */ iser_channel_close(iser_conn->ic_chan); /* Free our reference held on the IDM conn handle, and set CLOSED */ mutex_enter(&iser_conn->ic_lock); idm_conn_rele(iser_conn->ic_idmc); iser_conn->ic_stage = ISER_CONN_STAGE_CLOSED; mutex_exit(&iser_conn->ic_lock); 
} /* * iser_tgt_svc_create() * Establish the CM service for inbound iSER service requests on the port * indicated by sr->sr_port. * idm_svc_req_t contains the service parameters. */ idm_status_t iser_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is) { iser_svc_t *iser_svc; int rc; iser_svc = kmem_zalloc(sizeof (iser_svc_t), KM_SLEEP); is->is_iser_svc = (void *)iser_svc; idm_refcnt_init(&iser_svc->is_refcnt, iser_svc); list_create(&iser_svc->is_sbindlist, sizeof (iser_sbind_t), offsetof(iser_sbind_t, is_list_node)); iser_svc->is_svcid = ibt_get_ip_sid(IPPROTO_TCP, sr->sr_port); /* * Register an iSER target service for the requested port * and set the iser_svc structure in the idm_svc handle. */ rc = iser_register_service(is); if (rc != DDI_SUCCESS) { ISER_LOG(CE_NOTE, "iser_tgt_svc_create: iser_register_service " "failed on port (%d): rc (0x%x)", sr->sr_port, rc); (void) ibt_release_ip_sid(iser_svc->is_svcid); list_destroy(&iser_svc->is_sbindlist); idm_refcnt_destroy(&iser_svc->is_refcnt); kmem_free(iser_svc, sizeof (iser_svc_t)); return (IDM_STATUS_FAIL); } return (IDM_STATUS_SUCCESS); } /* IDM refcnt utilities for the iSER service handle */ void iser_tgt_svc_hold(iser_svc_t *is) { idm_refcnt_hold(&is->is_refcnt); } void iser_tgt_svc_rele(iser_svc_t *is) { idm_refcnt_rele(&is->is_refcnt); } /* * iser_tgt_svc_destroy() * Teardown resources allocated in iser_tgt_svc_create() */ static void iser_tgt_svc_destroy(idm_svc_t *is) { iser_svc_t *iser_svc; iser_svc = (iser_svc_t *)is->is_iser_svc; /* * Deregister the iSER target service on this port and free * the iser_svc structure from the idm_svc handle. */ iser_deregister_service(is); /* Wait for the iSER service handle's refcnt to zero */ idm_refcnt_wait_ref(&iser_svc->is_refcnt); list_destroy(&iser_svc->is_sbindlist); idm_refcnt_destroy(&iser_svc->is_refcnt); kmem_free(iser_svc, sizeof (iser_svc_t)); } /* * iser_tgt_svc_online() * Bind the CM service allocated via iser_tgt_svc_create(). 
*/ static idm_status_t iser_tgt_svc_online(idm_svc_t *is) { iser_status_t status; mutex_enter(&is->is_mutex); /* * Pass the IDM service handle as the client private data for * later use. */ status = iser_bind_service(is); if (status != ISER_STATUS_SUCCESS) { ISER_LOG(CE_NOTE, "iser_tgt_svc_online: failed bind service"); mutex_exit(&is->is_mutex); return (IDM_STATUS_FAIL); } mutex_exit(&is->is_mutex); return (IDM_STATUS_SUCCESS); } /* * iser_tgt_svc_offline * Unbind the service on all available HCA ports. */ static void iser_tgt_svc_offline(idm_svc_t *is) { mutex_enter(&is->is_mutex); iser_unbind_service(is); mutex_exit(&is->is_mutex); } /* * iser_tgt_conn_connect() * Establish the connection in ic, passed from idm_tgt_conn_finish(), which * is invoked from the SM as a result of an inbound connection request. */ /* ARGSUSED */ static idm_status_t iser_tgt_conn_connect(idm_conn_t *ic) { /* No action required */ return (IDM_STATUS_SUCCESS); } /* * iser_tgt_enable_datamover() sets the transport private data on the * idm_conn_t and move the conn stage to indicate logged in. */ static idm_status_t iser_tgt_enable_datamover(idm_conn_t *ic) { iser_conn_t *iser_conn; iser_conn = (iser_conn_t *)ic->ic_transport_private; mutex_enter(&iser_conn->ic_lock); iser_conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN; mutex_exit(&iser_conn->ic_lock); return (IDM_STATUS_SUCCESS); } /* * iser_ini_enable_datamover() is used by the iSCSI initator to request that a * specified iSCSI connection be transitioned to iSER-assisted mode. * In the case of iSER, the RDMA resources for a reliable connection have * already been allocated at this time, and the 'RDMAExtensions' is set to 'Yes' * so no further negotiations are required at this time. * The initiator now sends the first iSER Message - 'Hello' to the target * and waits for the 'HelloReply' Message from the target before directing * the initiator to go into the Full Feature Phase. * * No transport op is required on the target side. 
*/
static idm_status_t
iser_ini_enable_datamover(idm_conn_t *ic)
{
	iser_conn_t	*conn = (iser_conn_t *)ic->ic_transport_private;
	clock_t		deadline;
	int		rc;

	/* Record that the Hello is on its way before actually sending it */
	mutex_enter(&conn->ic_lock);
	conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT;
	mutex_exit(&conn->ic_lock);

	/* Send the iSER Hello Message to the target */
	rc = iser_xfer_hello_msg(conn->ic_chan);
	if (rc != ISER_STATUS_SUCCESS) {
		mutex_enter(&conn->ic_lock);
		conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT_FAIL;
		mutex_exit(&conn->ic_lock);
		return (IDM_STATUS_FAIL);
	}

	/*
	 * Wait, under ic_lock, for the stage to become
	 * ISER_CONN_STAGE_HELLOREPLY_RCV, i.e. for the HelloReply Message
	 * from the target. The wait is bounded by an absolute deadline
	 * half a second (500000 usec) from now; the stage is rechecked
	 * after every wakeup.
	 */
	deadline = ddi_get_lbolt() + drv_usectohz(500000);

	mutex_enter(&conn->ic_lock);
	for (;;) {
		if (conn->ic_stage == ISER_CONN_STAGE_HELLOREPLY_RCV) {
			break;
		}
		if (ddi_get_lbolt() >= deadline) {
			break;
		}
		(void) cv_timedwait(&conn->ic_stage_cv, &conn->ic_lock,
		    deadline);
	}

	if (conn->ic_stage != ISER_CONN_STAGE_HELLOREPLY_RCV) {
		/* No HelloReply was seen in time: the handshake failed */
		conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_RCV_FAIL;
		mutex_exit(&conn->ic_lock);
		return (IDM_STATUS_FAIL);
	}

	/*
	 * Handshake complete. Returning success tells the caller that the
	 * initiator connection can go to the next phase - FFP.
	 */
	conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN;
	mutex_exit(&conn->ic_lock);

	return (IDM_STATUS_SUCCESS);
}

/*
 * iser_free_task_rsrcs()
 * This routine does not currently need to do anything. It is used in
 * the sockets transport to explicitly complete any buffers on the task,
 * but we can rely on our RCaP layer to finish up its work without any
 * intervention.
*/ /* ARGSUSED */ idm_status_t iser_free_task_rsrcs(idm_task_t *idt) { return (IDM_STATUS_SUCCESS); } /* * iser_negotiate_key_values() validates the key values for this connection */ /* ARGSUSED */ static kv_status_t iser_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl) { kv_status_t kvrc = KV_HANDLED; /* Process the request nvlist */ kvrc = iser_process_request_nvlist(request_nvl, response_nvl, negotiated_nvl); /* We must be using RDMA, so set the flag on the ic handle */ ic->ic_rdma_extensions = B_TRUE; return (kvrc); } /* Process a list of key=value pairs from a login request */ static kv_status_t iser_process_request_nvlist(nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl) { const idm_kv_xlate_t *ikvx; char *nvp_name; nvpair_t *nvp; nvpair_t *next_nvp; kv_status_t kvrc = KV_HANDLED; boolean_t transit = B_TRUE; /* Process the list */ nvp = nvlist_next_nvpair(request_nvl, NULL); while (nvp != NULL) { next_nvp = nvlist_next_nvpair(request_nvl, nvp); nvp_name = nvpair_name(nvp); ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name)); kvrc = iser_handle_key(nvp, ikvx, request_nvl, response_nvl, negotiated_nvl); if (kvrc != KV_HANDLED) { if (kvrc == KV_HANDLED_NO_TRANSIT) { /* we countered, clear the transit flag */ transit = B_FALSE; } else { /* error, bail out */ break; } } nvp = next_nvp; } /* * If the current kv_status_t indicates success, we've handled * the entire list. Explicitly set kvrc to NO_TRANSIT if we've * cleared the transit flag along the way. 
*/ if ((kvrc == KV_HANDLED) && (transit == B_FALSE)) { kvrc = KV_HANDLED_NO_TRANSIT; } return (kvrc); } /* Handle a given list, boolean or numerical key=value pair */ static kv_status_t iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx, nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl) { kv_status_t kvrc = KV_UNHANDLED; boolean_t bool_val; uint64_t num_val; int nvrc; /* Retrieve values for booleans and numericals */ switch (ikvx->ik_key_id) { /* Booleans */ case KI_RDMA_EXTENSIONS: case KI_IMMEDIATE_DATA: nvrc = nvpair_value_boolean_value(nvp, &bool_val); ASSERT(nvrc == 0); break; /* Numericals */ case KI_INITIATOR_RECV_DATA_SEGMENT_LENGTH: case KI_TARGET_RECV_DATA_SEGMENT_LENGTH: case KI_MAX_OUTSTANDING_UNEXPECTED_PDUS: nvrc = nvpair_value_uint64(nvp, &num_val); ASSERT(nvrc == 0); break; default: break; } /* * Now handle the values according to the key name. Keys not * specifically handled here will be negotiated by the iscsi * target. Negotiated values take effect when * iser_notice_key_values gets called. 
*/ switch (ikvx->ik_key_id) { case KI_RDMA_EXTENSIONS: /* Ensure "Yes" */ kvrc = iser_handle_boolean(nvp, bool_val, ikvx, B_TRUE, request_nvl, response_nvl, negotiated_nvl); break; case KI_TARGET_RECV_DATA_SEGMENT_LENGTH: /* Validate the proposed value */ kvrc = iser_handle_numerical(nvp, num_val, ikvx, ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MIN, ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MAX, ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX, request_nvl, response_nvl, negotiated_nvl); break; case KI_INITIATOR_RECV_DATA_SEGMENT_LENGTH: /* Validate the proposed value */ kvrc = iser_handle_numerical(nvp, num_val, ikvx, ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MIN, ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MAX, ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX, request_nvl, response_nvl, negotiated_nvl); break; case KI_IMMEDIATE_DATA: /* Ensure "No" */ kvrc = iser_handle_boolean(nvp, bool_val, ikvx, B_FALSE, request_nvl, response_nvl, negotiated_nvl); break; case KI_MAX_OUTSTANDING_UNEXPECTED_PDUS: /* Validate the proposed value */ kvrc = iser_handle_numerical(nvp, num_val, ikvx, ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MIN, ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MAX, ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_IMPL_MAX, request_nvl, response_nvl, negotiated_nvl); break; default: /* * All other keys, including invalid keys, will be * handled at the client layer. */ kvrc = KV_HANDLED; break; } return (kvrc); } /* Validate a proposed boolean value, and set the alternate if necessary */ static kv_status_t iser_handle_boolean(nvpair_t *nvp, boolean_t value, const idm_kv_xlate_t *ikvx, boolean_t iser_value, nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl) { kv_status_t kvrc = KV_UNHANDLED; int nvrc; boolean_t respond = B_FALSE; if (value != iser_value) { /* * Respond back to initiator with our value, and * set the return value to unset the transit bit. 
*/ value = iser_value; nvrc = nvlist_add_boolean_value(negotiated_nvl, ikvx->ik_key_name, value); if (nvrc == 0) { kvrc = KV_HANDLED_NO_TRANSIT; respond = B_TRUE; } } else { /* Add this to our negotiated values */ nvrc = nvlist_add_nvpair(negotiated_nvl, nvp); /* Respond if this is not a declarative */ respond = (ikvx->ik_declarative == B_FALSE); } /* Response of Simple-value Negotiation */ if (nvrc == 0 && respond) { nvrc = nvlist_add_boolean_value(response_nvl, ikvx->ik_key_name, value); /* Remove from the request (we've handled it) */ (void) nvlist_remove_all(request_nvl, ikvx->ik_key_name); } if (kvrc == KV_HANDLED_NO_TRANSIT) { return (kvrc); } return (idm_nvstat_to_kvstat(nvrc)); } /* * Validate a proposed value against the iSER and/or iSCSI RFC's minimum and * maximum values, and set an alternate, if necessary. Note that the value * 'iser_max_value" represents our implementation maximum (typically the max). */ static kv_status_t iser_handle_numerical(nvpair_t *nvp, uint64_t value, const idm_kv_xlate_t *ikvx, uint64_t min_value, uint64_t max_value, uint64_t iser_max_value, nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl) { kv_status_t kvrc = KV_UNHANDLED; int nvrc; boolean_t respond = B_FALSE; /* Validate against standard */ if ((value < min_value) || (value > max_value)) { kvrc = KV_VALUE_ERROR; } else { if (value > iser_max_value) { /* * Respond back to initiator with our value, and * set the return value to unset the transit bit. 
*/ value = iser_max_value; nvrc = nvlist_add_uint64(negotiated_nvl, ikvx->ik_key_name, value); if (nvrc == 0) { kvrc = KV_HANDLED_NO_TRANSIT; respond = B_TRUE; } } else { /* Add this to our negotiated values */ nvrc = nvlist_add_nvpair(negotiated_nvl, nvp); /* Respond if this is not a declarative */ respond = (ikvx->ik_declarative == B_FALSE); } /* Response of Simple-value Negotiation */ if (nvrc == 0 && respond) { nvrc = nvlist_add_uint64(response_nvl, ikvx->ik_key_name, value); /* Remove from the request (we've handled it) */ (void) nvlist_remove_all(request_nvl, ikvx->ik_key_name); } } if (kvrc == KV_HANDLED_NO_TRANSIT) { return (kvrc); } return (idm_nvstat_to_kvstat(nvrc)); } /* * iser_declare_key_values() declares the declarative key values for * this connection. */ /* ARGSUSED */ static kv_status_t iser_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl, nvlist_t *outgoing_nvl) { kv_status_t kvrc; int nvrc = 0; int rc; uint64_t uint64_val; if ((rc = nvlist_lookup_uint64(config_nvl, ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, &uint64_val)) != ENOENT) { ASSERT(rc == 0); if (outgoing_nvl) { nvrc = nvlist_add_uint64(outgoing_nvl, ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, uint64_val); } } kvrc = idm_nvstat_to_kvstat(nvrc); return (kvrc); } /* * iser_notice_key_values() activates the negotiated key values for * this connection. */ static void iser_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl) { iser_conn_t *iser_conn; boolean_t boolean_val; uint64_t uint64_val; int nvrc; char *digest_choice_string; iser_conn = (iser_conn_t *)ic->ic_transport_private; /* * Validate the final negotiated operational parameters, * and save a copy. 
*/ if ((nvrc = nvlist_lookup_string(negotiated_nvl, "HeaderDigest", &digest_choice_string)) != ENOENT) { ASSERT(nvrc == 0); /* * Per the iSER RFC, override the negotiated value with "None" */ iser_conn->ic_op_params.op_header_digest = B_FALSE; } if ((nvrc = nvlist_lookup_string(negotiated_nvl, "DataDigest", &digest_choice_string)) != ENOENT) { ASSERT(nvrc == 0); /* * Per the iSER RFC, override the negotiated value with "None" */ iser_conn->ic_op_params.op_data_digest = B_FALSE; } if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl, "RDMAExtensions", &boolean_val)) != ENOENT) { ASSERT(nvrc == 0); iser_conn->ic_op_params.op_rdma_extensions = boolean_val; } if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl, "OFMarker", &boolean_val)) != ENOENT) { ASSERT(nvrc == 0); /* * Per the iSER RFC, override the negotiated value with "No" */ iser_conn->ic_op_params.op_ofmarker = B_FALSE; } if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl, "IFMarker", &boolean_val)) != ENOENT) { ASSERT(nvrc == 0); /* * Per the iSER RFC, override the negotiated value with "No" */ iser_conn->ic_op_params.op_ifmarker = B_FALSE; } if ((nvrc = nvlist_lookup_uint64(negotiated_nvl, "TargetRecvDataSegmentLength", &uint64_val)) != ENOENT) { ASSERT(nvrc == 0); iser_conn->ic_op_params.op_target_recv_data_segment_length = uint64_val; } if ((nvrc = nvlist_lookup_uint64(negotiated_nvl, "InitiatorRecvDataSegmentLength", &uint64_val)) != ENOENT) { ASSERT(nvrc == 0); iser_conn->ic_op_params.op_initiator_recv_data_segment_length = uint64_val; } if ((nvrc = nvlist_lookup_uint64(negotiated_nvl, "MaxOutstandingUnexpectedPDUs", &uint64_val)) != ENOENT) { ASSERT(nvrc == 0); iser_conn->ic_op_params.op_max_outstanding_unexpected_pdus = uint64_val; } /* Test boolean values which are required by RFC 5046 */ #ifdef ISER_DEBUG ASSERT(iser_conn->ic_op_params.op_rdma_extensions == B_TRUE); ASSERT(iser_conn->ic_op_params.op_header_digest == B_FALSE); ASSERT(iser_conn->ic_op_params.op_data_digest == B_FALSE); 
ASSERT(iser_conn->ic_op_params.op_ofmarker == B_FALSE); ASSERT(iser_conn->ic_op_params.op_ifmarker == B_FALSE); #endif } /* * iser_conn_is_capable() verifies that the passed connection is provided * for by an iSER-capable link. * NOTE: When utilizing InfiniBand RC as an RCaP, this routine will check * if the link is on IPoIB. This only indicates a chance that the link is * on an RCaP, and thus iSER-capable, since we may be running on an IB-Eth * gateway, or other IB but non-RCaP link. Rather than fully establishing the * link to verify RCaP here, we instead will return B_TRUE * indicating the link is iSER-capable, if the link is IPoIB. If then in * iser_ini_conn_create() the link proves not be RCaP, IDM will fall back * to using the IDM Sockets transport. */ /* ARGSUSED */ static boolean_t iser_conn_is_capable(idm_conn_req_t *cr, idm_transport_caps_t *caps) { /* A NULL value for laddr indicates implicit source */ return (iser_path_exists(NULL, &cr->cr_ini_dst_addr)); } /* * iser_pdu_tx() transmits a Control PDU via the iSER channel. We pull the * channel out of the idm_conn_t passed in, and pass it and the pdu to the * iser_xfer routine. */ static void iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu) { iser_conn_t *iser_conn; iser_status_t iser_status; iser_conn = (iser_conn_t *)ic->ic_transport_private; iser_status = iser_xfer_ctrlpdu(iser_conn->ic_chan, pdu); if (iser_status != ISER_STATUS_SUCCESS) { ISER_LOG(CE_WARN, "iser_pdu_tx: failed iser_xfer_ctrlpdu: " "ic (0x%p) pdu (0x%p)", (void *) ic, (void *) pdu); /* Fail this PDU transmission */ idm_pdu_complete(pdu, IDM_STATUS_FAIL); } /* * We successfully posted this PDU for transmission. * The completion handler will invoke idm_pdu_complete() * with the completion status. See iser_cq.c for more * information. */ } /* * iser_buf_tx_to_ini() transmits the data buffer encoded in idb to the * initiator to fulfill SCSI Read commands. An iser_xfer routine is invoked * to implement the RDMA operations. 
* * Caller holds idt->idt_mutex. */ static idm_status_t iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb) { iser_status_t iser_status; idm_status_t idm_status = IDM_STATUS_SUCCESS; ASSERT(mutex_owned(&idt->idt_mutex)); iser_status = iser_xfer_buf_to_ini(idt, idb); if (iser_status != ISER_STATUS_SUCCESS) { ISER_LOG(CE_WARN, "iser_buf_tx_to_ini: failed " "iser_xfer_buf_to_ini: idt (0x%p) idb (0x%p)", (void *) idt, (void *) idb); idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED); return (IDM_STATUS_FAIL); } /* * iSCSIt's Data Completion Notify callback is invoked from * the Work Request Send completion Handler */ mutex_exit(&idt->idt_mutex); return (idm_status); } /* * iser_buf_tx_from_ini() transmits data from the initiator into the buffer * in idb to fulfill SCSI Write commands. An iser_xfer routine is invoked * to implement the RDMA operations. * * Caller holds idt->idt_mutex. */ static idm_status_t iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb) { iser_status_t iser_status; idm_status_t idm_status = IDM_STATUS_SUCCESS; ASSERT(mutex_owned(&idt->idt_mutex)); iser_status = iser_xfer_buf_from_ini(idt, idb); if (iser_status != ISER_STATUS_SUCCESS) { ISER_LOG(CE_WARN, "iser_buf_rx_from_ini: failed " "iser_xfer_buf_from_ini: idt (0x%p) idb (0x%p)", (void *) idt, (void *) idb); idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED); return (IDM_STATUS_FAIL); } /* * iSCSIt's Data Completion Notify callback is invoked from * the Work Request Send completion Handler */ mutex_exit(&idt->idt_mutex); return (idm_status); } /* * iser_buf_alloc() allocates a buffer and registers it with the IBTF for * use with iSER. Each HCA has it's own kmem cache for establishing a pool * of registered buffers, when once initially allocated, will remain * registered with the HCA. This routine is invoked only on the target, * where we have the requirement to pre-allocate buffers for the upper layers. 
* Note: buflen is compared to ISER_DEFAULT_BUFLEN, and allocation is failed
 * if the requested buflen is larger than our default.
 */
/* ARGSUSED */
static idm_status_t
iser_buf_alloc(idm_buf_t *idb, uint64_t buflen)
{
	iser_conn_t	*conn;
	iser_buf_t	*ibuf;

	/* Requests larger than a cached buffer cannot be satisfied */
	if (buflen > ISER_DEFAULT_BUFLEN) {
		return (IDM_STATUS_FAIL);
	}

	conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;

	/*
	 * Take a buffer from the cache of the HCA behind this connection's
	 * channel. KM_NOSLEEP is used, so the allocation may fail and
	 * return NULL.
	 */
	ibuf = kmem_cache_alloc(conn->ic_chan->ic_hca->iser_buf_cache,
	    KM_NOSLEEP);
	if (ibuf == NULL) {
		ISER_LOG(CE_NOTE, "iser_buf_alloc: alloc failed");
		return (IDM_STATUS_FAIL);
	}

	/* Publish the data pointer, then the private buf and reg handles */
	idb->idb_buf		= ibuf->buf;
	idb->idb_buf_private	= (void *)ibuf;
	idb->idb_reg_private	= (void *)ibuf->iser_mr;

	return (IDM_STATUS_SUCCESS);
}

/*
 * iser_buf_free() hands the buffer handle attached to the idm_buf_t back to
 * the kmem cache it records in its 'cache' member.  Note that the cached
 * kmem object has an HCA-registered buffer in it which will not be freed.
 * This allows us to build up a cache of pre-allocated and registered
 * buffers for use on the target.
 */
static void
iser_buf_free(idm_buf_t *buf)
{
	iser_buf_t	*ibuf = buf->idb_buf_private;

	kmem_cache_free(ibuf->cache, ibuf);
}

/*
 * iser_buf_setup() is invoked on the initiator in order to register memory
 * on demand for use with the iSER layer.
*/
static idm_status_t
iser_buf_setup(idm_buf_t *idb)
{
	iser_conn_t	*iser_conn;
	iser_chan_t	*iser_chan;
	iser_buf_t	*iser_buf = NULL;
	int		status;

	ASSERT(idb->idb_buf != NULL);

	iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;
	ASSERT(iser_conn != NULL);

	/* Check the channel before anything is read through it */
	iser_chan = iser_conn->ic_chan;
	ASSERT(iser_chan != NULL);

	/*
	 * Memory registration is known to be slow, so for small
	 * transfers, use pre-registered memory buffers and just
	 * copy the data into/from them at the appropriate time
	 */
	if (idb->idb_buflen < ISER_BCOPY_THRESHOLD) {
		iser_buf = kmem_cache_alloc(iser_chan->ic_hca->iser_buf_cache,
		    KM_NOSLEEP);
	}

	if (iser_buf == NULL) {
		/*
		 * Either the transfer is at or above the bcopy threshold,
		 * or no pre-registered buffer was available: dynamically
		 * register the memory passed in on the idb.
		 */
		status = iser_reg_rdma_mem(iser_chan->ic_hca, idb);

		/* Ensure bufalloc'd flag is unset */
		idb->idb_bufalloc = B_FALSE;

		return (status);
	}

	/*
	 * Set the allocated data buffer pointer in the IDM buf handle
	 * Data is to be copied from/to this buffer using bcopy.  The
	 * caller's original buffer pointer is kept in idb_bufptr.
	 */
	idb->idb_bufptr = idb->idb_buf;
	idb->idb_bufbcopy = B_TRUE;

	idb->idb_buf = iser_buf->buf;

	/* Set the private buf and reg handles in the IDM buf handle */
	idb->idb_buf_private = (void *)iser_buf;
	idb->idb_reg_private = (void *)iser_buf->iser_mr;

	/* Ensure bufalloc'd flag is set */
	idb->idb_bufalloc = B_TRUE;

	return (IDM_STATUS_SUCCESS);
}

/*
 * iser_buf_teardown() is invoked on the initiator in order to deregister
 * memory that was set up for use with the iSER layer.
 */
static void
iser_buf_teardown(idm_buf_t *idb)
{
	iser_conn_t	*iser_conn;

	iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private;

	/* Deregister the memory passed in on the idb */
	iser_dereg_rdma_mem(iser_conn->ic_chan->ic_hca, idb);
}