/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. */ /* * sun4v LDC Link Layer */ #include #include #include #include #include #include #include #include #include #include #include /* needed for S_IFBLK and S_IFCHR */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Core internal functions */ int i_ldc_h2v_error(int h_error); void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset); static int i_ldc_txq_reconf(ldc_chan_t *ldcp); static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset); static void i_ldc_rxq_drain(ldc_chan_t *ldcp); static void i_ldc_reset_state(ldc_chan_t *ldcp); static void i_ldc_debug_enter(void); static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail); static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head); static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail); static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head); static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t 
subtype, uint8_t ctrlmsg); static int i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head); static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head); static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail, uint64_t *link_state); static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail, uint64_t *link_state); static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail); static uint_t i_ldc_chkq(ldc_chan_t *ldcp); /* Interrupt handling functions */ static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2); static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2); static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client, uint64_t *notify_event); static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype); /* Read method functions */ static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep); static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep); static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep); /* Write method functions */ static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep); static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep); static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep); /* Pkt processing internal functions */ static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg); static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg); static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg); static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg); static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg); static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg); static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg); /* Imported functions */ extern void i_ldc_mem_set_hsvc_vers(uint64_t major, uint64_t minor); extern void 
i_ldc_init_mapin(ldc_soft_state_t *ldcssp, uint64_t major, uint64_t minor); /* LDC Version */ static ldc_ver_t ldc_versions[] = { {1, 0} }; /* number of supported versions */ #define LDC_NUM_VERS (sizeof (ldc_versions) / sizeof (ldc_versions[0])) /* Invalid value for the ldc_chan_t rx_ack_head field */ #define ACKPEEK_HEAD_INVALID ((uint64_t)-1) /* Module State Pointer */ ldc_soft_state_t *ldcssp; static struct modldrv md = { &mod_miscops, /* This is a misc module */ "sun4v LDC module", /* Name of the module */ }; static struct modlinkage ml = { MODREV_1, &md, NULL }; static uint64_t ldc_sup_minor; /* Supported minor number */ static hsvc_info_t ldc_hsvc = { HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 2, "ldc" }; /* * The no. of MTU size messages that can be stored in * the LDC Tx queue. The number of Tx queue entries is * then computed as (mtu * mtu_msgs)/sizeof(queue_entry) */ uint64_t ldc_mtu_msgs = LDC_MTU_MSGS; /* * The minimum queue length. This is the size of the smallest * LDC queue. If the computed value is less than this default, * the queue length is rounded up to 'ldc_queue_entries'. */ uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES; /* * The length of the reliable-mode data queue in terms of the LDC * receive queue length. i.e., the number of times larger than the * LDC receive queue that the data queue should be. The HV receive * queue is required to be a power of 2 and this implementation * assumes the data queue will also be a power of 2. By making the * multiplier a power of 2, we ensure the data queue will be a * power of 2. We use a multiplier because the receive queue is * sized to be sane relative to the MTU and the same is needed for * the data queue. */ uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER; /* * LDC retry count and delay - when the HV returns EWOULDBLOCK * the operation is retried 'ldc_max_retries' times with a * wait of 'ldc_delay' usecs between each retry. 
*/ int ldc_max_retries = LDC_MAX_RETRIES; clock_t ldc_delay = LDC_DELAY; /* * Channels which have a devclass satisfying the following * will be reset when entering the prom or kmdb. * * LDC_DEVCLASS_PROM_RESET(devclass) != 0 * * By default, only block device service channels are reset. */ #define LDC_DEVCLASS_BIT(dc) (0x1 << (dc)) #define LDC_DEVCLASS_PROM_RESET(dc) \ (LDC_DEVCLASS_BIT(dc) & ldc_debug_reset_mask) static uint64_t ldc_debug_reset_mask = LDC_DEVCLASS_BIT(LDC_DEV_BLK_SVC) | LDC_DEVCLASS_BIT(LDC_DEV_GENERIC); /* * delay between each retry of channel unregistration in * ldc_close(), to wait for pending interrupts to complete. */ clock_t ldc_close_delay = LDC_CLOSE_DELAY; /* * Reserved mapin space for descriptor rings. */ uint64_t ldc_dring_direct_map_rsvd = LDC_DIRECT_MAP_SIZE_DEFAULT; /* * Maximum direct map space allowed per channel. */ uint64_t ldc_direct_map_size_max = (16 * 1024 * 1024); /* 16 MB */ #ifdef DEBUG /* * Print debug messages * * set ldcdbg to 0x7 for enabling all msgs * 0x4 - Warnings * 0x2 - All debug messages * 0x1 - Minimal debug messages * * set ldcdbgchan to the channel number you want to debug * setting it to -1 prints debug messages for all channels * NOTE: ldcdbgchan has no effect on error messages */ int ldcdbg = 0x0; int64_t ldcdbgchan = DBG_ALL_LDCS; uint64_t ldc_inject_err_flag = 0; void ldcdebug(int64_t id, const char *fmt, ...) 
{ char buf[512]; va_list ap; /* * Do not return if, * caller wants to print it anyway - (id == DBG_ALL_LDCS) * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS) * debug channel = caller specified channel */ if ((id != DBG_ALL_LDCS) && (ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != id)) { return; } va_start(ap, fmt); (void) vsprintf(buf, fmt, ap); va_end(ap); cmn_err(CE_CONT, "?%s", buf); } #define LDC_ERR_RESET 0x1 #define LDC_ERR_PKTLOSS 0x2 #define LDC_ERR_DQFULL 0x4 #define LDC_ERR_DRNGCLEAR 0x8 static boolean_t ldc_inject_error(ldc_chan_t *ldcp, uint64_t error) { if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id)) return (B_FALSE); if ((ldc_inject_err_flag & error) == 0) return (B_FALSE); /* clear the injection state */ ldc_inject_err_flag &= ~error; return (B_TRUE); } #define DUMP_PAYLOAD(id, addr) \ { \ char buf[65*3]; \ int i; \ uint8_t *src = (uint8_t *)addr; \ for (i = 0; i < 64; i++, src++) \ (void) sprintf(&buf[i * 3], "|%02x", *src); \ (void) sprintf(&buf[i * 3], "|\n"); \ D2((id), "payload: %s", buf); \ } #define DUMP_LDC_PKT(c, s, addr) \ { \ ldc_msg_t *msg = (ldc_msg_t *)(addr); \ uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0; \ if (msg->type == LDC_DATA) { \ D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])", \ (s), mid, msg->type, msg->stype, msg->ctrl, \ (msg->env & LDC_FRAG_START) ? 'B' : ' ', \ (msg->env & LDC_FRAG_STOP) ? 
'E' : ' ', \ (msg->env & LDC_LEN_MASK)); \ } else { \ D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s), \ mid, msg->type, msg->stype, msg->ctrl, msg->env); \ } \ } #define LDC_INJECT_RESET(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_RESET) #define LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS) #define LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL) #define LDC_INJECT_DRNGCLEAR(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DRNGCLEAR) extern void i_ldc_mem_inject_dring_clear(ldc_chan_t *ldcp); #else #define DBG_ALL_LDCS -1 #define DUMP_PAYLOAD(id, addr) #define DUMP_LDC_PKT(c, s, addr) #define LDC_INJECT_RESET(_ldcp) (B_FALSE) #define LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE) #define LDC_INJECT_DQFULL(_ldcp) (B_FALSE) #define LDC_INJECT_DRNGCLEAR(_ldcp) (B_FALSE) #endif /* * dtrace SDT probes to ease tracing of the rx data queue and HV queue * lengths. Just pass the head, tail, and entries values so that the * length can be calculated in a dtrace script when the probe is enabled. */ #define TRACE_RXDQ_LENGTH(ldcp) \ DTRACE_PROBE4(rxdq__size, \ uint64_t, ldcp->id, \ uint64_t, ldcp->rx_dq_head, \ uint64_t, ldcp->rx_dq_tail, \ uint64_t, ldcp->rx_dq_entries) #define TRACE_RXHVQ_LENGTH(ldcp, head, tail) \ DTRACE_PROBE4(rxhvq__size, \ uint64_t, ldcp->id, \ uint64_t, head, \ uint64_t, tail, \ uint64_t, ldcp->rx_q_entries) /* A dtrace SDT probe to ease tracing of data queue copy operations */ #define TRACE_RXDQ_COPY(ldcp, bytes) \ DTRACE_PROBE2(rxdq__copy, uint64_t, ldcp->id, uint64_t, bytes) \ /* The amount of contiguous space at the tail of the queue */ #define Q_CONTIG_SPACE(head, tail, size) \ ((head) <= (tail) ? 
((size) - (tail)) : \ ((head) - (tail) - LDC_PACKET_SIZE)) #define ZERO_PKT(p) \ bzero((p), sizeof (ldc_msg_t)); #define IDX2COOKIE(idx, pg_szc, pg_shift) \ (((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift))) int _init(void) { int status; status = hsvc_register(&ldc_hsvc, &ldc_sup_minor); if (status != 0) { cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services" " group: 0x%lx major: %ld minor: %ld errno: %d", ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group, ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status); return (-1); } /* Initialize shared memory HV API version checking */ i_ldc_mem_set_hsvc_vers(ldc_hsvc.hsvc_major, ldc_sup_minor); /* allocate soft state structure */ ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP); i_ldc_init_mapin(ldcssp, ldc_hsvc.hsvc_major, ldc_sup_minor); /* Link the module into the system */ status = mod_install(&ml); if (status != 0) { kmem_free(ldcssp, sizeof (ldc_soft_state_t)); return (status); } /* Initialize the LDC state structure */ mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL); mutex_enter(&ldcssp->lock); /* Create a cache for memory handles */ ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache", sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0); if (ldcssp->memhdl_cache == NULL) { DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n"); mutex_exit(&ldcssp->lock); return (-1); } /* Create cache for memory segment structures */ ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache", sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0); if (ldcssp->memseg_cache == NULL) { DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n"); mutex_exit(&ldcssp->lock); return (-1); } ldcssp->channel_count = 0; ldcssp->channels_open = 0; ldcssp->chan_list = NULL; ldcssp->dring_list = NULL; /* Register debug_enter callback */ kldc_set_debug_cb(&i_ldc_debug_enter); mutex_exit(&ldcssp->lock); return (0); } int _info(struct modinfo *modinfop) { /* Report status of the dynamically 
loadable driver module */ return (mod_info(&ml, modinfop)); } int _fini(void) { int rv, status; ldc_chan_t *tmp_ldcp, *ldcp; ldc_dring_t *tmp_dringp, *dringp; ldc_mem_info_t minfo; /* Unlink the driver module from the system */ status = mod_remove(&ml); if (status) { DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n"); return (EIO); } /* Unregister debug_enter callback */ kldc_set_debug_cb(NULL); /* Free descriptor rings */ dringp = ldcssp->dring_list; while (dringp != NULL) { tmp_dringp = dringp->next; rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo); if (rv == 0 && minfo.status != LDC_UNBOUND) { if (minfo.status == LDC_BOUND) { (void) ldc_mem_dring_unbind( (ldc_dring_handle_t)dringp); } if (minfo.status == LDC_MAPPED) { (void) ldc_mem_dring_unmap( (ldc_dring_handle_t)dringp); } } (void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp); dringp = tmp_dringp; } ldcssp->dring_list = NULL; /* close and finalize channels */ ldcp = ldcssp->chan_list; while (ldcp != NULL) { tmp_ldcp = ldcp->next; (void) ldc_close((ldc_handle_t)ldcp); (void) ldc_fini((ldc_handle_t)ldcp); ldcp = tmp_ldcp; } ldcssp->chan_list = NULL; /* Destroy kmem caches */ kmem_cache_destroy(ldcssp->memhdl_cache); kmem_cache_destroy(ldcssp->memseg_cache); /* * We have successfully "removed" the driver. 
* Destroying soft states */ mutex_destroy(&ldcssp->lock); kmem_free(ldcssp, sizeof (ldc_soft_state_t)); (void) hsvc_unregister(&ldc_hsvc); return (status); } /* -------------------------------------------------------------------------- */ /* * LDC Link Layer Internal Functions */ /* * Translate HV Errors to sun4v error codes */ int i_ldc_h2v_error(int h_error) { switch (h_error) { case H_EOK: return (0); case H_ENORADDR: return (EFAULT); case H_EBADPGSZ: case H_EINVAL: return (EINVAL); case H_EWOULDBLOCK: return (EWOULDBLOCK); case H_ENOACCESS: case H_ENOMAP: return (EACCES); case H_EIO: case H_ECPUERROR: return (EIO); case H_ENOTSUPPORTED: return (ENOTSUP); case H_ETOOMANY: return (ENOSPC); case H_ECHANNEL: return (ECHRNG); default: break; } return (EIO); } /* * Reconfigure the transmit queue */ static int i_ldc_txq_reconf(ldc_chan_t *ldcp) { int rv; ASSERT(MUTEX_HELD(&ldcp->lock)); ASSERT(MUTEX_HELD(&ldcp->tx_lock)); rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries); if (rv) { cmn_err(CE_WARN, "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id); return (EIO); } rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head), &(ldcp->tx_tail), &(ldcp->link_state)); if (rv) { cmn_err(CE_WARN, "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id); return (EIO); } D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx," "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail, ldcp->link_state); return (0); } /* * Reconfigure the receive queue */ static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset) { int rv; uint64_t rx_head, rx_tail; ASSERT(MUTEX_HELD(&ldcp->lock)); rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, &(ldcp->link_state)); if (rv) { cmn_err(CE_WARN, "i_ldc_rxq_reconf: (0x%lx) cannot get state", ldcp->id); return (EIO); } if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) { rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries); if (rv) { cmn_err(CE_WARN, "i_ldc_rxq_reconf: (0x%lx) cannot set qconf", 
ldcp->id); return (EIO); } D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf", ldcp->id); } return (0); } /* * Drain the contents of the receive queue */ static void i_ldc_rxq_drain(ldc_chan_t *ldcp) { int rv; uint64_t rx_head, rx_tail; int retries = 0; ASSERT(MUTEX_HELD(&ldcp->lock)); rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, &(ldcp->link_state)); if (rv) { cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state, " "rv = 0x%x", ldcp->id, rv); return; } /* If the queue is already empty just return success. */ if (rx_head == rx_tail) return; /* * We are draining the queue in order to close the channel. * Call hv_ldc_rx_set_qhead directly instead of i_ldc_set_rx_head * because we do not need to reset the channel if the set * qhead fails. */ if ((rv = hv_ldc_rx_set_qhead(ldcp->id, rx_tail)) == 0) return; while ((rv == H_EWOULDBLOCK) && (retries++ < ldc_max_retries)) { drv_usecwait(ldc_delay); if ((rv = hv_ldc_rx_set_qhead(ldcp->id, rx_tail)) == 0) return; } cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot set qhead 0x%lx, " "rv = 0x%x", ldcp->id, rx_tail, rv); } /* * Reset LDC state structure and its contents */ static void i_ldc_reset_state(ldc_chan_t *ldcp) { ASSERT(MUTEX_HELD(&ldcp->lock)); ldcp->last_msg_snt = LDC_INIT_SEQID; ldcp->last_ack_rcd = 0; ldcp->last_msg_rcd = 0; ldcp->tx_ackd_head = ldcp->tx_head; ldcp->stream_remains = 0; ldcp->next_vidx = 0; ldcp->hstate = 0; ldcp->tstate = TS_OPEN; ldcp->status = LDC_OPEN; ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID; ldcp->rx_dq_head = 0; ldcp->rx_dq_tail = 0; if (ldcp->link_state == LDC_CHANNEL_UP || ldcp->link_state == LDC_CHANNEL_RESET) { if (ldcp->mode == LDC_MODE_RAW) { ldcp->status = LDC_UP; ldcp->tstate = TS_UP; } else { ldcp->status = LDC_READY; ldcp->tstate |= TS_LINK_READY; } } } /* * Reset a LDC channel */ void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset) { DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id); ASSERT(MUTEX_HELD(&ldcp->lock)); 
ASSERT(MUTEX_HELD(&ldcp->tx_lock)); /* reconfig Tx and Rx queues */ (void) i_ldc_txq_reconf(ldcp); (void) i_ldc_rxq_reconf(ldcp, force_reset); /* Clear Tx and Rx interrupts */ (void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR); (void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR); /* Reset channel state */ i_ldc_reset_state(ldcp); /* Mark channel in reset */ ldcp->tstate |= TS_IN_RESET; } /* * Walk the channel list and reset channels if they are of the right * devclass and their Rx queues have been configured. No locks are * taken because the function is only invoked by the kernel just before * entering the prom or debugger when the system is single-threaded. */ static void i_ldc_debug_enter(void) { ldc_chan_t *ldcp; ldcp = ldcssp->chan_list; while (ldcp != NULL) { if (((ldcp->tstate & TS_QCONF_RDY) == TS_QCONF_RDY) && (LDC_DEVCLASS_PROM_RESET(ldcp->devclass) != 0)) { (void) hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries); } ldcp = ldcp->next; } } /* * Clear pending interrupts */ static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype) { ldc_cnex_t *cinfo = &ldcssp->cinfo; ASSERT(MUTEX_HELD(&ldcp->lock)); ASSERT(cinfo->dip != NULL); switch (itype) { case CNEX_TX_INTR: /* check Tx interrupt */ if (ldcp->tx_intr_state) ldcp->tx_intr_state = LDC_INTR_NONE; else return; break; case CNEX_RX_INTR: /* check Rx interrupt */ if (ldcp->rx_intr_state) ldcp->rx_intr_state = LDC_INTR_NONE; else return; break; } (void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype); D2(ldcp->id, "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n", ldcp->id, itype); } /* * Set the receive queue head * Resets connection and returns an error if it fails. 
*/ static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head) { int rv; int retries; ASSERT(MUTEX_HELD(&ldcp->lock)); for (retries = 0; retries < ldc_max_retries; retries++) { if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0) return (0); if (rv != H_EWOULDBLOCK) break; /* wait for ldc_delay usecs */ drv_usecwait(ldc_delay); } cmn_err(CE_WARN, "ldc_set_rx_qhead: (0x%lx) cannot set qhead 0x%lx, " "rv = 0x%x", ldcp->id, head, rv); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_TRUE); mutex_exit(&ldcp->tx_lock); return (ECONNRESET); } /* * Returns the tx_head to be used for transfer */ static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head) { ldc_msg_t *pkt; ASSERT(MUTEX_HELD(&ldcp->tx_lock)); /* get current Tx head */ *head = ldcp->tx_head; /* * Reliable mode will use the ACKd head instead of the regular tx_head. * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts, * up to the current location of tx_head. This needs to be done * as the peer will only ACK DATA/INFO pkts. 
*/ if (ldcp->mode == LDC_MODE_RELIABLE) { while (ldcp->tx_ackd_head != ldcp->tx_head) { pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head); if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) { break; } /* advance ACKd head */ ldcp->tx_ackd_head = (ldcp->tx_ackd_head + LDC_PACKET_SIZE) % (ldcp->tx_q_entries << LDC_PACKET_SHIFT); } *head = ldcp->tx_ackd_head; } } /* * Returns the tx_tail to be used for transfer * Re-reads the TX queue ptrs if and only if the * the cached head and tail are equal (queue is full) */ static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail) { int rv; uint64_t current_head, new_tail; ASSERT(MUTEX_HELD(&ldcp->tx_lock)); /* Read the head and tail ptrs from HV */ rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); if (rv) { cmn_err(CE_WARN, "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n", ldcp->id); return (EIO); } if (ldcp->link_state == LDC_CHANNEL_DOWN) { D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n", ldcp->id); return (ECONNRESET); } i_ldc_get_tx_head(ldcp, ¤t_head); /* increment the tail */ new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) % (ldcp->tx_q_entries << LDC_PACKET_SHIFT); if (new_tail == current_head) { DWARN(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n", ldcp->id); return (EWOULDBLOCK); } D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail); *tail = ldcp->tx_tail; return (0); } /* * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off * and retry ldc_max_retries times before returning an error. 
* Returns 0, EWOULDBLOCK or EIO */ static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail) { int rv, retval = EWOULDBLOCK; int retries; ASSERT(MUTEX_HELD(&ldcp->tx_lock)); for (retries = 0; retries < ldc_max_retries; retries++) { if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) { retval = 0; break; } if (rv != H_EWOULDBLOCK) { DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set " "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv); retval = EIO; break; } /* wait for ldc_delay usecs */ drv_usecwait(ldc_delay); } return (retval); } /* * Copy a data packet from the HV receive queue to the data queue. * Caller must ensure that the data queue is not already full. * * The *head argument represents the current head pointer for the HV * receive queue. After copying a packet from the HV receive queue, * the *head pointer will be updated. This allows the caller to update * the head pointer in HV using the returned *head value. */ void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head) { uint64_t q_size, dq_size; ASSERT(MUTEX_HELD(&ldcp->lock)); q_size = ldcp->rx_q_entries << LDC_PACKET_SHIFT; dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT; ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail, dq_size) >= LDC_PACKET_SIZE); bcopy((void *)(ldcp->rx_q_va + *head), (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE); TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE); /* Update rx head */ *head = (*head + LDC_PACKET_SIZE) % q_size; /* Update dq tail */ ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size; } /* * Update the Rx data queue head pointer */ static int i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head) { ldcp->rx_dq_head = head; return (0); } /* * Get the Rx data queue head and tail pointers */ static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail, uint64_t *link_state) { _NOTE(ARGUNUSED(link_state)) *head = ldcp->rx_dq_head; *tail = ldcp->rx_dq_tail; return (0); } /* * Wrapper for the Rx HV queue set head 
function. Giving the * data queue and HV queue set head functions the same type. */ static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail, uint64_t *link_state) { return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail, link_state))); } /* * LDC receive interrupt handler * triggered for channel with data pending to read * i.e. Rx queue content changes */ static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2) { _NOTE(ARGUNUSED(arg2)) ldc_chan_t *ldcp; boolean_t notify; uint64_t event; int rv, status; /* Get the channel for which interrupt was received */ if (arg1 == NULL) { cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n"); return (DDI_INTR_UNCLAIMED); } ldcp = (ldc_chan_t *)arg1; D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n", ldcp->id, ldcp); D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n", ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate, ldcp->link_state); /* Lock channel */ mutex_enter(&ldcp->lock); /* Mark the interrupt as being actively handled */ ldcp->rx_intr_state = LDC_INTR_ACTIVE; status = i_ldc_rx_process_hvq(ldcp, ¬ify, &event); if (ldcp->mode != LDC_MODE_RELIABLE) { /* * If there are no data packets on the queue, clear * the interrupt. Otherwise, the ldc_read will clear * interrupts after draining the queue. To indicate the * interrupt has not yet been cleared, it is marked * as pending. 
*/ if ((event & LDC_EVT_READ) == 0) { i_ldc_clear_intr(ldcp, CNEX_RX_INTR); } else { ldcp->rx_intr_state = LDC_INTR_PEND; } } /* if callbacks are disabled, do not notify */ if (notify && ldcp->cb_enabled) { ldcp->cb_inprogress = B_TRUE; mutex_exit(&ldcp->lock); rv = ldcp->cb(event, ldcp->cb_arg); if (rv) { DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) callback failure", ldcp->id); } mutex_enter(&ldcp->lock); ldcp->cb_inprogress = B_FALSE; } if (ldcp->mode == LDC_MODE_RELIABLE) { if (status == ENOSPC) { /* * Here, ENOSPC indicates the secondary data * queue is full and the Rx queue is non-empty. * Much like how reliable and raw modes are * handled above, since the Rx queue is non- * empty, we mark the interrupt as pending to * indicate it has not yet been cleared. */ ldcp->rx_intr_state = LDC_INTR_PEND; } else { /* * We have processed all CTRL packets and * copied all DATA packets to the secondary * queue. Clear the interrupt. */ i_ldc_clear_intr(ldcp, CNEX_RX_INTR); } } mutex_exit(&ldcp->lock); D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id); return (DDI_INTR_CLAIMED); } /* * Wrapper for the Rx HV queue processing function to be used when * checking the Rx HV queue for data packets. Unlike the interrupt * handler code flow, the Rx interrupt is not cleared here and * callbacks are not made. 
*/ static uint_t i_ldc_chkq(ldc_chan_t *ldcp) { boolean_t notify; uint64_t event; return (i_ldc_rx_process_hvq(ldcp, ¬ify, &event)); } /* * Send a LDC message */ static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype, uint8_t ctrlmsg) { int rv; ldc_msg_t *pkt; uint64_t tx_tail; uint32_t curr_seqid; /* Obtain Tx lock */ mutex_enter(&ldcp->tx_lock); curr_seqid = ldcp->last_msg_snt; /* get the current tail for the message */ rv = i_ldc_get_tx_tail(ldcp, &tx_tail); if (rv) { DWARN(ldcp->id, "i_ldc_send_pkt: (0x%llx) error sending pkt, " "type=0x%x,subtype=0x%x,ctrl=0x%x\n", ldcp->id, pkttype, subtype, ctrlmsg); mutex_exit(&ldcp->tx_lock); return (rv); } pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); ZERO_PKT(pkt); /* Initialize the packet */ pkt->type = pkttype; pkt->stype = subtype; pkt->ctrl = ctrlmsg; /* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */ if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) && ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) { curr_seqid++; if (ldcp->mode != LDC_MODE_RAW) { pkt->seqid = curr_seqid; pkt->ackid = ldcp->last_msg_rcd; } } DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt); /* initiate the send by calling into HV and set the new tail */ tx_tail = (tx_tail + LDC_PACKET_SIZE) % (ldcp->tx_q_entries << LDC_PACKET_SHIFT); rv = i_ldc_set_tx_tail(ldcp, tx_tail); if (rv) { DWARN(ldcp->id, "i_ldc_send_pkt:(0x%llx) error sending pkt, " "type=0x%x,stype=0x%x,ctrl=0x%x\n", ldcp->id, pkttype, subtype, ctrlmsg); mutex_exit(&ldcp->tx_lock); return (EIO); } ldcp->last_msg_snt = curr_seqid; ldcp->tx_tail = tx_tail; mutex_exit(&ldcp->tx_lock); return (0); } /* * Checks if packet was received in right order * in the case of a reliable link. 
* Returns 0 if in order, else EIO
 *
 * RAW mode channels and VER/RTS/RTR control messages are never checked.
 * In DEBUG builds a packet loss can be injected, which is reported as EIO.
 */
static int
i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	/* No seqid checking for RAW mode */
	if (ldcp->mode == LDC_MODE_RAW)
		return (0);

	/* No seqid checking for version, RTS, RTR message */
	if (msg->ctrl == LDC_VER ||
	    msg->ctrl == LDC_RTS ||
	    msg->ctrl == LDC_RTR)
		return (0);

	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
		DWARN(ldcp->id,
		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
		    "expecting 0x%x\n", ldcp->id, msg->seqid,
		    (ldcp->last_msg_rcd + 1));
		return (EIO);
	}

#ifdef DEBUG
	if (LDC_INJECT_PKTLOSS(ldcp)) {
		DWARN(ldcp->id,
		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
		return (EIO);
	}
#endif

	return (0);
}

/*
 * Process an incoming version ctrl message
 *
 * Handles the three subtypes of a VER control message:
 *   LDC_INFO - the peer proposes a version; reply with an ACK or a NACK.
 *   LDC_ACK  - the peer accepted a version; record it and send an RTS.
 *   LDC_NACK - the peer rejected our version; resubmit an INFO with the
 *              matching or next lower version, if there is one.
 *
 * The Tx lock is taken on entry and released on every return path.
 * Returns 0 on success, EAGAIN when an INFO arrives while the channel is
 * already in the TS_VREADY state, and ECONNRESET on any failure.
 */
static int
i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	/* the search of ldc_versions[] resumes at the saved index */
	int		rv = 0, idx = ldcp->next_vidx;
	ldc_msg_t	*pkt;
	uint64_t	tx_tail;
	ldc_ver_t	*rcvd_ver;

	/* get the received version */
	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);

	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	switch (msg->stype) {
	case LDC_INFO:

		/*
		 * A version INFO on a channel that is already TS_VREADY:
		 * reconfigure the Tx queue, reset the software state and
		 * report EAGAIN to the caller.
		 */
		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			(void) i_ldc_txq_reconf(ldcp);
			i_ldc_reset_state(ldcp);
			mutex_exit(&ldcp->tx_lock);
			return (EAGAIN);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;

		/* every path through this loop ends in a break */
		for (;;) {

			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
			    rcvd_ver->major, rcvd_ver->minor,
			    ldc_versions[idx].major, ldc_versions[idx].minor);

			if (rcvd_ver->major == ldc_versions[idx].major) {
				/* major version match - ACK version */
				pkt->stype = LDC_ACK;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));

				break;
			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* nack with next lower version */
				pkt->stype = LDC_NACK;
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next major version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/* no version match - send NACK */
				pkt->stype = LDC_NACK;
				bzero(pkt->udata, sizeof (ldc_ver_t));
				ldcp->next_vidx = 0;
				break;
			}
		}

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			ldcp->tx_tail = tx_tail;
			if (pkt->stype == LDC_ACK) {
				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
				    " version ACK\n", ldcp->id);
				/* Save the ACK'd version */
				ldcp->version.major = rcvd_ver->major;
				ldcp->version.minor = rcvd_ver->minor;
				ldcp->hstate |= TS_RCVD_VER;
				ldcp->tstate |= TS_VER_DONE;
				D1(DBG_ALL_LDCS,
				    "(0x%llx) Sent ACK, "
				    "Agreed on version v%u.%u\n",
				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
			}
		} else {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) error sending "
			    "ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;

	case LDC_ACK:
		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			/* already TS_VREADY: the ACK must match our version */
			if (ldcp->version.major != rcvd_ver->major ||
			    ldcp->version.minor != rcvd_ver->minor) {

				/* mismatched version - reset connection */
				DWARN(ldcp->id,
				    "i_ldc_process_VER: (0x%llx) recvd"
				    " ACK ver != sent ACK ver\n", ldcp->id);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		} else {
			/* SUCCESS - we have agreed on a version */
			ldcp->version.major = rcvd_ver->major;
			ldcp->version.minor = rcvd_ver->minor;
			ldcp->tstate |= TS_VER_DONE;
		}

		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

		/* initiate RTS-RTR-RDX handshake */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv) {
			DWARN(ldcp->id,
		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		pkt->type = LDC_CTRL;
		pkt->stype = LDC_INFO;
		pkt->ctrl = LDC_RTS;
		pkt->env = ldcp->mode;
		if (ldcp->mode != LDC_MODE_RAW)
			pkt->seqid = LDC_INIT_SEQID;

		ldcp->last_msg_rcd = LDC_INIT_SEQID;

		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv) {
			D2(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) no listener\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTS;

		break;

	case LDC_NACK:
		/* check if version in NACK is zero */
		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
			/* version handshake failure */
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_VER: (0x%llx) no version match\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;
		pkt->stype = LDC_INFO;

		/* check ver in NACK msg has a match */
		for (;;) {
			if (rcvd_ver->major == ldc_versions[idx].major) {
				/*
				 * major version match - resubmit the request,
				 * lowering the minor version to the one this
				 * endpt supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
				break;
			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* send next lower version */
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/*
				 * no version match - terminate; this path
				 * returns without calling i_ldc_reset()
				 */
				ldcp->next_vidx = 0;
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		}

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
			    ldc_versions[idx].minor);
			ldcp->tx_tail = tx_tail;
		} else {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) error sending version"
			    "INFO\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;
	}

	mutex_exit(&ldcp->tx_lock);
	return (rv);
}


/*
 * Process an incoming RTS ctrl message
 */
static int
i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv = 0;
	ldc_msg_t	*pkt;
	uint64_t	tx_tail;
	boolean_t	sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		DWARN(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv =
ECONNRESET;
		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
			    ldcp->id);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;
	default:
		/* any other subtype is logged as an unexpected ACK */
		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK we
	 * return 0 without recording that we received the RTS.
	 */
	if (rv || sent_NACK)
		return (rv);

	/* record RTS received */
	ldcp->hstate |= TS_RCVD_RTS;

	/* store initial SEQID info */
	ldcp->last_msg_snt = msg->seqid;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* get the current tail for the response */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* initialize the packet */
	pkt->type = LDC_CTRL;
	pkt->stype = LDC_INFO;
	pkt->ctrl = LDC_RTR;
	pkt->env = ldcp->mode;
	if (ldcp->mode != LDC_MODE_RAW)
		pkt->seqid = LDC_INIT_SEQID;

	ldcp->last_msg_rcd = msg->seqid;

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv == 0) {
		D2(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTR;

	} else {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}

/*
 * Process an incoming RTR ctrl message
 *
 * A NACK or an unexpected subtype resets the channel. An INFO whose mode
 * (msg->env) differs from ours is answered with a NACK. Otherwise the
 * peer's initial seqid is saved, an RDX is sent, the handshake is marked
 * done and, unless TS_IN_RESET is set, the status becomes LDC_UP.
 *
 * Returns 0 on success (also when a mode-mismatch NACK was sent), or
 * ECONNRESET if the channel was reset.
 */
static int
i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv = 0;
	boolean_t	sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RTR NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			DWARN(ldcp->id,
			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
			    "expecting 0x%x, got 0x%x\n",
			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;

	default:
		/* any other subtype is logged as an unexpected ACK */
		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK we
	 * return 0 without recording that we received the RTR.
	 */
	if (rv || sent_NACK)
		return (rv);

	ldcp->last_msg_snt = msg->seqid;
	ldcp->hstate |= TS_RCVD_RTR;

	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
	if (rv) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}
	D2(ldcp->id,
	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);

	ldcp->hstate |= TS_SENT_RDX;
	ldcp->tstate |= TS_HSHAKE_DONE;
	/* do not report the channel as up while a reset is in progress */
	if ((ldcp->tstate & TS_IN_RESET) == 0)
		ldcp->status = LDC_UP;

	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);

	return (0);
}

/*
 * Process an incoming RDX ctrl message
 *
 * A NACK or an unexpected subtype resets the channel. An INFO marks the
 * handshake as done, unless the channel is already TS_UP and an RDX was
 * already received, in which case the channel is reset.
 *
 * Returns 0 on success or ECONNRESET if the channel was reset.
 */
static int
i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int	rv = 0;

	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RDX NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/*
		 * if channel is UP and a RDX received after data transmission
		 * has commenced it is an error
		 */
		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
			    " - LDC reset\n", ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		ldcp->hstate |= TS_RCVD_RDX;
		ldcp->tstate |= TS_HSHAKE_DONE;
		/* do not report the channel as up while a reset is in progress */
		if ((ldcp->tstate & TS_IN_RESET) == 0)
			ldcp->status = LDC_UP;

		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
		break;

	default:
		/* any other subtype is logged as an unexpected ACK */
		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	return (rv);
}

/*
 * Process an incoming ACK for a data packet
 *
 * With tx_lock held, refreshes the Tx head/tail from the hypervisor and
 * searches the Tx queue, starting at tx_ackd_head, for the packet whose
 * seqid equals msg->ackid. On a match last_ack_rcd and tx_ackd_head are
 * updated.
 *
 * Returns 0 on success, or ECONNRESET (after resetting the channel) if the
 * queue pointers cannot be read or no packet matches the ackid.
 */
static int
i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv;
	uint64_t	tx_head;
	ldc_msg_t	*pkt;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/*
	 * Read the current Tx head and tail
	 */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN,
		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	/*
	 * loop from where the previous ACK location was to the
	 * current head location. This is how far the HV has
	 * actually sent pkts. Pkts between head and tail are
	 * yet to be sent by HV.
	 */
	tx_head = ldcp->tx_ackd_head;
	for (;;) {
		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
		/* advance (with wrap-around) before testing the packet */
		tx_head = (tx_head + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		if (pkt->seqid == msg->ackid) {
			D2(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
			    ldcp->id);
			ldcp->last_ack_rcd = msg->ackid;
			ldcp->tx_ackd_head = tx_head;
			break;
		}
		if (tx_head == ldcp->tx_head) {
			/* could not find packet */
			DWARN(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
			    ldcp->id);

			/* Reset the channel -- as we cannot continue */
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}

/*
 * Process incoming control message
 *
 * Dispatches on the channel state (with TS_IN_RESET masked off) and on
 * the control message type; unexpected control types are only logged.
 *
 * Return 0 - session can continue
 *        EAGAIN - reprocess packet - state was changed
 *        ECONNRESET - channel was reset
 */
static int
i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv = 0;

	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
	    ldcp->id, ldcp->tstate, ldcp->hstate);

	switch (ldcp->tstate & ~TS_IN_RESET) {

	case TS_OPEN:
	case TS_READY:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			/* process version message */
			rv = i_ldc_process_VER(ldcp, msg);
			break;
		default:
DWARN(ldcp->id, "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x " "tstate=0x%x\n", ldcp->id, (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate); break; } break; case TS_VREADY: switch (msg->ctrl & LDC_CTRL_MASK) { case LDC_VER: /* process version message */ rv = i_ldc_process_VER(ldcp, msg); break; case LDC_RTS: /* process RTS message */ rv = i_ldc_process_RTS(ldcp, msg); break; case LDC_RTR: /* process RTR message */ rv = i_ldc_process_RTR(ldcp, msg); break; case LDC_RDX: /* process RDX message */ rv = i_ldc_process_RDX(ldcp, msg); break; default: DWARN(ldcp->id, "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x " "tstate=0x%x\n", ldcp->id, (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate); break; } break; case TS_UP: switch (msg->ctrl & LDC_CTRL_MASK) { case LDC_VER: DWARN(ldcp->id, "i_ldc_ctrlmsg: (0x%llx) unexpected VER " "- LDC reset\n", ldcp->id); /* peer is redoing version negotiation */ mutex_enter(&ldcp->tx_lock); (void) i_ldc_txq_reconf(ldcp); i_ldc_reset_state(ldcp); mutex_exit(&ldcp->tx_lock); rv = EAGAIN; break; case LDC_RDX: /* process RDX message */ rv = i_ldc_process_RDX(ldcp, msg); break; default: DWARN(ldcp->id, "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x " "tstate=0x%x\n", ldcp->id, (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate); break; } } return (rv); } /* * Register channel with the channel nexus */ static int i_ldc_register_channel(ldc_chan_t *ldcp) { int rv = 0; ldc_cnex_t *cinfo = &ldcssp->cinfo; if (cinfo->dip == NULL) { DWARN(ldcp->id, "i_ldc_register_channel: cnex has not registered\n"); return (EAGAIN); } rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass); if (rv) { DWARN(ldcp->id, "i_ldc_register_channel: cannot register channel\n"); return (rv); } rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR, i_ldc_tx_hdlr, ldcp, NULL); if (rv) { DWARN(ldcp->id, "i_ldc_register_channel: cannot add Tx interrupt\n"); (void) cinfo->unreg_chan(cinfo->dip, ldcp->id); return (rv); } rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR, i_ldc_rx_hdlr, ldcp, NULL); if (rv) { 
DWARN(ldcp->id, "i_ldc_register_channel: cannot add Rx interrupt\n"); (void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR); (void) cinfo->unreg_chan(cinfo->dip, ldcp->id); return (rv); } ldcp->tstate |= TS_CNEX_RDY; return (0); } /* * Unregister a channel with the channel nexus */ static int i_ldc_unregister_channel(ldc_chan_t *ldcp) { int rv = 0; ldc_cnex_t *cinfo = &ldcssp->cinfo; if (cinfo->dip == NULL) { DWARN(ldcp->id, "i_ldc_unregister_channel: cnex has not registered\n"); return (EAGAIN); } if (ldcp->tstate & TS_CNEX_RDY) { /* Remove the Rx interrupt */ rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR); if (rv) { if (rv != EAGAIN) { DWARN(ldcp->id, "i_ldc_unregister_channel: err removing " "Rx intr\n"); return (rv); } /* * If interrupts are pending and handler has * finished running, clear interrupt and try * again */ if (ldcp->rx_intr_state != LDC_INTR_PEND) return (rv); (void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR); rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR); if (rv) { DWARN(ldcp->id, "i_ldc_unregister_channel: " "err removing Rx interrupt\n"); return (rv); } } /* Remove the Tx interrupt */ rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR); if (rv) { DWARN(ldcp->id, "i_ldc_unregister_channel: err removing Tx intr\n"); return (rv); } /* Unregister the channel */ rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id); if (rv) { DWARN(ldcp->id, "i_ldc_unregister_channel: cannot unreg channel\n"); return (rv); } ldcp->tstate &= ~TS_CNEX_RDY; } return (0); } /* * LDC transmit interrupt handler * triggered for chanel up/down/reset events * and Tx queue content changes */ static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2) { _NOTE(ARGUNUSED(arg2)) int rv; ldc_chan_t *ldcp; boolean_t notify_client = B_FALSE; uint64_t notify_event = 0, link_state; /* Get the channel for which interrupt was received */ ASSERT(arg1 != NULL); ldcp = (ldc_chan_t *)arg1; D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n", ldcp->id, ldcp); 
/* Lock channel */ mutex_enter(&ldcp->lock); /* Obtain Tx lock */ mutex_enter(&ldcp->tx_lock); /* mark interrupt as pending */ ldcp->tx_intr_state = LDC_INTR_ACTIVE; /* save current link state */ link_state = ldcp->link_state; rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); if (rv) { cmn_err(CE_WARN, "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n", ldcp->id, rv); i_ldc_clear_intr(ldcp, CNEX_TX_INTR); mutex_exit(&ldcp->tx_lock); mutex_exit(&ldcp->lock); return (DDI_INTR_CLAIMED); } /* * reset the channel state if the channel went down * (other side unconfigured queue) or channel was reset * (other side reconfigured its queue) */ if (link_state != ldcp->link_state && ldcp->link_state == LDC_CHANNEL_DOWN) { D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id); i_ldc_reset(ldcp, B_FALSE); notify_client = B_TRUE; notify_event = LDC_EVT_DOWN; } if (link_state != ldcp->link_state && ldcp->link_state == LDC_CHANNEL_RESET) { D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id); i_ldc_reset(ldcp, B_FALSE); notify_client = B_TRUE; notify_event = LDC_EVT_RESET; } if (link_state != ldcp->link_state && (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN && ldcp->link_state == LDC_CHANNEL_UP) { D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id); notify_client = B_TRUE; notify_event = LDC_EVT_RESET; ldcp->tstate |= TS_LINK_READY; ldcp->status = LDC_READY; } /* if callbacks are disabled, do not notify */ if (!ldcp->cb_enabled) notify_client = B_FALSE; i_ldc_clear_intr(ldcp, CNEX_TX_INTR); mutex_exit(&ldcp->tx_lock); if (notify_client) { ldcp->cb_inprogress = B_TRUE; mutex_exit(&ldcp->lock); rv = ldcp->cb(notify_event, ldcp->cb_arg); if (rv) { DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback " "failure", ldcp->id); } mutex_enter(&ldcp->lock); ldcp->cb_inprogress = B_FALSE; } mutex_exit(&ldcp->lock); D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id); return (DDI_INTR_CLAIMED); } /* * Process the Rx HV 
queue. * * Returns 0 if data packets were found and no errors were encountered, * otherwise returns an error. In either case, the *notify argument is * set to indicate whether or not the client callback function should * be invoked. The *event argument is set to contain the callback event. * * Depending on the channel mode, packets are handled differently: * * RAW MODE * For raw mode channels, when a data packet is encountered, * processing stops and all packets are left on the queue to be removed * and processed by the ldc_read code path. * * UNRELIABLE MODE * For unreliable mode, when a data packet is encountered, processing * stops, and all packets are left on the queue to be removed and * processed by the ldc_read code path. Control packets are processed * inline if they are encountered before any data packets. * * RELIABLE MODE * For reliable mode channels, all packets on the receive queue * are processed: data packets are copied to the data queue and * control packets are processed inline. Packets are only left on * the receive queue when the data queue is full. 
*/ static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client, uint64_t *notify_event) { int rv; uint64_t rx_head, rx_tail; ldc_msg_t *msg; uint64_t link_state, first_fragment = 0; boolean_t trace_length = B_TRUE; ASSERT(MUTEX_HELD(&ldcp->lock)); *notify_client = B_FALSE; *notify_event = 0; /* * Read packet(s) from the queue */ for (;;) { link_state = ldcp->link_state; rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, &ldcp->link_state); if (rv) { cmn_err(CE_WARN, "i_ldc_rx_process_hvq: (0x%lx) cannot read " "queue ptrs, rv=0x%d\n", ldcp->id, rv); i_ldc_clear_intr(ldcp, CNEX_RX_INTR); return (EIO); } /* * reset the channel state if the channel went down * (other side unconfigured queue) or channel was reset * (other side reconfigured its queue) */ if (link_state != ldcp->link_state) { switch (ldcp->link_state) { case LDC_CHANNEL_DOWN: D1(ldcp->id, "i_ldc_rx_process_hvq: channel " "link down\n", ldcp->id); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->tx_lock); *notify_client = B_TRUE; *notify_event = LDC_EVT_DOWN; goto loop_exit; case LDC_CHANNEL_UP: D1(ldcp->id, "i_ldc_rx_process_hvq: " "channel link up\n", ldcp->id); if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) { *notify_client = B_TRUE; *notify_event = LDC_EVT_RESET; ldcp->tstate |= TS_LINK_READY; ldcp->status = LDC_READY; } break; case LDC_CHANNEL_RESET: default: #ifdef DEBUG force_reset: #endif D1(ldcp->id, "i_ldc_rx_process_hvq: channel " "link reset\n", ldcp->id); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->tx_lock); *notify_client = B_TRUE; *notify_event = LDC_EVT_RESET; break; } } #ifdef DEBUG if (LDC_INJECT_RESET(ldcp)) goto force_reset; if (LDC_INJECT_DRNGCLEAR(ldcp)) i_ldc_mem_inject_dring_clear(ldcp); #endif if (trace_length) { TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail); trace_length = B_FALSE; } if (rx_head == rx_tail) { D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) " "No packets\n", ldcp->id); break; } D2(ldcp->id, 
"i_ldc_rx_process_hvq: head=0x%llx, " "tail=0x%llx\n", rx_head, rx_tail); DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd", ldcp->rx_q_va + rx_head); /* get the message */ msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head); /* if channel is in RAW mode or data pkt, notify and return */ if (ldcp->mode == LDC_MODE_RAW) { *notify_client = B_TRUE; *notify_event |= LDC_EVT_READ; break; } if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) { /* discard packet if channel is not up */ if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) { /* move the head one position */ rx_head = (rx_head + LDC_PACKET_SIZE) % (ldcp->rx_q_entries << LDC_PACKET_SHIFT); if (rv = i_ldc_set_rx_head(ldcp, rx_head)) break; continue; } else { uint64_t dq_head, dq_tail; /* process only RELIABLE mode data packets */ if (ldcp->mode != LDC_MODE_RELIABLE) { if ((ldcp->tstate & TS_IN_RESET) == 0) *notify_client = B_TRUE; *notify_event |= LDC_EVT_READ; break; } /* don't process packet if queue full */ (void) i_ldc_dq_rx_get_state(ldcp, &dq_head, &dq_tail, NULL); dq_tail = (dq_tail + LDC_PACKET_SIZE) % (ldcp->rx_dq_entries << LDC_PACKET_SHIFT); if (dq_tail == dq_head || LDC_INJECT_DQFULL(ldcp)) { rv = ENOSPC; break; } } } /* Check the sequence ID for the message received */ rv = i_ldc_check_seqid(ldcp, msg); if (rv != 0) { DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) " "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail); /* Reset last_msg_rcd to start of message */ if (first_fragment != 0) { ldcp->last_msg_rcd = first_fragment - 1; first_fragment = 0; } /* * Send a NACK due to seqid mismatch */ rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK, (msg->ctrl & LDC_CTRL_MASK)); if (rv) { cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: " "(0x%lx) err sending CTRL/DATA NACK msg\n", ldcp->id); /* if cannot send NACK - reset channel */ mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_TRUE); mutex_exit(&ldcp->tx_lock); *notify_client = B_TRUE; *notify_event = LDC_EVT_RESET; break; } /* purge receive queue */ (void) 
i_ldc_set_rx_head(ldcp, rx_tail); break; } /* record the message ID */ ldcp->last_msg_rcd = msg->seqid; /* process control messages */ if (msg->type & LDC_CTRL) { /* save current internal state */ uint64_t tstate = ldcp->tstate; rv = i_ldc_ctrlmsg(ldcp, msg); if (rv == EAGAIN) { /* re-process pkt - state was adjusted */ continue; } if (rv == ECONNRESET) { *notify_client = B_TRUE; *notify_event = LDC_EVT_RESET; break; } /* * control message processing was successful * channel transitioned to ready for communication */ if (rv == 0 && ldcp->tstate == TS_UP && (tstate & ~TS_IN_RESET) != (ldcp->tstate & ~TS_IN_RESET)) { *notify_client = B_TRUE; *notify_event = LDC_EVT_UP; } } /* process data NACKs */ if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) { DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK", ldcp->id); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_TRUE); mutex_exit(&ldcp->tx_lock); *notify_client = B_TRUE; *notify_event = LDC_EVT_RESET; break; } /* process data ACKs */ if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) { if (rv = i_ldc_process_data_ACK(ldcp, msg)) { *notify_client = B_TRUE; *notify_event = LDC_EVT_RESET; break; } } if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) { ASSERT(ldcp->mode == LDC_MODE_RELIABLE); /* * Copy the data packet to the data queue. Note * that the copy routine updates the rx_head pointer. 
*/ i_ldc_rxdq_copy(ldcp, &rx_head); if ((ldcp->tstate & TS_IN_RESET) == 0) *notify_client = B_TRUE; *notify_event |= LDC_EVT_READ; } else { rx_head = (rx_head + LDC_PACKET_SIZE) % (ldcp->rx_q_entries << LDC_PACKET_SHIFT); } /* move the head one position */ if (rv = i_ldc_set_rx_head(ldcp, rx_head)) { *notify_client = B_TRUE; *notify_event = LDC_EVT_RESET; break; } } /* for */ loop_exit: if (ldcp->mode == LDC_MODE_RELIABLE) { /* ACK data packets */ if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) { int ack_rv; ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0); if (ack_rv && ack_rv != EWOULDBLOCK) { cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: (0x%lx) cannot " "send ACK\n", ldcp->id); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->tx_lock); *notify_client = B_TRUE; *notify_event = LDC_EVT_RESET; goto skip_ackpeek; } } /* * If we have no more space on the data queue, make sure * there are no ACKs on the rx queue waiting to be processed. */ if (rv == ENOSPC) { if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) { ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID; *notify_client = B_TRUE; *notify_event = LDC_EVT_RESET; } return (rv); } else { ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID; } } skip_ackpeek: /* Return, indicating whether or not data packets were found */ if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) return (0); return (ENOMSG); } /* * Process any ACK packets on the HV receive queue. * * This function is only used by RELIABLE mode channels when the * secondary data queue fills up and there are packets remaining on * the HV receive queue. 
*/
int
i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
{
	int		rv = 0;
	ldc_msg_t	*msg;

	/* start a new scan at rx_head unless an earlier scan left a position */
	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
		ldcp->rx_ack_head = rx_head;

	while (ldcp->rx_ack_head != rx_tail) {
		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);

		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
			if (rv = i_ldc_process_data_ACK(ldcp, msg))
				break;
			/* clear the ACK bit in the queued packet once handled */
			msg->stype &= ~LDC_ACK;
		}

		ldcp->rx_ack_head =
		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
	}
	return (rv);
}

/* -------------------------------------------------------------------------- */

/*
 * LDC API functions
 */

/*
 * Initialize the channel. Allocate internal structure and memory for
 * TX/RX queues, and initialize locks.
 *
 * id is the channel id, attr supplies mode, devclass, instance and mtu
 * (an mtu of 0 selects LDC_DEFAULT_MTU), and *handle receives the new
 * channel handle on success.
 *
 * Returns 0 on success, EINVAL for a NULL attr/handle, an invalid channel
 * id or an unknown mode, EADDRINUSE if the channel id is already on the
 * channel list, or ENOMEM if a queue cannot be allocated.
 */
int
ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
{
	ldc_chan_t	*ldcp;
	int		rv, exit_val;
	uint64_t	ra_base, nentries;
	uint64_t	qlen;

	exit_val = EINVAL;	/* guarantee an error if exit on failure */

	if (attr == NULL) {
		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
		return (EINVAL);
	}
	if (handle == NULL) {
		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
		return (EINVAL);
	}

	/* check if channel is valid */
	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
	if (rv == H_ECHANNEL) {
		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
		return (EINVAL);
	}

	/*
	 * check if the channel has already been initialized
	 *
	 * NOTE(review): the list lock is dropped between this check and
	 * the insertion further down - confirm callers serialize ldc_init
	 * for a given channel id.
	 */
	mutex_enter(&ldcssp->lock);
	ldcp = ldcssp->chan_list;
	while (ldcp != NULL) {
		if (ldcp->id == id) {
			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
			    id);
			mutex_exit(&ldcssp->lock);
			return (EADDRINUSE);
		}
		ldcp = ldcp->next;
	}
	mutex_exit(&ldcssp->lock);

	ASSERT(ldcp == NULL);

	*handle = 0;

	/* Allocate an ldcp structure */
	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);

	/*
	 * Initialize the channel and Tx lock
	 *
	 * The channel 'lock' protects the entire channel and
	 * should be acquired before initializing, resetting,
	 * destroying or reading from a channel.
	 *
	 * The 'tx_lock' should be acquired prior to transmitting
	 * data over the channel. The lock should also be acquired
	 * prior to channel reconfiguration (in order to prevent
	 * concurrent writes).
	 *
	 * ORDERING: When both locks are being acquired, to prevent
	 * deadlocks, the channel lock should be always acquired prior
	 * to the tx_lock.
	 */
	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);

	/* Initialize the channel */
	ldcp->id = id;
	ldcp->cb = NULL;
	ldcp->cb_arg = NULL;
	ldcp->cb_inprogress = B_FALSE;
	ldcp->cb_enabled = B_FALSE;
	ldcp->next = NULL;

	/* Read attributes */
	ldcp->mode = attr->mode;
	ldcp->devclass = attr->devclass;
	ldcp->devinst = attr->instance;
	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;

	D1(ldcp->id,
	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
	    "instance=0x%llx, mode=%d, mtu=%d\n",
	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);

	ldcp->next_vidx = 0;
	ldcp->tstate = TS_IN_RESET;
	ldcp->hstate = 0;
	ldcp->last_msg_snt = LDC_INIT_SEQID;
	ldcp->last_ack_rcd = 0;
	ldcp->last_msg_rcd = 0;
	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;

	ldcp->stream_bufferp = NULL;
	ldcp->exp_dring_list = NULL;
	ldcp->imp_dring_list = NULL;
	ldcp->mhdl_list = NULL;

	ldcp->tx_intr_state = LDC_INTR_NONE;
	ldcp->rx_intr_state = LDC_INTR_NONE;

	/* Initialize payload size depending on whether channel is reliable */
	switch (ldcp->mode) {
	case LDC_MODE_RAW:
		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
		ldcp->read_p = i_ldc_read_raw;
		ldcp->write_p = i_ldc_write_raw;
		break;
	case LDC_MODE_UNRELIABLE:
		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
		ldcp->read_p = i_ldc_read_packet;
		ldcp->write_p = i_ldc_write_packet;
		break;
	case LDC_MODE_RELIABLE:
		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;

		ldcp->stream_remains = 0;
		ldcp->stream_offset = 0;
		/* RELIABLE mode also gets an mtu-sized stream buffer */
		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
		ldcp->read_p = i_ldc_read_stream;
		ldcp->write_p = i_ldc_write_stream;
		break;
	default:
		exit_val = EINVAL;
		goto cleanup_on_exit;
	}

	/*
	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
	 * value is smaller than default length of ldc_queue_entries,
	 * qlen is set to ldc_queue_entries. Ensure that computed
	 * length is a power-of-two value.
	 */
	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
	if (!ISP2(qlen)) {
		/* round up to the next power of two */
		uint64_t	tmp = 1;
		while (qlen) {
			qlen >>= 1; tmp <<= 1;
		}
		qlen = tmp;
	}

	ldcp->rx_q_entries =
	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
	ldcp->tx_q_entries = ldcp->rx_q_entries;

	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);

	/* Create a transmit queue */
	ldcp->tx_q_va = (uint64_t)
	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
	if (ldcp->tx_q_va == 0) {
		cmn_err(CE_WARN,
		    "ldc_init: (0x%lx) TX queue allocation failed\n",
		    ldcp->id);
		exit_val = ENOMEM;
		goto cleanup_on_exit;
	}
	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);

	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);

	/* TS_TXQ_RDY tells the cleanup path that the Tx queue must be freed */
	ldcp->tstate |= TS_TXQ_RDY;

	/* Create a receive queue */
	ldcp->rx_q_va = (uint64_t)
	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
	if (ldcp->rx_q_va == 0) {
		cmn_err(CE_WARN,
		    "ldc_init: (0x%lx) RX queue allocation failed\n",
		    ldcp->id);
		exit_val = ENOMEM;
		goto cleanup_on_exit;
	}
	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);

	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);

	/* TS_RXQ_RDY tells the cleanup path that the Rx queue must be freed */
	ldcp->tstate |= TS_RXQ_RDY;

	/* Setup a separate read data queue */
	if (ldcp->mode == LDC_MODE_RELIABLE) {
		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
		ldcp->readq_set_head = i_ldc_set_rxdq_head;

		/* Make sure the data queue multiplier is a power of 2 */
		if (!ISP2(ldc_rxdq_multiplier)) {
			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
			    "not a power of 2, resetting", ldcp->id);
			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
		}

		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
		ldcp->rx_dq_va = (uint64_t)
		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
		    KM_SLEEP);
		/*
		 * NOTE(review): the cleanup path below does not free
		 * rx_dq_va; this only matters if a KM_SLEEP allocation
		 * can return 0 - confirm.
		 */
		if (ldcp->rx_dq_va == 0) {
			cmn_err(CE_WARN,
			    "ldc_init: (0x%lx) RX data queue "
			    "allocation failed\n", ldcp->id);
			exit_val = ENOMEM;
			goto cleanup_on_exit;
		}

		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;

		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
		    ldcp->rx_dq_entries);
	} else {
		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
		ldcp->readq_set_head = i_ldc_set_rx_head;
	}

	/* Init descriptor ring and memory handle list lock */
	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);

	/* mark status as INITialized */
	ldcp->status = LDC_INIT;

	/* Add to channel list */
	mutex_enter(&ldcssp->lock);
	ldcp->next = ldcssp->chan_list;
	ldcssp->chan_list = ldcp;
	ldcssp->channel_count++;
	mutex_exit(&ldcssp->lock);

	/* set the handle */
	*handle = (ldc_handle_t)ldcp;

	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);

	return (0);

cleanup_on_exit:

	/* undo only what was set up before the failure */
	if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp)
		kmem_free(ldcp->stream_bufferp, ldcp->mtu);

	if (ldcp->tstate & TS_TXQ_RDY)
		contig_mem_free((caddr_t)ldcp->tx_q_va,
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));

	if (ldcp->tstate & TS_RXQ_RDY)
		contig_mem_free((caddr_t)ldcp->rx_q_va,
		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));

	mutex_destroy(&ldcp->tx_lock);
	mutex_destroy(&ldcp->lock);

	kmem_free(ldcp, sizeof (ldc_chan_t));

	return (exit_val);
}

/*
 * Finalizes the LDC connection. It will return EBUSY if the
 * channel is open. A ldc_close() has to be done prior to
 * a ldc_fini operation.
It frees TX/RX queues, associated * with the channel */ int ldc_fini(ldc_handle_t handle) { ldc_chan_t *ldcp; ldc_chan_t *tmp_ldcp; uint64_t id; if (handle == 0) { DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; id = ldcp->id; mutex_enter(&ldcp->lock); if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) { DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n", ldcp->id); mutex_exit(&ldcp->lock); return (EBUSY); } /* Remove from the channel list */ mutex_enter(&ldcssp->lock); tmp_ldcp = ldcssp->chan_list; if (tmp_ldcp == ldcp) { ldcssp->chan_list = ldcp->next; ldcp->next = NULL; } else { while (tmp_ldcp != NULL) { if (tmp_ldcp->next == ldcp) { tmp_ldcp->next = ldcp->next; ldcp->next = NULL; break; } tmp_ldcp = tmp_ldcp->next; } if (tmp_ldcp == NULL) { DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n"); mutex_exit(&ldcssp->lock); mutex_exit(&ldcp->lock); return (EINVAL); } } ldcssp->channel_count--; mutex_exit(&ldcssp->lock); /* Free the map table for this channel */ if (ldcp->mtbl) { (void) hv_ldc_set_map_table(ldcp->id, 0, 0); if (ldcp->mtbl->contigmem) contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size); else kmem_free(ldcp->mtbl->table, ldcp->mtbl->size); mutex_destroy(&ldcp->mtbl->lock); kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t)); } /* Destroy descriptor ring and memory handle list lock */ mutex_destroy(&ldcp->exp_dlist_lock); mutex_destroy(&ldcp->imp_dlist_lock); mutex_destroy(&ldcp->mlist_lock); /* Free the stream buffer for RELIABLE_MODE */ if (ldcp->mode == LDC_MODE_RELIABLE && ldcp->stream_bufferp) kmem_free(ldcp->stream_bufferp, ldcp->mtu); /* Free the RX queue */ contig_mem_free((caddr_t)ldcp->rx_q_va, (ldcp->rx_q_entries << LDC_PACKET_SHIFT)); ldcp->tstate &= ~TS_RXQ_RDY; /* Free the RX data queue */ if (ldcp->mode == LDC_MODE_RELIABLE) { kmem_free((caddr_t)ldcp->rx_dq_va, (ldcp->rx_dq_entries << LDC_PACKET_SHIFT)); } /* Free the TX queue */ contig_mem_free((caddr_t)ldcp->tx_q_va, 
(ldcp->tx_q_entries << LDC_PACKET_SHIFT)); ldcp->tstate &= ~TS_TXQ_RDY; mutex_exit(&ldcp->lock); /* Destroy mutex */ mutex_destroy(&ldcp->tx_lock); mutex_destroy(&ldcp->lock); /* free channel structure */ kmem_free(ldcp, sizeof (ldc_chan_t)); D1(id, "ldc_fini: (0x%llx) channel finalized\n", id); return (0); } /* * Open the LDC channel for use. It registers the TX/RX queues * with the Hypervisor. It also specifies the interrupt number * and target CPU for this channel */ int ldc_open(ldc_handle_t handle) { ldc_chan_t *ldcp; int rv; if (handle == 0) { DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; mutex_enter(&ldcp->lock); if (ldcp->tstate < TS_INIT) { DWARN(ldcp->id, "ldc_open: (0x%llx) channel not initialized\n", ldcp->id); mutex_exit(&ldcp->lock); return (EFAULT); } if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) { DWARN(ldcp->id, "ldc_open: (0x%llx) channel is already open\n", ldcp->id); mutex_exit(&ldcp->lock); return (EFAULT); } /* * Unregister/Register the tx queue with the hypervisor */ rv = hv_ldc_tx_qconf(ldcp->id, 0, 0); if (rv) { cmn_err(CE_WARN, "ldc_open: (0x%lx) channel tx queue unconf failed\n", ldcp->id); mutex_exit(&ldcp->lock); return (EIO); } rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries); if (rv) { cmn_err(CE_WARN, "ldc_open: (0x%lx) channel tx queue conf failed\n", ldcp->id); mutex_exit(&ldcp->lock); return (EIO); } D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n", ldcp->id); /* * Unregister/Register the rx queue with the hypervisor */ rv = hv_ldc_rx_qconf(ldcp->id, 0, 0); if (rv) { cmn_err(CE_WARN, "ldc_open: (0x%lx) channel rx queue unconf failed\n", ldcp->id); mutex_exit(&ldcp->lock); return (EIO); } rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries); if (rv) { cmn_err(CE_WARN, "ldc_open: (0x%lx) channel rx queue conf failed\n", ldcp->id); mutex_exit(&ldcp->lock); return (EIO); } D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with 
LDC\n", ldcp->id); ldcp->tstate |= TS_QCONF_RDY; /* Register the channel with the channel nexus */ rv = i_ldc_register_channel(ldcp); if (rv && rv != EAGAIN) { cmn_err(CE_WARN, "ldc_open: (0x%lx) channel register failed\n", ldcp->id); ldcp->tstate &= ~TS_QCONF_RDY; (void) hv_ldc_tx_qconf(ldcp->id, 0, 0); (void) hv_ldc_rx_qconf(ldcp->id, 0, 0); mutex_exit(&ldcp->lock); return (EIO); } /* mark channel in OPEN state */ ldcp->status = LDC_OPEN; /* Read channel state */ rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); if (rv) { cmn_err(CE_WARN, "ldc_open: (0x%lx) cannot read channel state\n", ldcp->id); (void) i_ldc_unregister_channel(ldcp); ldcp->tstate &= ~TS_QCONF_RDY; (void) hv_ldc_tx_qconf(ldcp->id, 0, 0); (void) hv_ldc_rx_qconf(ldcp->id, 0, 0); mutex_exit(&ldcp->lock); return (EIO); } /* * set the ACKd head to current head location for reliable */ ldcp->tx_ackd_head = ldcp->tx_head; /* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */ if (ldcp->link_state == LDC_CHANNEL_UP || ldcp->link_state == LDC_CHANNEL_RESET) { ldcp->tstate |= TS_LINK_READY; ldcp->status = LDC_READY; } /* * if channel is being opened in RAW mode - no handshake is needed * switch the channel READY and UP state */ if (ldcp->mode == LDC_MODE_RAW) { ldcp->tstate = TS_UP; /* set bits associated with LDC UP */ ldcp->status = LDC_UP; } mutex_exit(&ldcp->lock); /* * Increment number of open channels */ mutex_enter(&ldcssp->lock); ldcssp->channels_open++; mutex_exit(&ldcssp->lock); D1(ldcp->id, "ldc_open: (0x%llx) channel (0x%p) open for use " "(tstate=0x%x, status=0x%x)\n", ldcp->id, ldcp, ldcp->tstate, ldcp->status); return (0); } /* * Close the LDC connection. 
It will return EBUSY if there
 * are memory segments or descriptor rings either bound to or
 * mapped over the channel
 *
 * Other return values: EINVAL for a null handle, EFAULT if the channel
 * is not open, EWOULDBLOCK if a callback is in progress, EIO if a
 * hypervisor queue call fails, or the error from
 * i_ldc_unregister_channel() if unregistering fails or keeps returning
 * EAGAIN once 'retries' has reached ldc_max_retries.
 */
int
ldc_close(ldc_handle_t handle)
{
	ldc_chan_t	*ldcp;
	int		rv = 0, retries = 0;
	boolean_t	chk_done = B_FALSE;

	if (handle == 0) {
		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;

	mutex_enter(&ldcp->lock);

	/* return error if channel is not open */
	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EFAULT);
	}

	/* if any memory handles, drings, are bound or mapped cannot close */
	if (ldcp->mhdl_list != NULL) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel has bound memory handles\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EBUSY);
	}
	if (ldcp->exp_dring_list != NULL) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EBUSY);
	}
	if (ldcp->imp_dring_list != NULL) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EBUSY);
	}

	if (ldcp->cb_inprogress) {
		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EWOULDBLOCK);
	}

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/*
	 * Wait for pending transmits to complete i.e Tx queue to drain
	 * if there are pending pkts - wait 1 ms and retry again
	 *
	 * chk_done limits this to a single wait: the second pass either
	 * finds the queue drained / link not up, or logs a timeout.
	 */
	for (;;) {

		rv = hv_ldc_tx_get_state(ldcp->id,
		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
		if (rv) {
			cmn_err(CE_WARN,
			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
			mutex_exit(&ldcp->tx_lock);
			mutex_exit(&ldcp->lock);
			return (EIO);
		}

		if (ldcp->tx_head == ldcp->tx_tail ||
		    ldcp->link_state != LDC_CHANNEL_UP) {
			break;
		}

		if (chk_done) {
			DWARN(ldcp->id,
			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
			    ldcp->id);
			break;
		}

		/* wait for one ms and try again */
		delay(drv_usectohz(1000));
		chk_done = B_TRUE;
	}

	/*
	 * Drain the Tx and Rx queues as we are closing the
	 * channel. We dont care about any pending packets.
	 * We have to also drain the queue prior to clearing
	 * pending interrupts, otherwise the HV will trigger
	 * an interrupt the moment the interrupt state is
	 * cleared.
	 */
	(void) i_ldc_txq_reconf(ldcp);
	i_ldc_rxq_drain(ldcp);

	/*
	 * Unregister the channel with the nexus
	 *
	 * Both locks are dropped before either returning or waiting, and
	 * are re-taken (channel lock first, then Tx lock) before the next
	 * attempt.
	 */
	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {

		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);

		/* if any error other than EAGAIN return back */
		if (rv != EAGAIN || retries >= ldc_max_retries) {
			cmn_err(CE_WARN,
			    "ldc_close: (0x%lx) unregister failed, %d\n",
			    ldcp->id, rv);
			return (rv);
		}

		/*
		 * As there could be pending interrupts we need
		 * to wait and try again
		 */
		drv_usecwait(ldc_close_delay);
		mutex_enter(&ldcp->lock);
		mutex_enter(&ldcp->tx_lock);
		retries++;
	}

	ldcp->tstate &= ~TS_QCONF_RDY;

	/*
	 * Unregister queues
	 */
	rv = hv_ldc_tx_qconf(ldcp->id, 0, 0);
	if (rv) {
		cmn_err(CE_WARN,
		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
		    ldcp->id);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}
	rv = hv_ldc_rx_qconf(ldcp->id, 0, 0);
	if (rv) {
		cmn_err(CE_WARN,
		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
		    ldcp->id);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}

	/* Reset channel state information */
	i_ldc_reset_state(ldcp);

	/* Mark channel as down and in initialized state */
	ldcp->tx_ackd_head = 0;
	ldcp->tx_head = 0;
	ldcp->tstate = TS_IN_RESET|TS_INIT;
	ldcp->status = LDC_INIT;

	mutex_exit(&ldcp->tx_lock);
	mutex_exit(&ldcp->lock);

	/* Decrement number of open channels */
	mutex_enter(&ldcssp->lock);
	ldcssp->channels_open--;
	mutex_exit(&ldcssp->lock);

	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);

	return (0);
}

/*
 * Register channel callback
 *
 * Stores 'cb' and 'arg' on the channel and enables callbacks.
 * Returns EINVAL for a null handle or a callback address below
 * KERNELBASE, EIO if a callback is already registered, EWOULDBLOCK if
 * a callback is currently in progress, and 0 on success.
 */
int
ldc_reg_callback(ldc_handle_t handle,
    uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
{
	ldc_chan_t *ldcp;

	if (handle == 0) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_reg_callback: invalid channel handle\n");
		return (EINVAL);
	}
	/* rejects NULL as well as any address below KERNELBASE */
	if (((uint64_t)cb) < KERNELBASE) {
		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;

	mutex_enter(&ldcp->lock);

	if (ldcp->cb) {
		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}
	if (ldcp->cb_inprogress) {
		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EWOULDBLOCK);
	}

	ldcp->cb = cb;
	ldcp->cb_arg = arg;
	ldcp->cb_enabled = B_TRUE;

	D1(ldcp->id,
	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
	    ldcp->id);

	mutex_exit(&ldcp->lock);

	return (0);
}

/*
 * Unregister channel callback
 *
 * Clears the callback pointer and argument and disables callbacks.
 * Returns EINVAL for a null handle, EIO if no callback is registered,
 * EWOULDBLOCK if a callback is currently in progress, and 0 on success.
 */
int
ldc_unreg_callback(ldc_handle_t handle)
{
	ldc_chan_t *ldcp;

	if (handle == 0) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_unreg_callback: invalid channel handle\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;

	mutex_enter(&ldcp->lock);

	if (ldcp->cb == NULL) {
		DWARN(ldcp->id,
		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}
	if (ldcp->cb_inprogress) {
		DWARN(ldcp->id,
		    "ldc_unreg_callback: (0x%llx) callback active\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EWOULDBLOCK);
	}

	ldcp->cb = NULL;
	ldcp->cb_arg = NULL;
	ldcp->cb_enabled = B_FALSE;

	D1(ldcp->id,
	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
	    ldcp->id);

	mutex_exit(&ldcp->lock);

	return (0);
}


/*
 * Bring a channel up by initiating a handshake with the peer
 * This call is asynchronous. It will complete at a later point
 * in time when the peer responds back with an RTR.
*/ int ldc_up(ldc_handle_t handle) { int rv; ldc_chan_t *ldcp; ldc_msg_t *ldcmsg; uint64_t tx_tail, tstate, link_state; if (handle == 0) { DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; mutex_enter(&ldcp->lock); D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id); /* clear the reset state */ tstate = ldcp->tstate; ldcp->tstate &= ~TS_IN_RESET; if (ldcp->tstate == TS_UP) { DWARN(ldcp->id, "ldc_up: (0x%llx) channel is already in UP state\n", ldcp->id); /* mark channel as up */ ldcp->status = LDC_UP; /* * if channel was in reset state and there was * pending data clear interrupt state. this will * trigger an interrupt, causing the RX handler to * to invoke the client's callback */ if ((tstate & TS_IN_RESET) && ldcp->rx_intr_state == LDC_INTR_PEND) { D1(ldcp->id, "ldc_up: (0x%llx) channel has pending data, " "clearing interrupt\n", ldcp->id); i_ldc_clear_intr(ldcp, CNEX_RX_INTR); } mutex_exit(&ldcp->lock); return (0); } /* if the channel is in RAW mode - mark it as UP, if READY */ if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) { ldcp->tstate = TS_UP; mutex_exit(&ldcp->lock); return (0); } /* Don't start another handshake if there is one in progress */ if (ldcp->hstate) { D1(ldcp->id, "ldc_up: (0x%llx) channel handshake in progress\n", ldcp->id); mutex_exit(&ldcp->lock); return (0); } mutex_enter(&ldcp->tx_lock); /* save current link state */ link_state = ldcp->link_state; /* get the current tail for the LDC msg */ rv = i_ldc_get_tx_tail(ldcp, &tx_tail); if (rv) { D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n", ldcp->id); mutex_exit(&ldcp->tx_lock); mutex_exit(&ldcp->lock); return (ECONNREFUSED); } /* * If i_ldc_get_tx_tail() changed link_state to either RESET or UP, * from a previous state of DOWN, then mark the channel as * being ready for handshake. 
*/ if ((link_state == LDC_CHANNEL_DOWN) && (link_state != ldcp->link_state)) { ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) || (ldcp->link_state == LDC_CHANNEL_UP)); if (ldcp->mode == LDC_MODE_RAW) { ldcp->status = LDC_UP; ldcp->tstate = TS_UP; mutex_exit(&ldcp->tx_lock); mutex_exit(&ldcp->lock); return (0); } else { ldcp->status = LDC_READY; ldcp->tstate |= TS_LINK_READY; } } ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); ZERO_PKT(ldcmsg); ldcmsg->type = LDC_CTRL; ldcmsg->stype = LDC_INFO; ldcmsg->ctrl = LDC_VER; ldcp->next_vidx = 0; bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0])); DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg); /* initiate the send by calling into HV and set the new tail */ tx_tail = (tx_tail + LDC_PACKET_SIZE) % (ldcp->tx_q_entries << LDC_PACKET_SHIFT); rv = i_ldc_set_tx_tail(ldcp, tx_tail); if (rv) { DWARN(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n", ldcp->id, rv); mutex_exit(&ldcp->tx_lock); mutex_exit(&ldcp->lock); return (rv); } ldcp->hstate |= TS_SENT_VER; ldcp->tx_tail = tx_tail; D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id); mutex_exit(&ldcp->tx_lock); mutex_exit(&ldcp->lock); return (rv); } /* * Bring a channel down by resetting its state and queues */ int ldc_down(ldc_handle_t handle) { ldc_chan_t *ldcp; if (handle == 0) { DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; mutex_enter(&ldcp->lock); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_TRUE); mutex_exit(&ldcp->tx_lock); mutex_exit(&ldcp->lock); return (0); } /* * Get the current channel status */ int ldc_status(ldc_handle_t handle, ldc_status_t *status) { ldc_chan_t *ldcp; if (handle == 0 || status == NULL) { DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; *status = ((ldc_chan_t *)handle)->status; D1(ldcp->id, "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status); return (0); } /* * Set the 
channel's callback mode - enable/disable callbacks */ int ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode) { ldc_chan_t *ldcp; if (handle == 0) { DWARN(DBG_ALL_LDCS, "ldc_set_intr_mode: invalid channel handle\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; /* * Record no callbacks should be invoked */ mutex_enter(&ldcp->lock); switch (cmode) { case LDC_CB_DISABLE: if (!ldcp->cb_enabled) { DWARN(ldcp->id, "ldc_set_cb_mode: (0x%llx) callbacks disabled\n", ldcp->id); break; } ldcp->cb_enabled = B_FALSE; D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n", ldcp->id); break; case LDC_CB_ENABLE: if (ldcp->cb_enabled) { DWARN(ldcp->id, "ldc_set_cb_mode: (0x%llx) callbacks enabled\n", ldcp->id); break; } ldcp->cb_enabled = B_TRUE; D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n", ldcp->id); break; } mutex_exit(&ldcp->lock); return (0); } /* * Check to see if there are packets on the incoming queue * Will return hasdata = B_FALSE if there are no packets */ int ldc_chkq(ldc_handle_t handle, boolean_t *hasdata) { int rv; uint64_t rx_head, rx_tail; ldc_chan_t *ldcp; if (handle == 0) { DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; *hasdata = B_FALSE; mutex_enter(&ldcp->lock); if (ldcp->tstate != TS_UP) { D1(ldcp->id, "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id); mutex_exit(&ldcp->lock); return (ECONNRESET); } /* Read packet(s) from the queue */ rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail, &ldcp->link_state); if (rv != 0) { cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id); mutex_exit(&ldcp->lock); return (EIO); } /* reset the channel state if the channel went down */ if (ldcp->link_state == LDC_CHANNEL_DOWN || ldcp->link_state == LDC_CHANNEL_RESET) { mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->tx_lock); mutex_exit(&ldcp->lock); return (ECONNRESET); } switch (ldcp->mode) { case LDC_MODE_RAW: /* * In raw mode, 
there are no ctrl packets, so checking * if the queue is non-empty is sufficient. */ *hasdata = (rx_head != rx_tail); break; case LDC_MODE_UNRELIABLE: /* * In unreliable mode, if the queue is non-empty, we need * to check if it actually contains unread data packets. * The queue may just contain ctrl packets. */ if (rx_head != rx_tail) { *hasdata = (i_ldc_chkq(ldcp) == 0); /* * If no data packets were found on the queue, * all packets must have been control packets * which will now have been processed, leaving * the queue empty. If the interrupt state * is pending, we need to clear the interrupt * here. */ if (*hasdata == B_FALSE && ldcp->rx_intr_state == LDC_INTR_PEND) { i_ldc_clear_intr(ldcp, CNEX_RX_INTR); } } break; case LDC_MODE_RELIABLE: /* * In reliable mode, first check for 'stream_remains' > 0. * Otherwise, if the data queue head and tail pointers * differ, there must be data to read. */ if (ldcp->stream_remains > 0) *hasdata = B_TRUE; else *hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail); break; default: cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode " "(0x%x)", ldcp->id, ldcp->mode); mutex_exit(&ldcp->lock); return (EIO); } mutex_exit(&ldcp->lock); return (0); } /* * Read 'size' amount of bytes or less. If incoming buffer * is more than 'size', ENOBUFS is returned. * * On return, size contains the number of bytes read. 
*/
int
ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
{
	ldc_chan_t	*ldcp;
	uint64_t	qhead = 0, qtail = 0;
	int		hv_rv, result;

	if (handle == 0) {
		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
		return (EINVAL);
	}

	ldcp = (ldc_chan_t *)handle;

	/* channel lock */
	mutex_enter(&ldcp->lock);

	/*
	 * Reliable-mode channels that are UP are handled completely here;
	 * they never consult the hypervisor Rx queue state below.
	 */
	if (ldcp->tstate == TS_UP && ldcp->mode == LDC_MODE_RELIABLE) {
		TRACE_RXDQ_LENGTH(ldcp);
		result = ldcp->read_p(ldcp, bufp, sizep);

		/*
		 * In reliable mode the interrupt state is only left
		 * pending by the interrupt handler when the secondary
		 * data queue filled up and packets stayed behind on the
		 * Rx queue. Once the data queue has room for at least
		 * one more packet, clear the interrupt.
		 */
		if (ldcp->rx_intr_state == LDC_INTR_PEND &&
		    Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
		    ldcp->rx_dq_entries << LDC_PACKET_SHIFT) >=
		    LDC_PACKET_SIZE) {
			/* data queue is not full */
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
		}

		mutex_exit(&ldcp->lock);
		return (result);
	}

	if (ldcp->tstate != TS_UP) {
		DWARN(ldcp->id,
		    "ldc_read: (0x%llx) channel is not in UP state\n",
		    ldcp->id);
		result = ECONNRESET;
	} else {
		result = ldcp->read_p(ldcp, bufp, sizep);
	}

	/*
	 * Fetch the current Rx queue pointers and link state; used below
	 * to decide whether the queue has been drained.
	 */
	hv_rv = hv_ldc_rx_get_state(ldcp->id, &qhead, &qtail,
	    &ldcp->link_state);
	if (hv_rv != 0) {
		cmn_err(CE_WARN,
		    "ldc_read: (0x%lx) unable to read queue ptrs", ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (ECONNRESET);
	}

	/* a failed read (or a channel that is not UP) is reported as is */
	if (result != 0) {
		mutex_exit(&ldcp->lock);
		return (result);
	}

	/* the read succeeded, but the link has since gone away */
	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
	    ldcp->link_state == LDC_CHANNEL_RESET) {
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_FALSE);
		result = ECONNRESET;
		mutex_exit(&ldcp->tx_lock);
	}

	/* if queue has been drained - clear interrupt */
	if (ldcp->rx_intr_state == LDC_INTR_PEND && qhead == qtail) {
		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
	}

	mutex_exit(&ldcp->lock);
	return (result);
}

/*
 * Basic raw mondo read -
 * no interpretation of mondo contents at
all.
 *
 * Enter and exit with ldcp->lock held by caller
 *
 * Copies exactly one LDC_PAYLOAD_SIZE_RAW payload from the head of the
 * Rx queue into target_bufp and advances the Rx head by one packet.
 *
 * Returns ENOBUFS if *sizep is smaller than LDC_PAYLOAD_SIZE_RAW, EIO
 * if the queue pointers cannot be read, ECONNRESET if the link is
 * down/reset (the channel is reset), 0 with *sizep == 0 when the queue
 * is empty, and otherwise the result of i_ldc_set_rx_head() with
 * *sizep set to LDC_PAYLOAD_SIZE_RAW.
 */
static int
i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
{
	uint64_t	q_size_mask;
	ldc_msg_t	*msgp;
	uint8_t		*msgbufp;
	int		rv = 0;
	uint64_t	rx_head, rx_tail;

	ASSERT(mutex_owned(&ldcp->lock));

	/*
	 * Compare as size_t; narrowing the caller's size to an int first
	 * would misreport very large buffers as too small.
	 */
	if (*sizep < LDC_PAYLOAD_SIZE_RAW)
		return (ENOBUFS);

	/* compute mask for increment */
	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;

	/*
	 * Read packet(s) from the queue
	 */
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN,
		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
		    ldcp->id);
		return (EIO);
	}
	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
	    " rxt=0x%llx, st=0x%llx\n",
	    ldcp->id, rx_head, rx_tail, ldcp->link_state);

	/* reset the channel state if the channel went down */
	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
	    ldcp->link_state == LDC_CHANNEL_RESET) {
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_FALSE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	/*
	 * Check for empty queue
	 */
	if (rx_head == rx_tail) {
		*sizep = 0;
		return (0);
	}

	/* get the message */
	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);

	/* if channel is in RAW mode, copy data and return */
	msgbufp = (uint8_t *)&(msgp->raw[0]);

	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);

	DUMP_PAYLOAD(ldcp->id, msgbufp);

	*sizep = LDC_PAYLOAD_SIZE_RAW;

	/* advance the head by one packet, wrapping at the queue size */
	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
	rv = i_ldc_set_rx_head(ldcp, rx_head);

	return (rv);
}

/*
 * Process LDC mondos to build larger packets
 * with either un-reliable or reliable delivery.
* * Enter and exit with ldcp->lock held by caller */ static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) { int rv = 0; uint64_t rx_head = 0, rx_tail = 0; uint64_t curr_head = 0; ldc_msg_t *msg; caddr_t target; size_t len = 0, bytes_read = 0; int retries = 0; uint64_t q_va, q_size_mask; uint64_t first_fragment = 0; target = target_bufp; ASSERT(mutex_owned(&ldcp->lock)); /* check if the buffer and size are valid */ if (target_bufp == NULL || *sizep == 0) { DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n", ldcp->id); return (EINVAL); } /* Set q_va and compute increment mask for the appropriate queue */ if (ldcp->mode == LDC_MODE_RELIABLE) { q_va = ldcp->rx_dq_va; q_size_mask = (ldcp->rx_dq_entries-1)<rx_q_va; q_size_mask = (ldcp->rx_q_entries-1)<readq_get_state(ldcp, &curr_head, &rx_tail, &ldcp->link_state); if (rv != 0) { cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs", ldcp->id); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_TRUE); mutex_exit(&ldcp->tx_lock); return (ECONNRESET); } D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n", ldcp->id, curr_head, rx_tail, ldcp->link_state); /* reset the channel state if the channel went down */ if (ldcp->link_state != LDC_CHANNEL_UP) goto channel_is_reset; for (;;) { if (curr_head == rx_tail) { /* * If a data queue is being used, check the Rx HV * queue. This will copy over any new data packets * that have arrived. 
*/ if (ldcp->mode == LDC_MODE_RELIABLE) (void) i_ldc_chkq(ldcp); rv = ldcp->readq_get_state(ldcp, &rx_head, &rx_tail, &ldcp->link_state); if (rv != 0) { cmn_err(CE_WARN, "ldc_read: (0x%lx) cannot read queue ptrs", ldcp->id); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_TRUE); mutex_exit(&ldcp->tx_lock); return (ECONNRESET); } if (ldcp->link_state != LDC_CHANNEL_UP) goto channel_is_reset; if (curr_head == rx_tail) { /* If in the middle of a fragmented xfer */ if (first_fragment != 0) { /* wait for ldc_delay usecs */ drv_usecwait(ldc_delay); if (++retries < ldc_max_retries) continue; *sizep = 0; if (ldcp->mode != LDC_MODE_RELIABLE) ldcp->last_msg_rcd = first_fragment - 1; DWARN(DBG_ALL_LDCS, "ldc_read: " "(0x%llx) read timeout", ldcp->id); return (EAGAIN); } *sizep = 0; break; } } retries = 0; D2(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n", ldcp->id, curr_head, rx_head, rx_tail); /* get the message */ msg = (ldc_msg_t *)(q_va + curr_head); DUMP_LDC_PKT(ldcp, "ldc_read received pkt", ldcp->rx_q_va + curr_head); /* Check the message ID for the message received */ if (ldcp->mode != LDC_MODE_RELIABLE) { if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) { DWARN(ldcp->id, "ldc_read: (0x%llx) seqid " "error, q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail); /* throw away data */ bytes_read = 0; /* Reset last_msg_rcd to start of message */ if (first_fragment != 0) { ldcp->last_msg_rcd = first_fragment - 1; first_fragment = 0; } /* * Send a NACK -- invalid seqid * get the current tail for the response */ rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK, (msg->ctrl & LDC_CTRL_MASK)); if (rv) { cmn_err(CE_NOTE, "ldc_read: (0x%lx) err sending " "NACK msg\n", ldcp->id); /* if cannot send NACK - reset chan */ mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->tx_lock); rv = ECONNRESET; break; } /* purge receive queue */ rv = i_ldc_set_rx_head(ldcp, rx_tail); break; } /* * Process any messages of type CTRL messages * Future 
implementations should try to pass these * to LDC link by resetting the intr state. * * NOTE: not done as a switch() as type can be * both ctrl+data */ if (msg->type & LDC_CTRL) { if (rv = i_ldc_ctrlmsg(ldcp, msg)) { if (rv == EAGAIN) continue; rv = i_ldc_set_rx_head(ldcp, rx_tail); *sizep = 0; bytes_read = 0; break; } } /* process data ACKs */ if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) { if (rv = i_ldc_process_data_ACK(ldcp, msg)) { *sizep = 0; bytes_read = 0; break; } } /* process data NACKs */ if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) { DWARN(ldcp->id, "ldc_read: (0x%llx) received DATA/NACK", ldcp->id); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_TRUE); mutex_exit(&ldcp->tx_lock); return (ECONNRESET); } } /* process data messages */ if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) { uint8_t *msgbuf = (uint8_t *)( (ldcp->mode == LDC_MODE_RELIABLE) ? msg->rdata : msg->udata); D2(ldcp->id, "ldc_read: (0x%llx) received data msg\n", ldcp->id); /* get the packet length */ len = (msg->env & LDC_LEN_MASK); /* * FUTURE OPTIMIZATION: * dont need to set q head for every * packet we read just need to do this when * we are done or need to wait for more * mondos to make a full packet - this is * currently expensive. */ if (first_fragment == 0) { /* * first packets should always have the start * bit set (even for a single packet). 
If not * throw away the packet */ if (!(msg->env & LDC_FRAG_START)) { DWARN(DBG_ALL_LDCS, "ldc_read: (0x%llx) not start - " "frag=%x\n", ldcp->id, (msg->env) & LDC_FRAG_MASK); /* toss pkt, inc head, cont reading */ bytes_read = 0; target = target_bufp; curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask; if (rv = ldcp->readq_set_head(ldcp, curr_head)) break; continue; } first_fragment = msg->seqid; } else { /* check to see if this is a pkt w/ START bit */ if (msg->env & LDC_FRAG_START) { DWARN(DBG_ALL_LDCS, "ldc_read:(0x%llx) unexpected pkt" " env=0x%x discarding %d bytes," " lastmsg=%d, currentmsg=%d\n", ldcp->id, msg->env&LDC_FRAG_MASK, bytes_read, ldcp->last_msg_rcd, msg->seqid); /* throw data we have read so far */ bytes_read = 0; target = target_bufp; first_fragment = msg->seqid; if (rv = ldcp->readq_set_head(ldcp, curr_head)) break; } } /* copy (next) pkt into buffer */ if (len <= (*sizep - bytes_read)) { bcopy(msgbuf, target, len); target += len; bytes_read += len; } else { /* * there is not enough space in the buffer to * read this pkt. 
throw message away & continue * reading data from queue */ DWARN(DBG_ALL_LDCS, "ldc_read: (0x%llx) buffer too small, " "head=0x%lx, expect=%d, got=%d\n", ldcp->id, curr_head, *sizep, bytes_read+len); first_fragment = 0; target = target_bufp; bytes_read = 0; /* throw away everything received so far */ if (rv = ldcp->readq_set_head(ldcp, curr_head)) break; /* continue reading remaining pkts */ continue; } } /* set the message id */ if (ldcp->mode != LDC_MODE_RELIABLE) ldcp->last_msg_rcd = msg->seqid; /* move the head one position */ curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask; if (msg->env & LDC_FRAG_STOP) { /* * All pkts that are part of this fragmented transfer * have been read or this was a single pkt read * or there was an error */ /* set the queue head */ if (rv = ldcp->readq_set_head(ldcp, curr_head)) bytes_read = 0; *sizep = bytes_read; break; } /* advance head if it is a CTRL packet or a DATA ACK packet */ if ((msg->type & LDC_CTRL) || ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) { /* set the queue head */ if (rv = ldcp->readq_set_head(ldcp, curr_head)) { bytes_read = 0; break; } D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx", ldcp->id, curr_head); } } /* for (;;) */ D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep); return (rv); channel_is_reset: mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->tx_lock); return (ECONNRESET); } /* * Fetch and buffer incoming packets so we can hand them back as * a basic byte stream. 
* * Enter and exit with ldcp->lock held by caller */ static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep) { int rv; size_t size; ASSERT(mutex_owned(&ldcp->lock)); D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d", ldcp->id, *sizep); if (ldcp->stream_remains == 0) { size = ldcp->mtu; rv = i_ldc_read_packet(ldcp, (caddr_t)ldcp->stream_bufferp, &size); D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d", ldcp->id, size); if (rv != 0) return (rv); ldcp->stream_remains = size; ldcp->stream_offset = 0; } size = MIN(ldcp->stream_remains, *sizep); bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size); ldcp->stream_offset += size; ldcp->stream_remains -= size; D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d", ldcp->id, size); *sizep = size; return (0); } /* * Write specified amount of bytes to the channel * in multiple pkts of pkt_payload size. Each * packet is tagged with an unique packet ID in * the case of a reliable link. * * On return, size contains the number of bytes written. 
*/ int ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep) { ldc_chan_t *ldcp; int rv = 0; if (handle == 0) { DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; mutex_enter(&ldcp->tx_lock); /* check if non-zero data to write */ if (buf == NULL || sizep == NULL) { DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n", ldcp->id); mutex_exit(&ldcp->tx_lock); return (EINVAL); } if (*sizep == 0) { DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n", ldcp->id); mutex_exit(&ldcp->tx_lock); return (0); } /* Check if channel is UP for data exchange */ if (ldcp->tstate != TS_UP) { DWARN(ldcp->id, "ldc_write: (0x%llx) channel is not in UP state\n", ldcp->id); *sizep = 0; rv = ECONNRESET; } else { rv = ldcp->write_p(ldcp, buf, sizep); } mutex_exit(&ldcp->tx_lock); return (rv); } /* * Write a raw packet to the channel * On return, size contains the number of bytes written. */ static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep) { ldc_msg_t *ldcmsg; uint64_t tx_head, tx_tail, new_tail; int rv = 0; size_t size; ASSERT(MUTEX_HELD(&ldcp->tx_lock)); ASSERT(ldcp->mode == LDC_MODE_RAW); size = *sizep; /* * Check to see if the packet size is less than or * equal to packet size support in raw mode */ if (size > ldcp->pkt_payload) { DWARN(ldcp->id, "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n", ldcp->id, *sizep); *sizep = 0; return (EMSGSIZE); } /* get the qptrs for the tx queue */ rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); if (rv != 0) { cmn_err(CE_WARN, "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id); *sizep = 0; return (EIO); } if (ldcp->link_state == LDC_CHANNEL_DOWN || ldcp->link_state == LDC_CHANNEL_RESET) { DWARN(ldcp->id, "ldc_write: (0x%llx) channel down/reset\n", ldcp->id); *sizep = 0; if (mutex_tryenter(&ldcp->lock)) { i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->lock); } else { /* * Release Tx lock, and then 
reacquire channel * and Tx lock in correct order */ mutex_exit(&ldcp->tx_lock); mutex_enter(&ldcp->lock); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->lock); } return (ECONNRESET); } tx_tail = ldcp->tx_tail; tx_head = ldcp->tx_head; new_tail = (tx_tail + LDC_PACKET_SIZE) & ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT); if (new_tail == tx_head) { DWARN(DBG_ALL_LDCS, "ldc_write: (0x%llx) TX queue is full\n", ldcp->id); *sizep = 0; return (EWOULDBLOCK); } D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d", ldcp->id, size); /* Send the data now */ ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); /* copy the data into pkt */ bcopy((uint8_t *)buf, ldcmsg, size); /* increment tail */ tx_tail = new_tail; /* * All packets have been copied into the TX queue * update the tail ptr in the HV */ rv = i_ldc_set_tx_tail(ldcp, tx_tail); if (rv) { if (rv == EWOULDBLOCK) { DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n", ldcp->id); *sizep = 0; return (EWOULDBLOCK); } *sizep = 0; if (mutex_tryenter(&ldcp->lock)) { i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->lock); } else { /* * Release Tx lock, and then reacquire channel * and Tx lock in correct order */ mutex_exit(&ldcp->tx_lock); mutex_enter(&ldcp->lock); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->lock); } return (ECONNRESET); } ldcp->tx_tail = tx_tail; *sizep = size; D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size); return (rv); } /* * Write specified amount of bytes to the channel * in multiple pkts of pkt_payload size. Each * packet is tagged with an unique packet ID in * the case of a reliable link. * * On return, size contains the number of bytes written. 
* This function needs to ensure that the write size is < MTU size */ static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size) { ldc_msg_t *ldcmsg; uint64_t tx_head, tx_tail, new_tail, start; uint64_t txq_size_mask, numavail; uint8_t *msgbuf, *source = (uint8_t *)buf; size_t len, bytes_written = 0, remaining; int rv; uint32_t curr_seqid; ASSERT(MUTEX_HELD(&ldcp->tx_lock)); ASSERT(ldcp->mode == LDC_MODE_RELIABLE || ldcp->mode == LDC_MODE_UNRELIABLE); /* compute mask for increment */ txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT; /* get the qptrs for the tx queue */ rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state); if (rv != 0) { cmn_err(CE_WARN, "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id); *size = 0; return (EIO); } if (ldcp->link_state == LDC_CHANNEL_DOWN || ldcp->link_state == LDC_CHANNEL_RESET) { DWARN(ldcp->id, "ldc_write: (0x%llx) channel down/reset\n", ldcp->id); *size = 0; if (mutex_tryenter(&ldcp->lock)) { i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->lock); } else { /* * Release Tx lock, and then reacquire channel * and Tx lock in correct order */ mutex_exit(&ldcp->tx_lock); mutex_enter(&ldcp->lock); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->lock); } return (ECONNRESET); } tx_tail = ldcp->tx_tail; new_tail = (tx_tail + LDC_PACKET_SIZE) % (ldcp->tx_q_entries << LDC_PACKET_SHIFT); /* * Check to see if the queue is full. The check is done using * the appropriate head based on the link mode. 
*/ i_ldc_get_tx_head(ldcp, &tx_head); if (new_tail == tx_head) { DWARN(DBG_ALL_LDCS, "ldc_write: (0x%llx) TX queue is full\n", ldcp->id); *size = 0; return (EWOULDBLOCK); } /* * Make sure that the LDC Tx queue has enough space */ numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT) + ldcp->tx_q_entries - 1; numavail %= ldcp->tx_q_entries; if (*size > (numavail * ldcp->pkt_payload)) { DWARN(DBG_ALL_LDCS, "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id); return (EWOULDBLOCK); } D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d", ldcp->id, *size); /* Send the data now */ bytes_written = 0; curr_seqid = ldcp->last_msg_snt; start = tx_tail; while (*size > bytes_written) { ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail); msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE) ? ldcmsg->rdata : ldcmsg->udata); ldcmsg->type = LDC_DATA; ldcmsg->stype = LDC_INFO; ldcmsg->ctrl = 0; remaining = *size - bytes_written; len = min(ldcp->pkt_payload, remaining); ldcmsg->env = (uint8_t)len; curr_seqid++; ldcmsg->seqid = curr_seqid; /* copy the data into pkt */ bcopy(source, msgbuf, len); source += len; bytes_written += len; /* increment tail */ tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask; ASSERT(tx_tail != tx_head); } /* Set the start and stop bits */ ldcmsg->env |= LDC_FRAG_STOP; ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start); ldcmsg->env |= LDC_FRAG_START; /* * All packets have been copied into the TX queue * update the tail ptr in the HV */ rv = i_ldc_set_tx_tail(ldcp, tx_tail); if (rv == 0) { ldcp->tx_tail = tx_tail; ldcp->last_msg_snt = curr_seqid; *size = bytes_written; } else { int rv2; if (rv != EWOULDBLOCK) { *size = 0; if (mutex_tryenter(&ldcp->lock)) { i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->lock); } else { /* * Release Tx lock, and then reacquire channel * and Tx lock in correct order */ mutex_exit(&ldcp->tx_lock); mutex_enter(&ldcp->lock); mutex_enter(&ldcp->tx_lock); i_ldc_reset(ldcp, B_FALSE); mutex_exit(&ldcp->lock); } 
return (ECONNRESET); } D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, " "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n", rv, ldcp->tx_head, ldcp->tx_tail, tx_tail, (ldcp->tx_q_entries << LDC_PACKET_SHIFT)); rv2 = hv_ldc_tx_get_state(ldcp->id, &tx_head, &tx_tail, &ldcp->link_state); D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x " "(head 0x%x, tail 0x%x state 0x%x)\n", rv2, tx_head, tx_tail, ldcp->link_state); *size = 0; } D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size); return (rv); } /* * Write specified amount of bytes to the channel * in multiple pkts of pkt_payload size. Each * packet is tagged with an unique packet ID in * the case of a reliable link. * * On return, size contains the number of bytes written. * This function needs to ensure that the write size is < MTU size */ static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep) { ASSERT(MUTEX_HELD(&ldcp->tx_lock)); ASSERT(ldcp->mode == LDC_MODE_RELIABLE); /* Truncate packet to max of MTU size */ if (*sizep > ldcp->mtu) *sizep = ldcp->mtu; return (i_ldc_write_packet(ldcp, buf, sizep)); } /* * Interfaces for channel nexus to register/unregister with LDC module * The nexus will register functions to be used to register individual * channels with the nexus and enable interrupts for the channels */ int ldc_register(ldc_cnex_t *cinfo) { ldc_chan_t *ldcp; if (cinfo == NULL || cinfo->dip == NULL || cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL || cinfo->add_intr == NULL || cinfo->rem_intr == NULL || cinfo->clr_intr == NULL) { DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n"); return (EINVAL); } mutex_enter(&ldcssp->lock); /* nexus registration */ ldcssp->cinfo.dip = cinfo->dip; ldcssp->cinfo.reg_chan = cinfo->reg_chan; ldcssp->cinfo.unreg_chan = cinfo->unreg_chan; ldcssp->cinfo.add_intr = cinfo->add_intr; ldcssp->cinfo.rem_intr = cinfo->rem_intr; ldcssp->cinfo.clr_intr = cinfo->clr_intr; /* register any channels that might have been previously 
initialized */ ldcp = ldcssp->chan_list; while (ldcp) { if ((ldcp->tstate & TS_QCONF_RDY) && (ldcp->tstate & TS_CNEX_RDY) == 0) (void) i_ldc_register_channel(ldcp); ldcp = ldcp->next; } mutex_exit(&ldcssp->lock); return (0); } int ldc_unregister(ldc_cnex_t *cinfo) { if (cinfo == NULL || cinfo->dip == NULL) { DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n"); return (EINVAL); } mutex_enter(&ldcssp->lock); if (cinfo->dip != ldcssp->cinfo.dip) { DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n"); mutex_exit(&ldcssp->lock); return (EINVAL); } /* nexus unregister */ ldcssp->cinfo.dip = NULL; ldcssp->cinfo.reg_chan = NULL; ldcssp->cinfo.unreg_chan = NULL; ldcssp->cinfo.add_intr = NULL; ldcssp->cinfo.rem_intr = NULL; ldcssp->cinfo.clr_intr = NULL; mutex_exit(&ldcssp->lock); return (0); } int ldc_info(ldc_handle_t handle, ldc_info_t *info) { ldc_chan_t *ldcp; uint64_t avail; if (handle == 0 || info == NULL) { DWARN(DBG_ALL_LDCS, "ldc_get_info: invalid args\n"); return (EINVAL); } ldcp = (ldc_chan_t *)handle; mutex_enter(&ldcp->lock); /* check to see if channel is initalized */ if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) { DWARN(ldcp->id, "ldc_get_info: (0x%llx) channel not initialized\n", ldcp->id); mutex_exit(&ldcp->lock); return (EINVAL); } mutex_exit(&ldcp->lock); /* * ldcssp->mapin_size is the max amount of shared memory supported by * the Hypervisor per guest. e.g, legacy HV supports 64MB; latest HV * support 1GB. This size is read during ldc module initialization. * * ldc_dring_direct_map_rsvd is the amount of memory reserved for * mapping in descriptor rings. In the initial implementation, we use a * simple approach to determine the amount of mapin space available per * channel. In future, we may implement strict accounting of the actual * memory consumed to determine the exact amount available per channel. 
*/ if (ldcssp->mapin_size <= ldc_dring_direct_map_rsvd) { info->direct_map_size_max = 0; return (0); } avail = ldcssp->mapin_size - ldc_dring_direct_map_rsvd; if (avail >= ldc_direct_map_size_max) { info->direct_map_size_max = ldc_direct_map_size_max; } else { info->direct_map_size_max = 0; } return (0); }