1843e1988Sjohnlev /* 2843e1988Sjohnlev * CDDL HEADER START 3843e1988Sjohnlev * 4843e1988Sjohnlev * The contents of this file are subject to the terms of the 5843e1988Sjohnlev * Common Development and Distribution License (the "License"). 6843e1988Sjohnlev * You may not use this file except in compliance with the License. 7843e1988Sjohnlev * 8843e1988Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9843e1988Sjohnlev * or http://www.opensolaris.org/os/licensing. 10843e1988Sjohnlev * See the License for the specific language governing permissions 11843e1988Sjohnlev * and limitations under the License. 12843e1988Sjohnlev * 13843e1988Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each 14843e1988Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15843e1988Sjohnlev * If applicable, add the following below this CDDL HEADER, with the 16843e1988Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying 17843e1988Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner] 18843e1988Sjohnlev * 19843e1988Sjohnlev * CDDL HEADER END 20843e1988Sjohnlev */ 21843e1988Sjohnlev 22843e1988Sjohnlev /* 237f0b8309SEdward Pilatowicz * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24843e1988Sjohnlev * Use is subject to license terms. 25843e1988Sjohnlev */ 26843e1988Sjohnlev 27fcebcf2bSYuri Pankov /* 28fcebcf2bSYuri Pankov * Copyright (c) 2014 by Delphix. All rights reserved. 29*cd93da82SYuri Pankov * Copyright 2017 Nexenta Systems, Inc. 30fcebcf2bSYuri Pankov */ 31843e1988Sjohnlev 32843e1988Sjohnlev #ifndef _SYS_XDF_H 33843e1988Sjohnlev #define _SYS_XDF_H 34843e1988Sjohnlev 357f0b8309SEdward Pilatowicz #include <sys/ddi.h> 367f0b8309SEdward Pilatowicz #include <sys/sunddi.h> 377f0b8309SEdward Pilatowicz #include <sys/cmlb.h> 387f0b8309SEdward Pilatowicz #include <sys/dkio.h> 397f0b8309SEdward Pilatowicz 407f0b8309SEdward Pilatowicz #include <sys/gnttab.h> 417f0b8309SEdward Pilatowicz #include <xen/sys/xendev.h> 427f0b8309SEdward Pilatowicz 43843e1988Sjohnlev #ifdef __cplusplus 44843e1988Sjohnlev extern "C" { 45843e1988Sjohnlev #endif 46843e1988Sjohnlev 47843e1988Sjohnlev 48843e1988Sjohnlev /* 49843e1988Sjohnlev * VBDs have standard 512 byte blocks 50843e1988Sjohnlev * A single blkif_request can transfer up to 11 pages of data, 1 page/segment 51843e1988Sjohnlev */ 52843e1988Sjohnlev #define XB_BSIZE DEV_BSIZE 53843e1988Sjohnlev #define XB_BMASK (XB_BSIZE - 1) 54843e1988Sjohnlev #define XB_BSHIFT 9 5565908c77Syu, larry liu - Sun Microsystems - Beijing China #define XB_DTOB(bn, vdp) ((bn) * (vdp)->xdf_xdev_secsize) 56843e1988Sjohnlev 57843e1988Sjohnlev #define XB_MAX_SEGLEN (8 * XB_BSIZE) 58843e1988Sjohnlev #define XB_SEGOFFSET (XB_MAX_SEGLEN - 1) 59843e1988Sjohnlev #define XB_MAX_XFER (XB_MAX_SEGLEN * BLKIF_MAX_SEGMENTS_PER_REQUEST) 60843e1988Sjohnlev #define XB_MAXPHYS (XB_MAX_XFER * BLKIF_RING_SIZE) 61843e1988Sjohnlev 62ee56d0c8SMark Johnson /* Number of sectors per segement */ 63ee56d0c8SMark Johnson #define XB_NUM_SECTORS_PER_SEG (PAGESIZE / XB_BSIZE) 64ee56d0c8SMark Johnson /* sectors are number 0 through XB_NUM_SECTORS_PER_SEG - 1 */ 65ee56d0c8SMark Johnson #define XB_LAST_SECTOR_IN_SEG (XB_NUM_SECTORS_PER_SEG - 1) 66ee56d0c8SMark Johnson 6706bbe1e0Sedp 6806bbe1e0Sedp /* 6906bbe1e0Sedp * Slice for absolute disk transaction. 7006bbe1e0Sedp * 7106bbe1e0Sedp * Hack Alert. XB_SLICE_NONE is a magic value that can be written into the 7206bbe1e0Sedp * b_private field of buf structures passed to xdf_strategy(). When present 7306bbe1e0Sedp * it indicates that the I/O is using an absolute offset. (ie, the I/O is 7406bbe1e0Sedp * not bound to any one partition.) This magic value is currently used by 7506bbe1e0Sedp * the pv_cmdk driver. This hack is shamelessly stolen from the sun4v vdc 7606bbe1e0Sedp * driver, another virtual disk device driver. (Although in the case of 7706bbe1e0Sedp * vdc the hack is less egregious since it is self contained within the 7806bbe1e0Sedp * vdc driver, where as here it is used as an interface between the pv_cmdk 7906bbe1e0Sedp * driver and the xdf driver.) 8006bbe1e0Sedp */ 817f0b8309SEdward Pilatowicz #define XB_SLICE_NONE 0xFF 8206bbe1e0Sedp 83843e1988Sjohnlev /* 84843e1988Sjohnlev * blkif status 85843e1988Sjohnlev */ 867f0b8309SEdward Pilatowicz typedef enum xdf_state { 87843e1988Sjohnlev /* 88843e1988Sjohnlev * initial state 89843e1988Sjohnlev */ 907f0b8309SEdward Pilatowicz XD_UNKNOWN = 0, 91843e1988Sjohnlev /* 92843e1988Sjohnlev * ring and evtchn alloced, xenbus state changed to 93843e1988Sjohnlev * XenbusStateInitialised, wait for backend to connect 94843e1988Sjohnlev */ 957f0b8309SEdward Pilatowicz XD_INIT = 1, 967f0b8309SEdward Pilatowicz /* 977f0b8309SEdward Pilatowicz * backend and frontend xenbus state has changed to 987f0b8309SEdward Pilatowicz * XenbusStateConnected. IO is now allowed, but we are not still 997f0b8309SEdward Pilatowicz * fully initialized. 1007f0b8309SEdward Pilatowicz */ 1017f0b8309SEdward Pilatowicz XD_CONNECTED = 2, 102843e1988Sjohnlev /* 1037f0b8309SEdward Pilatowicz * We're fully initialized and allowing regular IO. 104843e1988Sjohnlev */ 1057f0b8309SEdward Pilatowicz XD_READY = 3, 106843e1988Sjohnlev /* 107843e1988Sjohnlev * vbd interface close request received from backend, no more I/O 108843e1988Sjohnlev * requestis allowed to be put into ring buffer, while interrupt handler 109843e1988Sjohnlev * is allowed to run to finish any outstanding I/O request, disconnect 110843e1988Sjohnlev * process is kicked off by changing xenbus state to XenbusStateClosed 111843e1988Sjohnlev */ 1127f0b8309SEdward Pilatowicz XD_CLOSING = 4, 113843e1988Sjohnlev /* 114843e1988Sjohnlev * disconnection process finished, both backend and frontend's 115843e1988Sjohnlev * xenbus state has been changed to XenbusStateClosed, can be detached 116843e1988Sjohnlev */ 1177f0b8309SEdward Pilatowicz XD_CLOSED = 5, 118843e1988Sjohnlev /* 1197f0b8309SEdward Pilatowicz * We're either being suspended or resuming from a suspend. If we're 1207f0b8309SEdward Pilatowicz * in the process of suspending, we block all new IO, but but allow 1217f0b8309SEdward Pilatowicz * existing IO to drain. 122843e1988Sjohnlev */ 1237f0b8309SEdward Pilatowicz XD_SUSPEND = 6 1247f0b8309SEdward Pilatowicz } xdf_state_t; 125843e1988Sjohnlev 126843e1988Sjohnlev /* 127551bc2a6Smrj * 16 partitions + fdisk 128843e1988Sjohnlev */ 129843e1988Sjohnlev #define XDF_PSHIFT 6 130843e1988Sjohnlev #define XDF_PMASK ((1 << XDF_PSHIFT) - 1) 131843e1988Sjohnlev #define XDF_PEXT (1 << XDF_PSHIFT) 132843e1988Sjohnlev #define XDF_MINOR(i, m) (((i) << XDF_PSHIFT) | (m)) 133843e1988Sjohnlev #define XDF_INST(m) ((m) >> XDF_PSHIFT) 134843e1988Sjohnlev #define XDF_PART(m) ((m) & XDF_PMASK) 135843e1988Sjohnlev 136843e1988Sjohnlev /* 137843e1988Sjohnlev * one blkif_request_t will have one corresponding ge_slot_t 138843e1988Sjohnlev * where we save those grant table refs used in this blkif_request_t 139843e1988Sjohnlev * 140843e1988Sjohnlev * the id of this ge_slot_t will also be put into 'id' field in 141843e1988Sjohnlev * each blkif_request_t when sent out to the ring buffer. 142843e1988Sjohnlev */ 143843e1988Sjohnlev typedef struct ge_slot { 1447f0b8309SEdward Pilatowicz list_node_t gs_vreq_link; 1457f0b8309SEdward Pilatowicz struct v_req *gs_vreq; 1467f0b8309SEdward Pilatowicz domid_t gs_oeid; 1477f0b8309SEdward Pilatowicz int gs_isread; 1487f0b8309SEdward Pilatowicz grant_ref_t gs_ghead; 1497f0b8309SEdward Pilatowicz int gs_ngrefs; 1507f0b8309SEdward Pilatowicz grant_ref_t gs_ge[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 151843e1988Sjohnlev } ge_slot_t; 152843e1988Sjohnlev 153843e1988Sjohnlev /* 154843e1988Sjohnlev * vbd I/O request 155843e1988Sjohnlev * 156843e1988Sjohnlev * An instance of this structure is bound to each buf passed to 157843e1988Sjohnlev * the driver's strategy by setting the pointer into bp->av_back. 158843e1988Sjohnlev * The id of this vreq will also be put into 'id' field in each 159843e1988Sjohnlev * blkif_request_t when sent out to the ring buffer for one DMA 160843e1988Sjohnlev * window of this buf. 161843e1988Sjohnlev * 162843e1988Sjohnlev * Vreq mainly contains DMA information for this buf. In one vreq/buf, 163843e1988Sjohnlev * there could be more than one DMA window, each of which will be 164843e1988Sjohnlev * mapped to one blkif_request_t/ge_slot_t. Ge_slot_t contains all grant 165843e1988Sjohnlev * table entry information for this buf. The ge_slot_t for current DMA 166843e1988Sjohnlev * window is pointed to by v_gs in vreq. 167843e1988Sjohnlev * 168843e1988Sjohnlev * So, grant table entries will only be alloc'ed when the DMA window is 169843e1988Sjohnlev * about to be transferred via blkif_request_t to the ring buffer. And 170843e1988Sjohnlev * they will be freed right after the blkif_response_t is seen. By this 171843e1988Sjohnlev * means, we can make use of grant table entries more efficiently. 172843e1988Sjohnlev */ 173843e1988Sjohnlev typedef struct v_req { 174843e1988Sjohnlev list_node_t v_link; 1757f0b8309SEdward Pilatowicz list_t v_gs; 176843e1988Sjohnlev int v_status; 177843e1988Sjohnlev buf_t *v_buf; 178843e1988Sjohnlev uint_t v_ndmacs; 179843e1988Sjohnlev uint_t v_dmaw; 180843e1988Sjohnlev uint_t v_ndmaws; 181843e1988Sjohnlev uint_t v_nslots; 182843e1988Sjohnlev uint64_t v_blkno; 1837f0b8309SEdward Pilatowicz ddi_dma_handle_t v_memdmahdl; 184843e1988Sjohnlev ddi_acc_handle_t v_align; 1857f0b8309SEdward Pilatowicz ddi_dma_handle_t v_dmahdl; 1867f0b8309SEdward Pilatowicz ddi_dma_cookie_t v_dmac; 187843e1988Sjohnlev caddr_t v_abuf; 188843e1988Sjohnlev uint8_t v_flush_diskcache; 1897f0b8309SEdward Pilatowicz boolean_t v_runq; 190843e1988Sjohnlev } v_req_t; 191843e1988Sjohnlev 192843e1988Sjohnlev /* 193843e1988Sjohnlev * Status set and checked in vreq->v_status by vreq_setup() 194843e1988Sjohnlev * 195843e1988Sjohnlev * These flags will help us to continue the vreq setup work from last failure 196551bc2a6Smrj * point, instead of starting from scratch after each failure. 197843e1988Sjohnlev */ 198843e1988Sjohnlev #define VREQ_INIT 0x0 199843e1988Sjohnlev #define VREQ_INIT_DONE 0x1 200843e1988Sjohnlev #define VREQ_DMAHDL_ALLOCED 0x2 201843e1988Sjohnlev #define VREQ_MEMDMAHDL_ALLOCED 0x3 202843e1988Sjohnlev #define VREQ_DMAMEM_ALLOCED 0x4 203843e1988Sjohnlev #define VREQ_DMABUF_BOUND 0x5 204843e1988Sjohnlev #define VREQ_GS_ALLOCED 0x6 205843e1988Sjohnlev #define VREQ_DMAWIN_DONE 0x7 206843e1988Sjohnlev 207843e1988Sjohnlev /* 208843e1988Sjohnlev * virtual block device per-instance softstate 209843e1988Sjohnlev */ 210843e1988Sjohnlev typedef struct xdf { 211843e1988Sjohnlev dev_info_t *xdf_dip; 2127f0b8309SEdward Pilatowicz char *xdf_addr; 21306bbe1e0Sedp ddi_iblock_cookie_t xdf_ibc; /* mutex iblock cookie */ 214843e1988Sjohnlev domid_t xdf_peer; /* otherend's dom ID */ 215843e1988Sjohnlev xendev_ring_t *xdf_xb_ring; /* I/O ring buffer */ 216843e1988Sjohnlev ddi_acc_handle_t xdf_xb_ring_hdl; /* access handler for ring buffer */ 217843e1988Sjohnlev list_t xdf_vreq_act; /* active vreq list */ 218843e1988Sjohnlev buf_t *xdf_f_act; /* active buf list head */ 219843e1988Sjohnlev buf_t *xdf_l_act; /* active buf list tail */ 2207f0b8309SEdward Pilatowicz buf_t *xdf_i_act; /* active buf list index */ 2217f0b8309SEdward Pilatowicz xdf_state_t xdf_state; /* status of this virtual disk */ 2227f0b8309SEdward Pilatowicz boolean_t xdf_suspending; 223843e1988Sjohnlev ulong_t xdf_vd_open[OTYPCNT]; 224843e1988Sjohnlev ulong_t xdf_vd_lyropen[XDF_PEXT]; 2257f0b8309SEdward Pilatowicz ulong_t xdf_connect_req; 2262de7185cSEdward Pilatowicz kthread_t *xdf_connect_thread; 227843e1988Sjohnlev ulong_t xdf_vd_exclopen; 22806bbe1e0Sedp kmutex_t xdf_iostat_lk; /* muxes lock for the iostat ptr */ 229843e1988Sjohnlev kmutex_t xdf_dev_lk; /* mutex lock for I/O path */ 230843e1988Sjohnlev kmutex_t xdf_cb_lk; /* mutex lock for event handling path */ 231843e1988Sjohnlev kcondvar_t xdf_dev_cv; /* cv used in I/O path */ 2327f0b8309SEdward Pilatowicz uint_t xdf_dinfo; /* disk info from backend xenstore */ 233843e1988Sjohnlev diskaddr_t xdf_xdev_nblocks; /* total size in block */ 23465908c77Syu, larry liu - Sun Microsystems - Beijing China uint_t xdf_xdev_secsize; /* disk blksize from backend */ 23506bbe1e0Sedp cmlb_geom_t xdf_pgeom; 2367f0b8309SEdward Pilatowicz boolean_t xdf_pgeom_set; 2377f0b8309SEdward Pilatowicz boolean_t xdf_pgeom_fixed; 238843e1988Sjohnlev kstat_t *xdf_xdev_iostat; 239843e1988Sjohnlev cmlb_handle_t xdf_vd_lbl; 240843e1988Sjohnlev ddi_softintr_t xdf_softintr_id; 241843e1988Sjohnlev timeout_id_t xdf_timeout_id; 242843e1988Sjohnlev struct gnttab_free_callback xdf_gnt_callback; 2437f0b8309SEdward Pilatowicz boolean_t xdf_feature_barrier; 2447f0b8309SEdward Pilatowicz boolean_t xdf_flush_supported; 2457f0b8309SEdward Pilatowicz boolean_t xdf_media_req_supported; 2467f0b8309SEdward Pilatowicz boolean_t xdf_wce; 247*cd93da82SYuri Pankov boolean_t xdf_cmlb_reattach; 248843e1988Sjohnlev char *xdf_flush_mem; 249843e1988Sjohnlev char *xdf_cache_flush_block; 250551bc2a6Smrj int xdf_evtchn; 2517f0b8309SEdward Pilatowicz enum dkio_state xdf_mstate; 2527f0b8309SEdward Pilatowicz kcondvar_t xdf_mstate_cv; 2537f0b8309SEdward Pilatowicz kcondvar_t xdf_hp_status_cv; 2547f0b8309SEdward Pilatowicz struct buf *xdf_ready_bp; 2557f0b8309SEdward Pilatowicz ddi_taskq_t *xdf_ready_tq; 2567f0b8309SEdward Pilatowicz kthread_t *xdf_ready_tq_thread; 2577f0b8309SEdward Pilatowicz struct buf *xdf_ready_tq_bp; 258fcebcf2bSYuri Pankov ddi_devid_t xdf_tgt_devid; 259843e1988Sjohnlev #ifdef DEBUG 260843e1988Sjohnlev int xdf_dmacallback_num; 2617f0b8309SEdward Pilatowicz kthread_t *xdf_oe_change_thread; 262843e1988Sjohnlev #endif 263843e1988Sjohnlev } xdf_t; 264843e1988Sjohnlev 265843e1988Sjohnlev /* 266843e1988Sjohnlev * VBD I/O requests must be aligned on a 512-byte boundary and specify 267843e1988Sjohnlev * a transfer size which is a mutiple of 512-bytes 268843e1988Sjohnlev */ 269843e1988Sjohnlev #define ALIGNED_XFER(bp) \ 270843e1988Sjohnlev ((((uintptr_t)((bp)->b_un.b_addr) & XB_BMASK) == 0) && \ 271843e1988Sjohnlev (((bp)->b_bcount & XB_BMASK) == 0)) 272843e1988Sjohnlev 273843e1988Sjohnlev #define U_INVAL(u) (((u)->uio_loffset & (offset_t)(XB_BMASK)) || \ 274843e1988Sjohnlev ((u)->uio_iov->iov_len & (offset_t)(XB_BMASK))) 275843e1988Sjohnlev 276843e1988Sjohnlev /* wrap pa_to_ma() for xdf to run in dom0 */ 277843e1988Sjohnlev #define PATOMA(addr) (DOMAIN_IS_INITDOMAIN(xen_info) ? addr : pa_to_ma(addr)) 278843e1988Sjohnlev 2797f0b8309SEdward Pilatowicz #define XD_IS_RO(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_READONLY) 2807f0b8309SEdward Pilatowicz #define XD_IS_CD(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_CDROM) 2817f0b8309SEdward Pilatowicz #define XD_IS_RM(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_REMOVABLE) 2827f0b8309SEdward Pilatowicz #define IS_READ(bp) VOID2BOOLEAN((bp)->b_flags & B_READ) 2837f0b8309SEdward Pilatowicz #define IS_ERROR(bp) VOID2BOOLEAN((bp)->b_flags & B_ERROR) 284843e1988Sjohnlev 285843e1988Sjohnlev #define XDF_UPDATE_IO_STAT(vdp, bp) \ 2867f0b8309SEdward Pilatowicz { \ 287843e1988Sjohnlev kstat_io_t *kip = KSTAT_IO_PTR((vdp)->xdf_xdev_iostat); \ 288843e1988Sjohnlev size_t n_done = (bp)->b_bcount - (bp)->b_resid; \ 289843e1988Sjohnlev if ((bp)->b_flags & B_READ) { \ 290843e1988Sjohnlev kip->reads++; \ 291843e1988Sjohnlev kip->nread += n_done; \ 292843e1988Sjohnlev } else { \ 293843e1988Sjohnlev kip->writes++; \ 294843e1988Sjohnlev kip->nwritten += n_done; \ 295843e1988Sjohnlev } \ 296843e1988Sjohnlev } 297843e1988Sjohnlev 298843e1988Sjohnlev #ifdef DEBUG 2997f0b8309SEdward Pilatowicz #define DPRINTF(flag, args) {if (xdf_debug & (flag)) prom_printf args; } 300843e1988Sjohnlev #define SETDMACBON(vbd) {(vbd)->xdf_dmacallback_num++; } 301843e1988Sjohnlev #define SETDMACBOFF(vbd) {(vbd)->xdf_dmacallback_num--; } 302843e1988Sjohnlev #define ISDMACBON(vbd) ((vbd)->xdf_dmacallback_num > 0) 303843e1988Sjohnlev #else 304843e1988Sjohnlev #define DPRINTF(flag, args) 305843e1988Sjohnlev #define SETDMACBON(vbd) 306843e1988Sjohnlev #define SETDMACBOFF(vbd) 307843e1988Sjohnlev #define ISDMACBON(vbd) 308843e1988Sjohnlev #endif /* DEBUG */ 309843e1988Sjohnlev 310843e1988Sjohnlev #define DDI_DBG 0x1 311843e1988Sjohnlev #define DMA_DBG 0x2 312843e1988Sjohnlev #define INTR_DBG 0x8 313843e1988Sjohnlev #define IO_DBG 0x10 314843e1988Sjohnlev #define IOCTL_DBG 0x20 315843e1988Sjohnlev #define SUSRES_DBG 0x40 316843e1988Sjohnlev #define LBL_DBG 0x80 317843e1988Sjohnlev 318c73799ddSYuri Pankov #ifdef XPV_HVM_DRIVER 3197f0b8309SEdward Pilatowicz extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *); 3207f0b8309SEdward Pilatowicz extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, 3217f0b8309SEdward Pilatowicz void *); 3227f0b8309SEdward Pilatowicz extern void xdfmin(struct buf *bp); 3237f0b8309SEdward Pilatowicz extern dev_info_t *xdf_hvm_hold(const char *); 3247f0b8309SEdward Pilatowicz extern boolean_t xdf_hvm_connect(dev_info_t *); 32506bbe1e0Sedp extern int xdf_hvm_setpgeom(dev_info_t *, cmlb_geom_t *); 3267f0b8309SEdward Pilatowicz extern boolean_t xdf_is_cd(dev_info_t *); 3277f0b8309SEdward Pilatowicz extern boolean_t xdf_is_rm(dev_info_t *); 3287f0b8309SEdward Pilatowicz extern boolean_t xdf_media_req_supported(dev_info_t *); 32906bbe1e0Sedp #endif /* XPV_HVM_DRIVER */ 33006bbe1e0Sedp 331843e1988Sjohnlev #ifdef __cplusplus 332843e1988Sjohnlev } 333843e1988Sjohnlev #endif 334843e1988Sjohnlev 335843e1988Sjohnlev #endif /* _SYS_XDF_H */ 336