1bf21cd93STycho Nightingale /*-
2*32640292SAndy Fiddaman  * SPDX-License-Identifier: BSD-2-Clause
34c87aefeSPatrick Mooney  *
4bf21cd93STycho Nightingale  * Copyright (c) 2011 NetApp, Inc.
5bf21cd93STycho Nightingale  * All rights reserved.
6b0de25cbSAndy Fiddaman  * Copyright 2020-2021 Joyent, Inc.
7bf21cd93STycho Nightingale  *
8bf21cd93STycho Nightingale  * Redistribution and use in source and binary forms, with or without
9bf21cd93STycho Nightingale  * modification, are permitted provided that the following conditions
10bf21cd93STycho Nightingale  * are met:
11bf21cd93STycho Nightingale  * 1. Redistributions of source code must retain the above copyright
12bf21cd93STycho Nightingale  *    notice, this list of conditions and the following disclaimer.
13bf21cd93STycho Nightingale  * 2. Redistributions in binary form must reproduce the above copyright
14bf21cd93STycho Nightingale  *    notice, this list of conditions and the following disclaimer in the
15bf21cd93STycho Nightingale  *    documentation and/or other materials provided with the distribution.
16bf21cd93STycho Nightingale  *
17bf21cd93STycho Nightingale  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18bf21cd93STycho Nightingale  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19bf21cd93STycho Nightingale  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20bf21cd93STycho Nightingale  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21bf21cd93STycho Nightingale  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22bf21cd93STycho Nightingale  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23bf21cd93STycho Nightingale  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24bf21cd93STycho Nightingale  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25bf21cd93STycho Nightingale  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26bf21cd93STycho Nightingale  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27bf21cd93STycho Nightingale  * SUCH DAMAGE.
28bf21cd93STycho Nightingale  */
29bf21cd93STycho Nightingale /*
30bf21cd93STycho Nightingale  * This file and its contents are supplied under the terms of the
31bf21cd93STycho Nightingale  * Common Development and Distribution License ("CDDL"), version 1.0.
32bf21cd93STycho Nightingale  * You may only use this file in accordance with the terms of version
33bf21cd93STycho Nightingale  * 1.0 of the CDDL.
34bf21cd93STycho Nightingale  *
35bf21cd93STycho Nightingale  * A full copy of the text of the CDDL should have accompanied this
36bf21cd93STycho Nightingale  * source.  A copy of the CDDL is also available via the Internet at
37bf21cd93STycho Nightingale  * http://www.illumos.org/license/CDDL.
38bf21cd93STycho Nightingale  *
39bf21cd93STycho Nightingale  * Copyright 2014 Pluribus Networks Inc.
40bf21cd93STycho Nightingale  */
41bf21cd93STycho Nightingale 
42bf21cd93STycho Nightingale #include <sys/cdefs.h>
43bf21cd93STycho Nightingale 
44bf21cd93STycho Nightingale #include <sys/param.h>
45bf21cd93STycho Nightingale #include <sys/linker_set.h>
46bf21cd93STycho Nightingale #include <sys/stat.h>
47bf21cd93STycho Nightingale #include <sys/uio.h>
48bf21cd93STycho Nightingale #include <sys/ioctl.h>
49bf21cd93STycho Nightingale #include <sys/disk.h>
50bf21cd93STycho Nightingale 
51bf21cd93STycho Nightingale #include <errno.h>
52bf21cd93STycho Nightingale #include <fcntl.h>
53bf21cd93STycho Nightingale #include <stdio.h>
54bf21cd93STycho Nightingale #include <stdlib.h>
55bf21cd93STycho Nightingale #include <stdint.h>
56bf21cd93STycho Nightingale #include <string.h>
57bf21cd93STycho Nightingale #include <strings.h>
58bf21cd93STycho Nightingale #include <unistd.h>
59bf21cd93STycho Nightingale #include <assert.h>
60bf21cd93STycho Nightingale #include <pthread.h>
61bf21cd93STycho Nightingale #include <md5.h>
62bf21cd93STycho Nightingale 
63bf21cd93STycho Nightingale #include "bhyverun.h"
642b948146SAndy Fiddaman #include "config.h"
65154972afSPatrick Mooney #include "debug.h"
66bf21cd93STycho Nightingale #include "pci_emul.h"
67bf21cd93STycho Nightingale #include "virtio.h"
684c87aefeSPatrick Mooney #include "block_if.h"
69bf21cd93STycho Nightingale 
70282a8ecbSJason King #define	VTBLK_BSIZE	512
71282a8ecbSJason King #define	VTBLK_RINGSZ	128
72bf21cd93STycho Nightingale 
734c87aefeSPatrick Mooney _Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able to queue a request");
74bf21cd93STycho Nightingale 
75282a8ecbSJason King #define	VTBLK_S_OK	0
76282a8ecbSJason King #define	VTBLK_S_IOERR	1
77bf21cd93STycho Nightingale #define	VTBLK_S_UNSUPP	2
78bf21cd93STycho Nightingale 
/*
 * Size of the device identifier buffer (vbsc_ident); presumably 20 ID bytes
 * plus a terminating NUL.  Parenthesized so the macro expands safely inside
 * larger expressions.
 */
#define	VTBLK_BLK_ID_BYTES	(20 + 1)
80bf21cd93STycho Nightingale 
/*
 * Feature (capability) bits of the virtio block device.
 */
#define	VTBLK_F_BARRIER		(1 << 0)	/* host supports barriers */
#define	VTBLK_F_SIZE_MAX	(1 << 1)	/* maximum segment size given */
#define	VTBLK_F_SEG_MAX		(1 << 2)	/* maximum # of segments given */
#define	VTBLK_F_GEOMETRY	(1 << 4)	/* legacy geometry available */
#define	VTBLK_F_RO		(1 << 5)	/* disk is read-only */
#define	VTBLK_F_BLK_SIZE	(1 << 6)	/* disk block size available */
#define	VTBLK_F_SCSI		(1 << 7)	/* scsi command passthru */
#define	VTBLK_F_FLUSH		(1 << 9)	/* writeback mode after reset */
#define	VTBLK_F_WCE		(1 << 9)	/* legacy alias for FLUSH */
#define	VTBLK_F_TOPOLOGY	(1 << 10)	/* topology info available */
#define	VTBLK_F_CONFIG_WCE	(1 << 11)	/* writeback mode in config */
#define	VTBLK_F_MQ		(1 << 12)	/* multi-queue */
#define	VTBLK_F_DISCARD		(1 << 13)	/* trim blocks */
#define	VTBLK_F_WRITE_ZEROES	(1 << 14)	/* write zeros */

/*
 * Features always offered by this device model.  pci_vtblk_init() may add
 * VTBLK_F_DISCARD to its per-device copy.  VIRTIO_RING_F_INDIRECT_DESC
 * enables indirect descriptors.
 */
#define	VTBLK_S_HOSTCAPS		\
	(VTBLK_F_SEG_MAX |		\
	VTBLK_F_BLK_SIZE |		\
	VTBLK_F_FLUSH |			\
	VTBLK_F_TOPOLOGY |		\
	VIRTIO_RING_F_INDIRECT_DESC)
106bf21cd93STycho Nightingale 
/*
 * blockif_delete() currently accepts only one delete request at a time,
 * so a discard command may carry a single segment.
 */
#define	VTBLK_MAX_DISCARD_SEG	1

/*
 * Arbitrary cap on the size of one discard, chosen to keep large delete
 * requests from causing excessive latency: 16 MiB, in VTBLK_BSIZE sectors.
 */
#define	VTBLK_MAX_DISCARD_SECT	((16 << 20) / VTBLK_BSIZE)
118282a8ecbSJason King 
119bf21cd93STycho Nightingale /*
120bf21cd93STycho Nightingale  * Config space "registers"
121bf21cd93STycho Nightingale  */
122bf21cd93STycho Nightingale struct vtblk_config {
123bf21cd93STycho Nightingale 	uint64_t	vbc_capacity;
124bf21cd93STycho Nightingale 	uint32_t	vbc_size_max;
125bf21cd93STycho Nightingale 	uint32_t	vbc_seg_max;
1264c87aefeSPatrick Mooney 	struct {
1274c87aefeSPatrick Mooney 		uint16_t cylinders;
1284c87aefeSPatrick Mooney 		uint8_t heads;
1294c87aefeSPatrick Mooney 		uint8_t sectors;
1304c87aefeSPatrick Mooney 	} vbc_geometry;
131bf21cd93STycho Nightingale 	uint32_t	vbc_blk_size;
1324c87aefeSPatrick Mooney 	struct {
1334c87aefeSPatrick Mooney 		uint8_t physical_block_exp;
1344c87aefeSPatrick Mooney 		uint8_t alignment_offset;
1354c87aefeSPatrick Mooney 		uint16_t min_io_size;
1364c87aefeSPatrick Mooney 		uint32_t opt_io_size;
1374c87aefeSPatrick Mooney 	} vbc_topology;
1384c87aefeSPatrick Mooney 	uint8_t		vbc_writeback;
139154972afSPatrick Mooney 	uint8_t		unused0[1];
140154972afSPatrick Mooney 	uint16_t	num_queues;
141282a8ecbSJason King 	uint32_t	max_discard_sectors;
142282a8ecbSJason King 	uint32_t	max_discard_seg;
143282a8ecbSJason King 	uint32_t	discard_sector_alignment;
144282a8ecbSJason King 	uint32_t	max_write_zeroes_sectors;
145282a8ecbSJason King 	uint32_t	max_write_zeroes_seg;
146282a8ecbSJason King 	uint8_t		write_zeroes_may_unmap;
147282a8ecbSJason King 	uint8_t		unused1[3];
148bf21cd93STycho Nightingale } __packed;
149bf21cd93STycho Nightingale 
/*
 * Fixed-size header found in the first descriptor of every request.
 * vbh_type holds one of the VBH_OP_* codes, optionally OR'ed with
 * VBH_FLAG_BARRIER; vbh_sector is counted in VTBLK_BSIZE units.
 */
struct virtio_blk_hdr {
#define	VBH_OP_READ		0
#define	VBH_OP_WRITE		1
#define	VBH_OP_SCSI_CMD		2
#define	VBH_OP_SCSI_CMD_OUT	3
#define	VBH_OP_FLUSH		4
#define	VBH_OP_FLUSH_OUT	5
#define	VBH_OP_IDENT		8
#define	VBH_OP_DISCARD		11
#define	VBH_OP_WRITE_ZEROES	13

#define	VBH_FLAG_BARRIER	0x80000000	/* OR'ed into vbh_type */
	uint32_t vbh_type;
	uint32_t vbh_ioprio;
	uint64_t vbh_sector;
} __packed;
169bf21cd93STycho Nightingale 
/*
 * Debug printf.
 *
 * Both macros take a fully parenthesized argument list, e.g.
 * DPRINTF(("fmt %d", x)).  DPRINTF only prints when pci_vtblk_debug is
 * non-zero; it is wrapped in do/while (0) so that it behaves as a single
 * statement and cannot capture a following "else".
 */
static int pci_vtblk_debug;
#define	DPRINTF(params) do {						\
	if (pci_vtblk_debug)						\
		PRINTLN params;						\
} while (0)
#define	WPRINTF(params) PRINTLN params
176bf21cd93STycho Nightingale 
1774c87aefeSPatrick Mooney struct pci_vtblk_ioreq {
1784c87aefeSPatrick Mooney 	struct blockif_req		io_req;
1794c87aefeSPatrick Mooney 	struct pci_vtblk_softc		*io_sc;
1804c87aefeSPatrick Mooney 	uint8_t				*io_status;
1814c87aefeSPatrick Mooney 	uint16_t			io_idx;
1824c87aefeSPatrick Mooney };
1834c87aefeSPatrick Mooney 
/*
 * Payload of a DISCARD request: the range to operate on, in place of data.
 * Sector values are in VTBLK_BSIZE units.
 */
struct virtio_blk_discard_write_zeroes {
	uint64_t sector;
	uint32_t num_sectors;
	struct {
		uint32_t unmap:1;
		uint32_t reserved:31;
	} flags;
};
192282a8ecbSJason King 
193bf21cd93STycho Nightingale /*
194bf21cd93STycho Nightingale  * Per-device softc
195bf21cd93STycho Nightingale  */
196bf21cd93STycho Nightingale struct pci_vtblk_softc {
197bf21cd93STycho Nightingale 	struct virtio_softc vbsc_vs;
198bf21cd93STycho Nightingale 	pthread_mutex_t vsc_mtx;
199bf21cd93STycho Nightingale 	struct vqueue_info vbsc_vq;
2004c87aefeSPatrick Mooney 	struct vtblk_config vbsc_cfg;
201282a8ecbSJason King 	struct virtio_consts vbsc_consts;
2024c87aefeSPatrick Mooney 	struct blockif_ctxt *bc;
2034c87aefeSPatrick Mooney #ifndef __FreeBSD__
2044c87aefeSPatrick Mooney 	int vbsc_wce;
2054c87aefeSPatrick Mooney #endif
206bf21cd93STycho Nightingale 	char vbsc_ident[VTBLK_BLK_ID_BYTES];
2074c87aefeSPatrick Mooney 	struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ];
208bf21cd93STycho Nightingale };
209bf21cd93STycho Nightingale 
/* Callbacks referenced by vtblk_vi_consts; defined further down. */
static void pci_vtblk_reset(void *vsc);
static void pci_vtblk_notify(void *vsc, struct vqueue_info *vq);
static int pci_vtblk_cfgread(void *vsc, int offset, int size,
    uint32_t *retval);
static int pci_vtblk_cfgwrite(void *vsc, int offset, int size,
    uint32_t value);
#ifndef __FreeBSD__
static void pci_vtblk_apply_feats(void *vsc, uint64_t caps);
#endif
217bf21cd93STycho Nightingale 
218bf21cd93STycho Nightingale static struct virtio_consts vtblk_vi_consts = {
21959d65d31SAndy Fiddaman 	.vc_name =	"vtblk",
22059d65d31SAndy Fiddaman 	.vc_nvq =	1,
22159d65d31SAndy Fiddaman 	.vc_cfgsize =	sizeof(struct vtblk_config),
22259d65d31SAndy Fiddaman 	.vc_reset =	pci_vtblk_reset,
22359d65d31SAndy Fiddaman 	.vc_qnotify =	pci_vtblk_notify,
22459d65d31SAndy Fiddaman 	.vc_cfgread =	pci_vtblk_cfgread,
22559d65d31SAndy Fiddaman 	.vc_cfgwrite =	pci_vtblk_cfgwrite,
2264c87aefeSPatrick Mooney #ifndef __FreeBSD__
22759d65d31SAndy Fiddaman 	.vc_apply_features = pci_vtblk_apply_feats,
2284c87aefeSPatrick Mooney #else
22959d65d31SAndy Fiddaman 	.vc_apply_features = NULL,
2304c87aefeSPatrick Mooney #endif
23159d65d31SAndy Fiddaman 	.vc_hv_caps =	VTBLK_S_HOSTCAPS,
232bf21cd93STycho Nightingale };
233bf21cd93STycho Nightingale 
234bf21cd93STycho Nightingale static void
pci_vtblk_reset(void * vsc)235bf21cd93STycho Nightingale pci_vtblk_reset(void *vsc)
236bf21cd93STycho Nightingale {
237bf21cd93STycho Nightingale 	struct pci_vtblk_softc *sc = vsc;
238bf21cd93STycho Nightingale 
239154972afSPatrick Mooney 	DPRINTF(("vtblk: device reset requested !"));
240bf21cd93STycho Nightingale 	vi_reset_dev(&sc->vbsc_vs);
2414c87aefeSPatrick Mooney #ifndef __FreeBSD__
2424c87aefeSPatrick Mooney 	/* Disable write cache until FLUSH feature is negotiated */
2434c87aefeSPatrick Mooney 	(void) blockif_set_wce(sc->bc, 0);
2444c87aefeSPatrick Mooney 	sc->vbsc_wce = 0;
2454c87aefeSPatrick Mooney #endif
2464c87aefeSPatrick Mooney }
2474c87aefeSPatrick Mooney 
2484c87aefeSPatrick Mooney static void
pci_vtblk_done_locked(struct pci_vtblk_ioreq * io,int err)2494c87aefeSPatrick Mooney pci_vtblk_done_locked(struct pci_vtblk_ioreq *io, int err)
2504c87aefeSPatrick Mooney {
2514c87aefeSPatrick Mooney 	struct pci_vtblk_softc *sc = io->io_sc;
2524c87aefeSPatrick Mooney 
2534c87aefeSPatrick Mooney 	/* convert errno into a virtio block error return */
2544c87aefeSPatrick Mooney 	if (err == EOPNOTSUPP || err == ENOSYS)
2554c87aefeSPatrick Mooney 		*io->io_status = VTBLK_S_UNSUPP;
2564c87aefeSPatrick Mooney 	else if (err != 0)
2574c87aefeSPatrick Mooney 		*io->io_status = VTBLK_S_IOERR;
2584c87aefeSPatrick Mooney 	else
2594c87aefeSPatrick Mooney 		*io->io_status = VTBLK_S_OK;
2604c87aefeSPatrick Mooney 
2614c87aefeSPatrick Mooney 	/*
2624c87aefeSPatrick Mooney 	 * Return the descriptor back to the host.
2634c87aefeSPatrick Mooney 	 * We wrote 1 byte (our status) to host.
2644c87aefeSPatrick Mooney 	 */
2654c87aefeSPatrick Mooney 	vq_relchain(&sc->vbsc_vq, io->io_idx, 1);
2664c87aefeSPatrick Mooney 	vq_endchains(&sc->vbsc_vq, 0);
2674c87aefeSPatrick Mooney }
2684c87aefeSPatrick Mooney 
2694c87aefeSPatrick Mooney static void
pci_vtblk_done(struct blockif_req * br,int err)2704c87aefeSPatrick Mooney pci_vtblk_done(struct blockif_req *br, int err)
2714c87aefeSPatrick Mooney {
2724c87aefeSPatrick Mooney 	struct pci_vtblk_ioreq *io = br->br_param;
2734c87aefeSPatrick Mooney 	struct pci_vtblk_softc *sc = io->io_sc;
2744c87aefeSPatrick Mooney 
2754c87aefeSPatrick Mooney 	pthread_mutex_lock(&sc->vsc_mtx);
2764c87aefeSPatrick Mooney 	pci_vtblk_done_locked(io, err);
2774c87aefeSPatrick Mooney 	pthread_mutex_unlock(&sc->vsc_mtx);
278bf21cd93STycho Nightingale }
279bf21cd93STycho Nightingale 
280bf21cd93STycho Nightingale static void
pci_vtblk_proc(struct pci_vtblk_softc * sc,struct vqueue_info * vq)281bf21cd93STycho Nightingale pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
282bf21cd93STycho Nightingale {
283bf21cd93STycho Nightingale 	struct virtio_blk_hdr *vbh;
2844c87aefeSPatrick Mooney 	struct pci_vtblk_ioreq *io;
285bf21cd93STycho Nightingale 	int i, n;
286bf21cd93STycho Nightingale 	int err;
2874c87aefeSPatrick Mooney 	ssize_t iolen;
288bf21cd93STycho Nightingale 	int writeop, type;
289b0de25cbSAndy Fiddaman 	struct vi_req req;
2904c87aefeSPatrick Mooney 	struct iovec iov[BLOCKIF_IOV_MAX + 2];
291282a8ecbSJason King 	struct virtio_blk_discard_write_zeroes *discard;
292bf21cd93STycho Nightingale 
293b0de25cbSAndy Fiddaman 	n = vq_getchain(vq, iov, BLOCKIF_IOV_MAX + 2, &req);
294bf21cd93STycho Nightingale 
295bf21cd93STycho Nightingale 	/*
296bf21cd93STycho Nightingale 	 * The first descriptor will be the read-only fixed header,
297bf21cd93STycho Nightingale 	 * and the last is for status (hence +2 above and below).
298bf21cd93STycho Nightingale 	 * The remaining iov's are the actual data I/O vectors.
299bf21cd93STycho Nightingale 	 *
300bf21cd93STycho Nightingale 	 * XXX - note - this fails on crash dump, which does a
301bf21cd93STycho Nightingale 	 * VIRTIO_BLK_T_FLUSH with a zero transfer length
302bf21cd93STycho Nightingale 	 */
3034c87aefeSPatrick Mooney 	assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2);
304bf21cd93STycho Nightingale 
305b0de25cbSAndy Fiddaman 	io = &sc->vbsc_ios[req.idx];
306b0de25cbSAndy Fiddaman 	assert(req.readable != 0);
307bf21cd93STycho Nightingale 	assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
3084c87aefeSPatrick Mooney 	vbh = (struct virtio_blk_hdr *)iov[0].iov_base;
3094c87aefeSPatrick Mooney 	memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2));
3104c87aefeSPatrick Mooney 	io->io_req.br_iovcnt = n - 2;
311282a8ecbSJason King 	io->io_req.br_offset = vbh->vbh_sector * VTBLK_BSIZE;
3124c87aefeSPatrick Mooney 	io->io_status = (uint8_t *)iov[--n].iov_base;
313b0de25cbSAndy Fiddaman 	assert(req.writable != 0);
314bf21cd93STycho Nightingale 	assert(iov[n].iov_len == 1);
315bf21cd93STycho Nightingale 
316bf21cd93STycho Nightingale 	/*
317bf21cd93STycho Nightingale 	 * XXX
318bf21cd93STycho Nightingale 	 * The guest should not be setting the BARRIER flag because
319bf21cd93STycho Nightingale 	 * we don't advertise the capability.
320bf21cd93STycho Nightingale 	 */
321bf21cd93STycho Nightingale 	type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
322282a8ecbSJason King 	writeop = (type == VBH_OP_WRITE || type == VBH_OP_DISCARD);
323b0de25cbSAndy Fiddaman 	/*
324b0de25cbSAndy Fiddaman 	 * - Write op implies read-only descriptor
325b0de25cbSAndy Fiddaman 	 * - Read/ident op implies write-only descriptor
326b0de25cbSAndy Fiddaman 	 *
327b0de25cbSAndy Fiddaman 	 * By taking away either the read-only fixed header or the write-only
328b0de25cbSAndy Fiddaman 	 * status iovec, the following condition should hold true.
329b0de25cbSAndy Fiddaman 	 */
330b0de25cbSAndy Fiddaman 	assert(n == (writeop ? req.readable : req.writable));
331bf21cd93STycho Nightingale 
332bf21cd93STycho Nightingale 	iolen = 0;
333bf21cd93STycho Nightingale 	for (i = 1; i < n; i++) {
334bf21cd93STycho Nightingale 		iolen += iov[i].iov_len;
335bf21cd93STycho Nightingale 	}
3364c87aefeSPatrick Mooney 	io->io_req.br_resid = iolen;
337bf21cd93STycho Nightingale 
338154972afSPatrick Mooney 	DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld",
339282a8ecbSJason King 		 writeop ? "write/discard" : "read/ident", iolen, i - 1,
3404c87aefeSPatrick Mooney 		 io->io_req.br_offset));
341bf21cd93STycho Nightingale 
342bf21cd93STycho Nightingale 	switch (type) {
3434c87aefeSPatrick Mooney 	case VBH_OP_READ:
3444c87aefeSPatrick Mooney 		err = blockif_read(sc->bc, &io->io_req);
3454c87aefeSPatrick Mooney 		break;
346bf21cd93STycho Nightingale 	case VBH_OP_WRITE:
3474c87aefeSPatrick Mooney 		err = blockif_write(sc->bc, &io->io_req);
348bf21cd93STycho Nightingale 		break;
349282a8ecbSJason King 	case VBH_OP_DISCARD:
350282a8ecbSJason King 		/*
351282a8ecbSJason King 		 * We currently only support a single request, if the guest
352282a8ecbSJason King 		 * has submitted a request that doesn't conform to the
353282a8ecbSJason King 		 * requirements, we return a error.
354282a8ecbSJason King 		 */
355282a8ecbSJason King 		if (iov[1].iov_len != sizeof (*discard)) {
356282a8ecbSJason King 			pci_vtblk_done_locked(io, EINVAL);
357282a8ecbSJason King 			return;
358282a8ecbSJason King 		}
359282a8ecbSJason King 
360282a8ecbSJason King 		/* The segments to discard are provided rather than data */
361282a8ecbSJason King 		discard = (struct virtio_blk_discard_write_zeroes *)
362282a8ecbSJason King 		    iov[1].iov_base;
363282a8ecbSJason King 
364282a8ecbSJason King 		/*
365282a8ecbSJason King 		 * virtio v1.1 5.2.6.2:
366282a8ecbSJason King 		 * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP
367282a8ecbSJason King 		 * for discard and write zeroes commands if any unknown flag is
368282a8ecbSJason King 		 * set. Furthermore, the device MUST set the status byte to
369282a8ecbSJason King 		 * VIRTIO_BLK_S_UNSUPP for discard commands if the unmap flag
370282a8ecbSJason King 		 * is set.
371282a8ecbSJason King 		 *
372282a8ecbSJason King 		 * Currently there are no known flags for a DISCARD request.
373282a8ecbSJason King 		 */
374282a8ecbSJason King 		if (discard->flags.unmap != 0 || discard->flags.reserved != 0) {
375282a8ecbSJason King 			pci_vtblk_done_locked(io, ENOTSUP);
376282a8ecbSJason King 			return;
377282a8ecbSJason King 		}
378282a8ecbSJason King 
379282a8ecbSJason King 		/* Make sure the request doesn't exceed our size limit */
380282a8ecbSJason King 		if (discard->num_sectors > VTBLK_MAX_DISCARD_SECT) {
381282a8ecbSJason King 			pci_vtblk_done_locked(io, EINVAL);
382282a8ecbSJason King 			return;
383282a8ecbSJason King 		}
384282a8ecbSJason King 
385282a8ecbSJason King 		io->io_req.br_offset = discard->sector * VTBLK_BSIZE;
386282a8ecbSJason King 		io->io_req.br_resid = discard->num_sectors * VTBLK_BSIZE;
387282a8ecbSJason King 		err = blockif_delete(sc->bc, &io->io_req);
388282a8ecbSJason King 		break;
3894c87aefeSPatrick Mooney 	case VBH_OP_FLUSH:
3904c87aefeSPatrick Mooney 	case VBH_OP_FLUSH_OUT:
3914c87aefeSPatrick Mooney 		err = blockif_flush(sc->bc, &io->io_req);
392bf21cd93STycho Nightingale 		break;
393bf21cd93STycho Nightingale 	case VBH_OP_IDENT:
394bf21cd93STycho Nightingale 		/* Assume a single buffer */
3954c87aefeSPatrick Mooney 		/* S/n equal to buffer is not zero-terminated. */
3964c87aefeSPatrick Mooney 		memset(iov[1].iov_base, 0, iov[1].iov_len);
3974c87aefeSPatrick Mooney 		strncpy(iov[1].iov_base, sc->vbsc_ident,
398bf21cd93STycho Nightingale 		    MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
3994c87aefeSPatrick Mooney 		pci_vtblk_done_locked(io, 0);
4004c87aefeSPatrick Mooney 		return;
401bf21cd93STycho Nightingale 	default:
4024c87aefeSPatrick Mooney 		pci_vtblk_done_locked(io, EOPNOTSUPP);
4034c87aefeSPatrick Mooney 		return;
404bf21cd93STycho Nightingale 	}
4054c87aefeSPatrick Mooney 	assert(err == 0);
406bf21cd93STycho Nightingale }
407bf21cd93STycho Nightingale 
/*
 * vc_qnotify callback: process requests for as long as the queue reports
 * available descriptors.
 */
static void
pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtblk_softc *blk = vsc;

	for (;;) {
		if (!vq_has_descs(vq))
			break;
		pci_vtblk_proc(blk, vq);
	}
}
416bf21cd93STycho Nightingale 
417b0de25cbSAndy Fiddaman static void
pci_vtblk_resized(struct blockif_ctxt * bctxt __unused,void * arg,size_t new_size)41859d65d31SAndy Fiddaman pci_vtblk_resized(struct blockif_ctxt *bctxt __unused, void *arg,
41959d65d31SAndy Fiddaman     size_t new_size)
420b0de25cbSAndy Fiddaman {
421b0de25cbSAndy Fiddaman 	struct pci_vtblk_softc *sc;
422b0de25cbSAndy Fiddaman 
423b0de25cbSAndy Fiddaman 	sc = arg;
424b0de25cbSAndy Fiddaman 
425b0de25cbSAndy Fiddaman 	sc->vbsc_cfg.vbc_capacity = new_size / VTBLK_BSIZE; /* 512-byte units */
426b0de25cbSAndy Fiddaman 	vi_interrupt(&sc->vbsc_vs, VIRTIO_PCI_ISR_CONFIG,
427b0de25cbSAndy Fiddaman 	    sc->vbsc_vs.vs_msix_cfg_idx);
428b0de25cbSAndy Fiddaman }
429b0de25cbSAndy Fiddaman 
430bf21cd93STycho Nightingale static int
pci_vtblk_init(struct pci_devinst * pi,nvlist_t * nvl)431*32640292SAndy Fiddaman pci_vtblk_init(struct pci_devinst *pi, nvlist_t *nvl)
432bf21cd93STycho Nightingale {
43359d65d31SAndy Fiddaman 	char bident[sizeof("XXX:XXX")];
4344c87aefeSPatrick Mooney 	struct blockif_ctxt *bctxt;
4356dc98349SAndy Fiddaman 	const char *path, *serial;
436bf21cd93STycho Nightingale 	MD5_CTX mdctx;
437bf21cd93STycho Nightingale 	u_char digest[16];
438bf21cd93STycho Nightingale 	struct pci_vtblk_softc *sc;
4394c87aefeSPatrick Mooney 	off_t size;
4404c87aefeSPatrick Mooney 	int i, sectsz, sts, sto;
441bf21cd93STycho Nightingale 
442bf21cd93STycho Nightingale 	/*
443bf21cd93STycho Nightingale 	 * The supplied backing file has to exist
444bf21cd93STycho Nightingale 	 */
44559d65d31SAndy Fiddaman 	snprintf(bident, sizeof(bident), "%u:%u", pi->pi_slot, pi->pi_func);
4462b948146SAndy Fiddaman 	bctxt = blockif_open(nvl, bident);
447282a8ecbSJason King 	if (bctxt == NULL) {
448bf21cd93STycho Nightingale 		perror("Could not open backing file");
449bf21cd93STycho Nightingale 		return (1);
450bf21cd93STycho Nightingale 	}
451bf21cd93STycho Nightingale 
452*32640292SAndy Fiddaman 	if (blockif_add_boot_device(pi, bctxt)) {
453*32640292SAndy Fiddaman 		perror("Invalid boot device");
454*32640292SAndy Fiddaman 		return (1);
455*32640292SAndy Fiddaman 	}
456*32640292SAndy Fiddaman 
4574c87aefeSPatrick Mooney 	size = blockif_size(bctxt);
4584c87aefeSPatrick Mooney 	sectsz = blockif_sectsz(bctxt);
4594c87aefeSPatrick Mooney 	blockif_psectsz(bctxt, &sts, &sto);
460bf21cd93STycho Nightingale 
461bf21cd93STycho Nightingale 	sc = calloc(1, sizeof(struct pci_vtblk_softc));
4624c87aefeSPatrick Mooney 	sc->bc = bctxt;
4634c87aefeSPatrick Mooney 	for (i = 0; i < VTBLK_RINGSZ; i++) {
4644c87aefeSPatrick Mooney 		struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
4654c87aefeSPatrick Mooney 		io->io_req.br_callback = pci_vtblk_done;
4664c87aefeSPatrick Mooney 		io->io_req.br_param = io;
4674c87aefeSPatrick Mooney 		io->io_sc = sc;
4684c87aefeSPatrick Mooney 		io->io_idx = i;
4694c87aefeSPatrick Mooney 	}
470bf21cd93STycho Nightingale 
471282a8ecbSJason King 	bcopy(&vtblk_vi_consts, &sc->vbsc_consts, sizeof (vtblk_vi_consts));
472282a8ecbSJason King 	if (blockif_candelete(sc->bc))
473282a8ecbSJason King 		sc->vbsc_consts.vc_hv_caps |= VTBLK_F_DISCARD;
474282a8ecbSJason King 
4754c87aefeSPatrick Mooney #ifndef __FreeBSD__
4764c87aefeSPatrick Mooney 	/* Disable write cache until FLUSH feature is negotiated */
4774c87aefeSPatrick Mooney 	(void) blockif_set_wce(sc->bc, 0);
4784c87aefeSPatrick Mooney 	sc->vbsc_wce = 0;
4794c87aefeSPatrick Mooney #endif
480bf21cd93STycho Nightingale 
481bf21cd93STycho Nightingale 	pthread_mutex_init(&sc->vsc_mtx, NULL);
482bf21cd93STycho Nightingale 
483bf21cd93STycho Nightingale 	/* init virtio softc and virtqueues */
484282a8ecbSJason King 	vi_softc_linkup(&sc->vbsc_vs, &sc->vbsc_consts, sc, pi, &sc->vbsc_vq);
485bf21cd93STycho Nightingale 	sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;
486bf21cd93STycho Nightingale 
487bf21cd93STycho Nightingale 	sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
488bf21cd93STycho Nightingale 	/* sc->vbsc_vq.vq_notify = we have no per-queue notify */
489bf21cd93STycho Nightingale 
490bf21cd93STycho Nightingale 	/*
4916dc98349SAndy Fiddaman 	 * If an explicit identifier is not given, create an
4926dc98349SAndy Fiddaman 	 * identifier using parts of the md5 sum of the filename.
493bf21cd93STycho Nightingale 	 */
4946dc98349SAndy Fiddaman 	bzero(sc->vbsc_ident, VTBLK_BLK_ID_BYTES);
4954b82e532SAndy Fiddaman 	if ((serial = get_config_value_node(nvl, "serial")) != NULL ||
4964b82e532SAndy Fiddaman 	    (serial = get_config_value_node(nvl, "ser")) != NULL) {
4974b82e532SAndy Fiddaman 		strlcpy(sc->vbsc_ident, serial, VTBLK_BLK_ID_BYTES);
4986dc98349SAndy Fiddaman 	} else {
4996dc98349SAndy Fiddaman 		path = get_config_value_node(nvl, "path");
5006dc98349SAndy Fiddaman 		MD5Init(&mdctx);
5016dc98349SAndy Fiddaman 		MD5Update(&mdctx, path, strlen(path));
5026dc98349SAndy Fiddaman 		MD5Final(digest, &mdctx);
5036dc98349SAndy Fiddaman 		snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES,
5046dc98349SAndy Fiddaman 		    "BHYVE-%02X%02X-%02X%02X-%02X%02X",
5056dc98349SAndy Fiddaman 		    digest[0], digest[1], digest[2], digest[3], digest[4],
5066dc98349SAndy Fiddaman 		    digest[5]);
5074b82e532SAndy Fiddaman 	}
5084b82e532SAndy Fiddaman 
509bf21cd93STycho Nightingale 	/* setup virtio block config space */
510282a8ecbSJason King 	sc->vbsc_cfg.vbc_capacity = size / VTBLK_BSIZE; /* 512-byte units */
511bf21cd93STycho Nightingale 	sc->vbsc_cfg.vbc_size_max = 0;	/* not negotiated */
5124c87aefeSPatrick Mooney 
5134c87aefeSPatrick Mooney 	/*
5144c87aefeSPatrick Mooney 	 * If Linux is presented with a seg_max greater than the virtio queue
5154c87aefeSPatrick Mooney 	 * size, it can stumble into situations where it violates its own
5164c87aefeSPatrick Mooney 	 * invariants and panics.  For safety, we keep seg_max clamped, paying
5174c87aefeSPatrick Mooney 	 * heed to the two extra descriptors needed for the header and status
5184c87aefeSPatrick Mooney 	 * of a request.
5194c87aefeSPatrick Mooney 	 */
5204c87aefeSPatrick Mooney 	sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX);
5214c87aefeSPatrick Mooney 	sc->vbsc_cfg.vbc_geometry.cylinders = 0;	/* no geometry */
5224c87aefeSPatrick Mooney 	sc->vbsc_cfg.vbc_geometry.heads = 0;
5234c87aefeSPatrick Mooney 	sc->vbsc_cfg.vbc_geometry.sectors = 0;
5244c87aefeSPatrick Mooney 	sc->vbsc_cfg.vbc_blk_size = sectsz;
5254c87aefeSPatrick Mooney 	sc->vbsc_cfg.vbc_topology.physical_block_exp =
5264c87aefeSPatrick Mooney 	    (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0;
5274c87aefeSPatrick Mooney 	sc->vbsc_cfg.vbc_topology.alignment_offset =
5284c87aefeSPatrick Mooney 	    (sto != 0) ? ((sts - sto) / sectsz) : 0;
5294c87aefeSPatrick Mooney 	sc->vbsc_cfg.vbc_topology.min_io_size = 0;
5304c87aefeSPatrick Mooney 	sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
5314c87aefeSPatrick Mooney 	sc->vbsc_cfg.vbc_writeback = 0;
532282a8ecbSJason King 	sc->vbsc_cfg.max_discard_sectors = VTBLK_MAX_DISCARD_SECT;
533282a8ecbSJason King 	sc->vbsc_cfg.max_discard_seg = VTBLK_MAX_DISCARD_SEG;
5346960cd89SAndy Fiddaman 	sc->vbsc_cfg.discard_sector_alignment = MAX(sectsz, sts) / VTBLK_BSIZE;
535bf21cd93STycho Nightingale 
536bf21cd93STycho Nightingale 	/*
537bf21cd93STycho Nightingale 	 * Should we move some of this into virtio.c?  Could
538bf21cd93STycho Nightingale 	 * have the device, class, and subdev_0 as fields in
539bf21cd93STycho Nightingale 	 * the virtio constants structure.
540bf21cd93STycho Nightingale 	 */
541bf21cd93STycho Nightingale 	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
542bf21cd93STycho Nightingale 	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
543bf21cd93STycho Nightingale 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
5442b948146SAndy Fiddaman 	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_BLOCK);
5454c87aefeSPatrick Mooney 	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
546bf21cd93STycho Nightingale 
5474c87aefeSPatrick Mooney 	if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
5484c87aefeSPatrick Mooney 		blockif_close(sc->bc);
5494c87aefeSPatrick Mooney 		free(sc);
550bf21cd93STycho Nightingale 		return (1);
5514c87aefeSPatrick Mooney 	}
552bf21cd93STycho Nightingale 	vi_set_io_bar(&sc->vbsc_vs, 0);
553b0de25cbSAndy Fiddaman 	blockif_register_resize_callback(sc->bc, pci_vtblk_resized, sc);
554bf21cd93STycho Nightingale 	return (0);
555bf21cd93STycho Nightingale }
556bf21cd93STycho Nightingale 
557bf21cd93STycho Nightingale static int
pci_vtblk_cfgwrite(void * vsc __unused,int offset,int size __unused,uint32_t value __unused)55859d65d31SAndy Fiddaman pci_vtblk_cfgwrite(void *vsc __unused, int offset, int size __unused,
55959d65d31SAndy Fiddaman     uint32_t value __unused)
560bf21cd93STycho Nightingale {
561bf21cd93STycho Nightingale 
562154972afSPatrick Mooney 	DPRINTF(("vtblk: write to readonly reg %d", offset));
563bf21cd93STycho Nightingale 	return (1);
564bf21cd93STycho Nightingale }
565bf21cd93STycho Nightingale 
566bf21cd93STycho Nightingale static int
pci_vtblk_cfgread(void * vsc,int offset,int size,uint32_t * retval)567bf21cd93STycho Nightingale pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
568bf21cd93STycho Nightingale {
569bf21cd93STycho Nightingale 	struct pci_vtblk_softc *sc = vsc;
570bf21cd93STycho Nightingale 	void *ptr;
571bf21cd93STycho Nightingale 
572bf21cd93STycho Nightingale 	/* our caller has already verified offset and size */
573bf21cd93STycho Nightingale 	ptr = (uint8_t *)&sc->vbsc_cfg + offset;
574bf21cd93STycho Nightingale 	memcpy(retval, ptr, size);
575bf21cd93STycho Nightingale 	return (0);
576bf21cd93STycho Nightingale }
577bf21cd93STycho Nightingale 
5784c87aefeSPatrick Mooney #ifndef __FreeBSD__
5794c87aefeSPatrick Mooney void
pci_vtblk_apply_feats(void * vsc,uint64_t caps)5804c87aefeSPatrick Mooney pci_vtblk_apply_feats(void *vsc, uint64_t caps)
5814c87aefeSPatrick Mooney {
5824c87aefeSPatrick Mooney 	struct pci_vtblk_softc *sc = vsc;
5834c87aefeSPatrick Mooney 	const int wce_next = ((caps & VTBLK_F_FLUSH) != 0) ? 1 : 0;
5844c87aefeSPatrick Mooney 
5854c87aefeSPatrick Mooney 	if (sc->vbsc_wce != wce_next) {
5864c87aefeSPatrick Mooney 		(void) blockif_set_wce(sc->bc, wce_next);
5874c87aefeSPatrick Mooney 		sc->vbsc_wce = wce_next;
5884c87aefeSPatrick Mooney 	}
5894c87aefeSPatrick Mooney }
5904c87aefeSPatrick Mooney #endif /* __FreeBSD__ */
5914c87aefeSPatrick Mooney 
5924f3f3e9aSAndy Fiddaman static const struct pci_devemu pci_de_vblk = {
593bf21cd93STycho Nightingale 	.pe_emu =	"virtio-blk",
594bf21cd93STycho Nightingale 	.pe_init =	pci_vtblk_init,
5952b948146SAndy Fiddaman 	.pe_legacy_config = blockif_legacy_config,
596bf21cd93STycho Nightingale 	.pe_barwrite =	vi_pci_write,
5976dc98349SAndy Fiddaman 	.pe_barread =	vi_pci_read,
598bf21cd93STycho Nightingale };
599bf21cd93STycho Nightingale PCI_EMUL_SET(pci_de_vblk);
600