1bf21cd93STycho Nightingale /*-
2*32640292SAndy Fiddaman * SPDX-License-Identifier: BSD-2-Clause
34c87aefeSPatrick Mooney *
4bf21cd93STycho Nightingale * Copyright (c) 2011 NetApp, Inc.
5bf21cd93STycho Nightingale * All rights reserved.
6b0de25cbSAndy Fiddaman * Copyright 2020-2021 Joyent, Inc.
7bf21cd93STycho Nightingale *
8bf21cd93STycho Nightingale * Redistribution and use in source and binary forms, with or without
9bf21cd93STycho Nightingale * modification, are permitted provided that the following conditions
10bf21cd93STycho Nightingale * are met:
11bf21cd93STycho Nightingale * 1. Redistributions of source code must retain the above copyright
12bf21cd93STycho Nightingale * notice, this list of conditions and the following disclaimer.
13bf21cd93STycho Nightingale * 2. Redistributions in binary form must reproduce the above copyright
14bf21cd93STycho Nightingale * notice, this list of conditions and the following disclaimer in the
15bf21cd93STycho Nightingale * documentation and/or other materials provided with the distribution.
16bf21cd93STycho Nightingale *
17bf21cd93STycho Nightingale * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18bf21cd93STycho Nightingale * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19bf21cd93STycho Nightingale * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20bf21cd93STycho Nightingale * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21bf21cd93STycho Nightingale * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22bf21cd93STycho Nightingale * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23bf21cd93STycho Nightingale * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24bf21cd93STycho Nightingale * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25bf21cd93STycho Nightingale * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26bf21cd93STycho Nightingale * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27bf21cd93STycho Nightingale * SUCH DAMAGE.
28bf21cd93STycho Nightingale */
29bf21cd93STycho Nightingale /*
30bf21cd93STycho Nightingale * This file and its contents are supplied under the terms of the
31bf21cd93STycho Nightingale * Common Development and Distribution License ("CDDL"), version 1.0.
32bf21cd93STycho Nightingale * You may only use this file in accordance with the terms of version
33bf21cd93STycho Nightingale * 1.0 of the CDDL.
34bf21cd93STycho Nightingale *
35bf21cd93STycho Nightingale * A full copy of the text of the CDDL should have accompanied this
36bf21cd93STycho Nightingale * source. A copy of the CDDL is also available via the Internet at
37bf21cd93STycho Nightingale * http://www.illumos.org/license/CDDL.
38bf21cd93STycho Nightingale *
39bf21cd93STycho Nightingale * Copyright 2014 Pluribus Networks Inc.
40bf21cd93STycho Nightingale */
41bf21cd93STycho Nightingale
42bf21cd93STycho Nightingale #include <sys/cdefs.h>
43bf21cd93STycho Nightingale
44bf21cd93STycho Nightingale #include <sys/param.h>
45bf21cd93STycho Nightingale #include <sys/linker_set.h>
46bf21cd93STycho Nightingale #include <sys/stat.h>
47bf21cd93STycho Nightingale #include <sys/uio.h>
48bf21cd93STycho Nightingale #include <sys/ioctl.h>
49bf21cd93STycho Nightingale #include <sys/disk.h>
50bf21cd93STycho Nightingale
51bf21cd93STycho Nightingale #include <errno.h>
52bf21cd93STycho Nightingale #include <fcntl.h>
53bf21cd93STycho Nightingale #include <stdio.h>
54bf21cd93STycho Nightingale #include <stdlib.h>
55bf21cd93STycho Nightingale #include <stdint.h>
56bf21cd93STycho Nightingale #include <string.h>
57bf21cd93STycho Nightingale #include <strings.h>
58bf21cd93STycho Nightingale #include <unistd.h>
59bf21cd93STycho Nightingale #include <assert.h>
60bf21cd93STycho Nightingale #include <pthread.h>
61bf21cd93STycho Nightingale #include <md5.h>
62bf21cd93STycho Nightingale
63bf21cd93STycho Nightingale #include "bhyverun.h"
642b948146SAndy Fiddaman #include "config.h"
65154972afSPatrick Mooney #include "debug.h"
66bf21cd93STycho Nightingale #include "pci_emul.h"
67bf21cd93STycho Nightingale #include "virtio.h"
684c87aefeSPatrick Mooney #include "block_if.h"
69bf21cd93STycho Nightingale
70282a8ecbSJason King #define VTBLK_BSIZE 512
71282a8ecbSJason King #define VTBLK_RINGSZ 128
72bf21cd93STycho Nightingale
734c87aefeSPatrick Mooney _Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able to queue a request");
74bf21cd93STycho Nightingale
75282a8ecbSJason King #define VTBLK_S_OK 0
76282a8ecbSJason King #define VTBLK_S_IOERR 1
77bf21cd93STycho Nightingale #define VTBLK_S_UNSUPP 2
78bf21cd93STycho Nightingale
/*
 * Size of the device identifier buffer: 20 identifier bytes plus one byte
 * for the NUL terminator written by strlcpy()/snprintf().  The expansion is
 * parenthesized so that it stays a single operand when used inside a larger
 * expression (e.g. `2 * VTBLK_BLK_ID_BYTES`).
 */
#define	VTBLK_BLK_ID_BYTES	(20 + 1)
80bf21cd93STycho Nightingale
81bf21cd93STycho Nightingale /* Capability bits */
82282a8ecbSJason King #define VTBLK_F_BARRIER (1 << 0) /* Does host support barriers? */
83282a8ecbSJason King #define VTBLK_F_SIZE_MAX (1 << 1) /* Indicates maximum segment size */
84282a8ecbSJason King #define VTBLK_F_SEG_MAX (1 << 2) /* Indicates maximum # of segments */
85282a8ecbSJason King #define VTBLK_F_GEOMETRY (1 << 4) /* Legacy geometry available */
86282a8ecbSJason King #define VTBLK_F_RO (1 << 5) /* Disk is read-only */
87282a8ecbSJason King #define VTBLK_F_BLK_SIZE (1 << 6) /* Block size of disk is available*/
88282a8ecbSJason King #define VTBLK_F_SCSI (1 << 7) /* Supports scsi command passthru */
89282a8ecbSJason King #define VTBLK_F_FLUSH (1 << 9) /* Writeback mode enabled after reset */
90282a8ecbSJason King #define VTBLK_F_WCE (1 << 9) /* Legacy alias for FLUSH */
91282a8ecbSJason King #define VTBLK_F_TOPOLOGY (1 << 10) /* Topology information is available */
92282a8ecbSJason King #define VTBLK_F_CONFIG_WCE (1 << 11) /* Writeback mode available in config */
93154972afSPatrick Mooney #define VTBLK_F_MQ (1 << 12) /* Multi-Queue */
94282a8ecbSJason King #define VTBLK_F_DISCARD (1 << 13) /* Trim blocks */
95282a8ecbSJason King #define VTBLK_F_WRITE_ZEROES (1 << 14) /* Write zeros */
96bf21cd93STycho Nightingale
97bf21cd93STycho Nightingale /*
98bf21cd93STycho Nightingale * Host capabilities
99bf21cd93STycho Nightingale */
100282a8ecbSJason King #define VTBLK_S_HOSTCAPS \
101bf21cd93STycho Nightingale ( VTBLK_F_SEG_MAX | \
102bf21cd93STycho Nightingale VTBLK_F_BLK_SIZE | \
1034c87aefeSPatrick Mooney VTBLK_F_FLUSH | \
1044c87aefeSPatrick Mooney VTBLK_F_TOPOLOGY | \
105bf21cd93STycho Nightingale VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */
106bf21cd93STycho Nightingale
107282a8ecbSJason King /*
108282a8ecbSJason King * The current blockif_delete() interface only allows a single delete
109282a8ecbSJason King * request at a time.
110282a8ecbSJason King */
111282a8ecbSJason King #define VTBLK_MAX_DISCARD_SEG 1
112282a8ecbSJason King
113282a8ecbSJason King /*
114282a8ecbSJason King * An arbitrary limit to prevent excessive latency due to large
115282a8ecbSJason King * delete requests.
116282a8ecbSJason King */
117282a8ecbSJason King #define VTBLK_MAX_DISCARD_SECT ((16 << 20) / VTBLK_BSIZE) /* 16 MiB */
118282a8ecbSJason King
/*
 * Config space "registers"
 *
 * pci_vtblk_cfgread() copies raw bytes out of this structure by offset, so
 * the field order and the packed layout are part of the guest-visible
 * interface and must not be changed.
 */
struct vtblk_config {
	uint64_t	vbc_capacity;	/* disk size in VTBLK_BSIZE units */
	uint32_t	vbc_size_max;
	uint32_t	vbc_seg_max;	/* max data segments per request */
	struct {
		uint16_t	cylinders;
		uint8_t		heads;
		uint8_t		sectors;
	} vbc_geometry;
	uint32_t	vbc_blk_size;	/* logical sector size in bytes */
	struct {
		uint8_t		physical_block_exp;
		uint8_t		alignment_offset;
		uint16_t	min_io_size;
		uint32_t	opt_io_size;
	} vbc_topology;
	uint8_t		vbc_writeback;
	uint8_t		unused0[1];
	uint16_t	num_queues;
	uint32_t	max_discard_sectors;
	uint32_t	max_discard_seg;
	uint32_t	discard_sector_alignment;
	uint32_t	max_write_zeroes_sectors;
	uint32_t	max_write_zeroes_seg;
	uint8_t		write_zeroes_may_unmap;
	uint8_t		unused1[3];
} __packed;
149bf21cd93STycho Nightingale
/*
 * Fixed-size block header
 *
 * This is the first (device-readable) descriptor of every request chain.
 * The VBH_OP_* values are the request types carried in vbh_type; the
 * barrier flag, if present, is masked off before the type is examined.
 */
struct virtio_blk_hdr {
#define	VBH_OP_READ		0
#define	VBH_OP_WRITE		1
#define	VBH_OP_SCSI_CMD		2
#define	VBH_OP_SCSI_CMD_OUT	3
#define	VBH_OP_FLUSH		4
#define	VBH_OP_FLUSH_OUT	5
#define	VBH_OP_IDENT		8
#define	VBH_OP_DISCARD		11
#define	VBH_OP_WRITE_ZEROES	13

#define	VBH_FLAG_BARRIER	0x80000000	/* OR'ed into vbh_type */
	uint32_t	vbh_type;
	uint32_t	vbh_ioprio;
	uint64_t	vbh_sector;	/* start sector, VTBLK_BSIZE units */
} __packed;
169bf21cd93STycho Nightingale
/*
 * Debug printf
 *
 * Both macros take a doubly-parenthesized argument list, e.g.
 * DPRINTF(("fmt %d", x)), which is pasted after PRINTLN.  DPRINTF only
 * prints when pci_vtblk_debug is non-zero; it is wrapped in do/while(0) so
 * that it behaves as a single statement and cannot capture the `else` of an
 * enclosing if/else.
 */
static int pci_vtblk_debug;
#define	DPRINTF(params)	do {						\
	if (pci_vtblk_debug)						\
		PRINTLN params;						\
} while (0)
#define	WPRINTF(params)	PRINTLN params
176bf21cd93STycho Nightingale
1774c87aefeSPatrick Mooney struct pci_vtblk_ioreq {
1784c87aefeSPatrick Mooney struct blockif_req io_req;
1794c87aefeSPatrick Mooney struct pci_vtblk_softc *io_sc;
1804c87aefeSPatrick Mooney uint8_t *io_status;
1814c87aefeSPatrick Mooney uint16_t io_idx;
1824c87aefeSPatrick Mooney };
1834c87aefeSPatrick Mooney
/*
 * Payload of a DISCARD request: the range to discard is described by this
 * structure in the data descriptor rather than by the request header.
 */
struct virtio_blk_discard_write_zeroes {
	uint64_t	sector;		/* first sector, VTBLK_BSIZE units */
	uint32_t	num_sectors;	/* length, VTBLK_BSIZE units */
	struct {
		uint32_t	unmap:1;
		uint32_t	reserved:31;
	} flags;
};
192282a8ecbSJason King
193bf21cd93STycho Nightingale /*
194bf21cd93STycho Nightingale * Per-device softc
195bf21cd93STycho Nightingale */
196bf21cd93STycho Nightingale struct pci_vtblk_softc {
197bf21cd93STycho Nightingale struct virtio_softc vbsc_vs;
198bf21cd93STycho Nightingale pthread_mutex_t vsc_mtx;
199bf21cd93STycho Nightingale struct vqueue_info vbsc_vq;
2004c87aefeSPatrick Mooney struct vtblk_config vbsc_cfg;
201282a8ecbSJason King struct virtio_consts vbsc_consts;
2024c87aefeSPatrick Mooney struct blockif_ctxt *bc;
2034c87aefeSPatrick Mooney #ifndef __FreeBSD__
2044c87aefeSPatrick Mooney int vbsc_wce;
2054c87aefeSPatrick Mooney #endif
206bf21cd93STycho Nightingale char vbsc_ident[VTBLK_BLK_ID_BYTES];
2074c87aefeSPatrick Mooney struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ];
208bf21cd93STycho Nightingale };
209bf21cd93STycho Nightingale
210bf21cd93STycho Nightingale static void pci_vtblk_reset(void *);
211bf21cd93STycho Nightingale static void pci_vtblk_notify(void *, struct vqueue_info *);
212bf21cd93STycho Nightingale static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
213bf21cd93STycho Nightingale static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
2144c87aefeSPatrick Mooney #ifndef __FreeBSD__
2154c87aefeSPatrick Mooney static void pci_vtblk_apply_feats(void *, uint64_t);
2164c87aefeSPatrick Mooney #endif
217bf21cd93STycho Nightingale
218bf21cd93STycho Nightingale static struct virtio_consts vtblk_vi_consts = {
21959d65d31SAndy Fiddaman .vc_name = "vtblk",
22059d65d31SAndy Fiddaman .vc_nvq = 1,
22159d65d31SAndy Fiddaman .vc_cfgsize = sizeof(struct vtblk_config),
22259d65d31SAndy Fiddaman .vc_reset = pci_vtblk_reset,
22359d65d31SAndy Fiddaman .vc_qnotify = pci_vtblk_notify,
22459d65d31SAndy Fiddaman .vc_cfgread = pci_vtblk_cfgread,
22559d65d31SAndy Fiddaman .vc_cfgwrite = pci_vtblk_cfgwrite,
2264c87aefeSPatrick Mooney #ifndef __FreeBSD__
22759d65d31SAndy Fiddaman .vc_apply_features = pci_vtblk_apply_feats,
2284c87aefeSPatrick Mooney #else
22959d65d31SAndy Fiddaman .vc_apply_features = NULL,
2304c87aefeSPatrick Mooney #endif
23159d65d31SAndy Fiddaman .vc_hv_caps = VTBLK_S_HOSTCAPS,
232bf21cd93STycho Nightingale };
233bf21cd93STycho Nightingale
234bf21cd93STycho Nightingale static void
pci_vtblk_reset(void * vsc)235bf21cd93STycho Nightingale pci_vtblk_reset(void *vsc)
236bf21cd93STycho Nightingale {
237bf21cd93STycho Nightingale struct pci_vtblk_softc *sc = vsc;
238bf21cd93STycho Nightingale
239154972afSPatrick Mooney DPRINTF(("vtblk: device reset requested !"));
240bf21cd93STycho Nightingale vi_reset_dev(&sc->vbsc_vs);
2414c87aefeSPatrick Mooney #ifndef __FreeBSD__
2424c87aefeSPatrick Mooney /* Disable write cache until FLUSH feature is negotiated */
2434c87aefeSPatrick Mooney (void) blockif_set_wce(sc->bc, 0);
2444c87aefeSPatrick Mooney sc->vbsc_wce = 0;
2454c87aefeSPatrick Mooney #endif
2464c87aefeSPatrick Mooney }
2474c87aefeSPatrick Mooney
2484c87aefeSPatrick Mooney static void
pci_vtblk_done_locked(struct pci_vtblk_ioreq * io,int err)2494c87aefeSPatrick Mooney pci_vtblk_done_locked(struct pci_vtblk_ioreq *io, int err)
2504c87aefeSPatrick Mooney {
2514c87aefeSPatrick Mooney struct pci_vtblk_softc *sc = io->io_sc;
2524c87aefeSPatrick Mooney
2534c87aefeSPatrick Mooney /* convert errno into a virtio block error return */
2544c87aefeSPatrick Mooney if (err == EOPNOTSUPP || err == ENOSYS)
2554c87aefeSPatrick Mooney *io->io_status = VTBLK_S_UNSUPP;
2564c87aefeSPatrick Mooney else if (err != 0)
2574c87aefeSPatrick Mooney *io->io_status = VTBLK_S_IOERR;
2584c87aefeSPatrick Mooney else
2594c87aefeSPatrick Mooney *io->io_status = VTBLK_S_OK;
2604c87aefeSPatrick Mooney
2614c87aefeSPatrick Mooney /*
2624c87aefeSPatrick Mooney * Return the descriptor back to the host.
2634c87aefeSPatrick Mooney * We wrote 1 byte (our status) to host.
2644c87aefeSPatrick Mooney */
2654c87aefeSPatrick Mooney vq_relchain(&sc->vbsc_vq, io->io_idx, 1);
2664c87aefeSPatrick Mooney vq_endchains(&sc->vbsc_vq, 0);
2674c87aefeSPatrick Mooney }
2684c87aefeSPatrick Mooney
2694c87aefeSPatrick Mooney static void
pci_vtblk_done(struct blockif_req * br,int err)2704c87aefeSPatrick Mooney pci_vtblk_done(struct blockif_req *br, int err)
2714c87aefeSPatrick Mooney {
2724c87aefeSPatrick Mooney struct pci_vtblk_ioreq *io = br->br_param;
2734c87aefeSPatrick Mooney struct pci_vtblk_softc *sc = io->io_sc;
2744c87aefeSPatrick Mooney
2754c87aefeSPatrick Mooney pthread_mutex_lock(&sc->vsc_mtx);
2764c87aefeSPatrick Mooney pci_vtblk_done_locked(io, err);
2774c87aefeSPatrick Mooney pthread_mutex_unlock(&sc->vsc_mtx);
278bf21cd93STycho Nightingale }
279bf21cd93STycho Nightingale
280bf21cd93STycho Nightingale static void
pci_vtblk_proc(struct pci_vtblk_softc * sc,struct vqueue_info * vq)281bf21cd93STycho Nightingale pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
282bf21cd93STycho Nightingale {
283bf21cd93STycho Nightingale struct virtio_blk_hdr *vbh;
2844c87aefeSPatrick Mooney struct pci_vtblk_ioreq *io;
285bf21cd93STycho Nightingale int i, n;
286bf21cd93STycho Nightingale int err;
2874c87aefeSPatrick Mooney ssize_t iolen;
288bf21cd93STycho Nightingale int writeop, type;
289b0de25cbSAndy Fiddaman struct vi_req req;
2904c87aefeSPatrick Mooney struct iovec iov[BLOCKIF_IOV_MAX + 2];
291282a8ecbSJason King struct virtio_blk_discard_write_zeroes *discard;
292bf21cd93STycho Nightingale
293b0de25cbSAndy Fiddaman n = vq_getchain(vq, iov, BLOCKIF_IOV_MAX + 2, &req);
294bf21cd93STycho Nightingale
295bf21cd93STycho Nightingale /*
296bf21cd93STycho Nightingale * The first descriptor will be the read-only fixed header,
297bf21cd93STycho Nightingale * and the last is for status (hence +2 above and below).
298bf21cd93STycho Nightingale * The remaining iov's are the actual data I/O vectors.
299bf21cd93STycho Nightingale *
300bf21cd93STycho Nightingale * XXX - note - this fails on crash dump, which does a
301bf21cd93STycho Nightingale * VIRTIO_BLK_T_FLUSH with a zero transfer length
302bf21cd93STycho Nightingale */
3034c87aefeSPatrick Mooney assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2);
304bf21cd93STycho Nightingale
305b0de25cbSAndy Fiddaman io = &sc->vbsc_ios[req.idx];
306b0de25cbSAndy Fiddaman assert(req.readable != 0);
307bf21cd93STycho Nightingale assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
3084c87aefeSPatrick Mooney vbh = (struct virtio_blk_hdr *)iov[0].iov_base;
3094c87aefeSPatrick Mooney memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2));
3104c87aefeSPatrick Mooney io->io_req.br_iovcnt = n - 2;
311282a8ecbSJason King io->io_req.br_offset = vbh->vbh_sector * VTBLK_BSIZE;
3124c87aefeSPatrick Mooney io->io_status = (uint8_t *)iov[--n].iov_base;
313b0de25cbSAndy Fiddaman assert(req.writable != 0);
314bf21cd93STycho Nightingale assert(iov[n].iov_len == 1);
315bf21cd93STycho Nightingale
316bf21cd93STycho Nightingale /*
317bf21cd93STycho Nightingale * XXX
318bf21cd93STycho Nightingale * The guest should not be setting the BARRIER flag because
319bf21cd93STycho Nightingale * we don't advertise the capability.
320bf21cd93STycho Nightingale */
321bf21cd93STycho Nightingale type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
322282a8ecbSJason King writeop = (type == VBH_OP_WRITE || type == VBH_OP_DISCARD);
323b0de25cbSAndy Fiddaman /*
324b0de25cbSAndy Fiddaman * - Write op implies read-only descriptor
325b0de25cbSAndy Fiddaman * - Read/ident op implies write-only descriptor
326b0de25cbSAndy Fiddaman *
327b0de25cbSAndy Fiddaman * By taking away either the read-only fixed header or the write-only
328b0de25cbSAndy Fiddaman * status iovec, the following condition should hold true.
329b0de25cbSAndy Fiddaman */
330b0de25cbSAndy Fiddaman assert(n == (writeop ? req.readable : req.writable));
331bf21cd93STycho Nightingale
332bf21cd93STycho Nightingale iolen = 0;
333bf21cd93STycho Nightingale for (i = 1; i < n; i++) {
334bf21cd93STycho Nightingale iolen += iov[i].iov_len;
335bf21cd93STycho Nightingale }
3364c87aefeSPatrick Mooney io->io_req.br_resid = iolen;
337bf21cd93STycho Nightingale
338154972afSPatrick Mooney DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld",
339282a8ecbSJason King writeop ? "write/discard" : "read/ident", iolen, i - 1,
3404c87aefeSPatrick Mooney io->io_req.br_offset));
341bf21cd93STycho Nightingale
342bf21cd93STycho Nightingale switch (type) {
3434c87aefeSPatrick Mooney case VBH_OP_READ:
3444c87aefeSPatrick Mooney err = blockif_read(sc->bc, &io->io_req);
3454c87aefeSPatrick Mooney break;
346bf21cd93STycho Nightingale case VBH_OP_WRITE:
3474c87aefeSPatrick Mooney err = blockif_write(sc->bc, &io->io_req);
348bf21cd93STycho Nightingale break;
349282a8ecbSJason King case VBH_OP_DISCARD:
350282a8ecbSJason King /*
351282a8ecbSJason King * We currently only support a single request, if the guest
352282a8ecbSJason King * has submitted a request that doesn't conform to the
353282a8ecbSJason King * requirements, we return a error.
354282a8ecbSJason King */
355282a8ecbSJason King if (iov[1].iov_len != sizeof (*discard)) {
356282a8ecbSJason King pci_vtblk_done_locked(io, EINVAL);
357282a8ecbSJason King return;
358282a8ecbSJason King }
359282a8ecbSJason King
360282a8ecbSJason King /* The segments to discard are provided rather than data */
361282a8ecbSJason King discard = (struct virtio_blk_discard_write_zeroes *)
362282a8ecbSJason King iov[1].iov_base;
363282a8ecbSJason King
364282a8ecbSJason King /*
365282a8ecbSJason King * virtio v1.1 5.2.6.2:
366282a8ecbSJason King * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP
367282a8ecbSJason King * for discard and write zeroes commands if any unknown flag is
368282a8ecbSJason King * set. Furthermore, the device MUST set the status byte to
369282a8ecbSJason King * VIRTIO_BLK_S_UNSUPP for discard commands if the unmap flag
370282a8ecbSJason King * is set.
371282a8ecbSJason King *
372282a8ecbSJason King * Currently there are no known flags for a DISCARD request.
373282a8ecbSJason King */
374282a8ecbSJason King if (discard->flags.unmap != 0 || discard->flags.reserved != 0) {
375282a8ecbSJason King pci_vtblk_done_locked(io, ENOTSUP);
376282a8ecbSJason King return;
377282a8ecbSJason King }
378282a8ecbSJason King
379282a8ecbSJason King /* Make sure the request doesn't exceed our size limit */
380282a8ecbSJason King if (discard->num_sectors > VTBLK_MAX_DISCARD_SECT) {
381282a8ecbSJason King pci_vtblk_done_locked(io, EINVAL);
382282a8ecbSJason King return;
383282a8ecbSJason King }
384282a8ecbSJason King
385282a8ecbSJason King io->io_req.br_offset = discard->sector * VTBLK_BSIZE;
386282a8ecbSJason King io->io_req.br_resid = discard->num_sectors * VTBLK_BSIZE;
387282a8ecbSJason King err = blockif_delete(sc->bc, &io->io_req);
388282a8ecbSJason King break;
3894c87aefeSPatrick Mooney case VBH_OP_FLUSH:
3904c87aefeSPatrick Mooney case VBH_OP_FLUSH_OUT:
3914c87aefeSPatrick Mooney err = blockif_flush(sc->bc, &io->io_req);
392bf21cd93STycho Nightingale break;
393bf21cd93STycho Nightingale case VBH_OP_IDENT:
394bf21cd93STycho Nightingale /* Assume a single buffer */
3954c87aefeSPatrick Mooney /* S/n equal to buffer is not zero-terminated. */
3964c87aefeSPatrick Mooney memset(iov[1].iov_base, 0, iov[1].iov_len);
3974c87aefeSPatrick Mooney strncpy(iov[1].iov_base, sc->vbsc_ident,
398bf21cd93STycho Nightingale MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
3994c87aefeSPatrick Mooney pci_vtblk_done_locked(io, 0);
4004c87aefeSPatrick Mooney return;
401bf21cd93STycho Nightingale default:
4024c87aefeSPatrick Mooney pci_vtblk_done_locked(io, EOPNOTSUPP);
4034c87aefeSPatrick Mooney return;
404bf21cd93STycho Nightingale }
4054c87aefeSPatrick Mooney assert(err == 0);
406bf21cd93STycho Nightingale }
407bf21cd93STycho Nightingale
/*
 * Queue notification callback: process descriptor chains one at a time
 * until the queue has none left.
 */
static void
pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtblk_softc *softc = vsc;

	for (;;) {
		if (!vq_has_descs(vq))
			break;
		pci_vtblk_proc(softc, vq);
	}
}
416bf21cd93STycho Nightingale
417b0de25cbSAndy Fiddaman static void
pci_vtblk_resized(struct blockif_ctxt * bctxt __unused,void * arg,size_t new_size)41859d65d31SAndy Fiddaman pci_vtblk_resized(struct blockif_ctxt *bctxt __unused, void *arg,
41959d65d31SAndy Fiddaman size_t new_size)
420b0de25cbSAndy Fiddaman {
421b0de25cbSAndy Fiddaman struct pci_vtblk_softc *sc;
422b0de25cbSAndy Fiddaman
423b0de25cbSAndy Fiddaman sc = arg;
424b0de25cbSAndy Fiddaman
425b0de25cbSAndy Fiddaman sc->vbsc_cfg.vbc_capacity = new_size / VTBLK_BSIZE; /* 512-byte units */
426b0de25cbSAndy Fiddaman vi_interrupt(&sc->vbsc_vs, VIRTIO_PCI_ISR_CONFIG,
427b0de25cbSAndy Fiddaman sc->vbsc_vs.vs_msix_cfg_idx);
428b0de25cbSAndy Fiddaman }
429b0de25cbSAndy Fiddaman
430bf21cd93STycho Nightingale static int
pci_vtblk_init(struct pci_devinst * pi,nvlist_t * nvl)431*32640292SAndy Fiddaman pci_vtblk_init(struct pci_devinst *pi, nvlist_t *nvl)
432bf21cd93STycho Nightingale {
43359d65d31SAndy Fiddaman char bident[sizeof("XXX:XXX")];
4344c87aefeSPatrick Mooney struct blockif_ctxt *bctxt;
4356dc98349SAndy Fiddaman const char *path, *serial;
436bf21cd93STycho Nightingale MD5_CTX mdctx;
437bf21cd93STycho Nightingale u_char digest[16];
438bf21cd93STycho Nightingale struct pci_vtblk_softc *sc;
4394c87aefeSPatrick Mooney off_t size;
4404c87aefeSPatrick Mooney int i, sectsz, sts, sto;
441bf21cd93STycho Nightingale
442bf21cd93STycho Nightingale /*
443bf21cd93STycho Nightingale * The supplied backing file has to exist
444bf21cd93STycho Nightingale */
44559d65d31SAndy Fiddaman snprintf(bident, sizeof(bident), "%u:%u", pi->pi_slot, pi->pi_func);
4462b948146SAndy Fiddaman bctxt = blockif_open(nvl, bident);
447282a8ecbSJason King if (bctxt == NULL) {
448bf21cd93STycho Nightingale perror("Could not open backing file");
449bf21cd93STycho Nightingale return (1);
450bf21cd93STycho Nightingale }
451bf21cd93STycho Nightingale
452*32640292SAndy Fiddaman if (blockif_add_boot_device(pi, bctxt)) {
453*32640292SAndy Fiddaman perror("Invalid boot device");
454*32640292SAndy Fiddaman return (1);
455*32640292SAndy Fiddaman }
456*32640292SAndy Fiddaman
4574c87aefeSPatrick Mooney size = blockif_size(bctxt);
4584c87aefeSPatrick Mooney sectsz = blockif_sectsz(bctxt);
4594c87aefeSPatrick Mooney blockif_psectsz(bctxt, &sts, &sto);
460bf21cd93STycho Nightingale
461bf21cd93STycho Nightingale sc = calloc(1, sizeof(struct pci_vtblk_softc));
4624c87aefeSPatrick Mooney sc->bc = bctxt;
4634c87aefeSPatrick Mooney for (i = 0; i < VTBLK_RINGSZ; i++) {
4644c87aefeSPatrick Mooney struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
4654c87aefeSPatrick Mooney io->io_req.br_callback = pci_vtblk_done;
4664c87aefeSPatrick Mooney io->io_req.br_param = io;
4674c87aefeSPatrick Mooney io->io_sc = sc;
4684c87aefeSPatrick Mooney io->io_idx = i;
4694c87aefeSPatrick Mooney }
470bf21cd93STycho Nightingale
471282a8ecbSJason King bcopy(&vtblk_vi_consts, &sc->vbsc_consts, sizeof (vtblk_vi_consts));
472282a8ecbSJason King if (blockif_candelete(sc->bc))
473282a8ecbSJason King sc->vbsc_consts.vc_hv_caps |= VTBLK_F_DISCARD;
474282a8ecbSJason King
4754c87aefeSPatrick Mooney #ifndef __FreeBSD__
4764c87aefeSPatrick Mooney /* Disable write cache until FLUSH feature is negotiated */
4774c87aefeSPatrick Mooney (void) blockif_set_wce(sc->bc, 0);
4784c87aefeSPatrick Mooney sc->vbsc_wce = 0;
4794c87aefeSPatrick Mooney #endif
480bf21cd93STycho Nightingale
481bf21cd93STycho Nightingale pthread_mutex_init(&sc->vsc_mtx, NULL);
482bf21cd93STycho Nightingale
483bf21cd93STycho Nightingale /* init virtio softc and virtqueues */
484282a8ecbSJason King vi_softc_linkup(&sc->vbsc_vs, &sc->vbsc_consts, sc, pi, &sc->vbsc_vq);
485bf21cd93STycho Nightingale sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;
486bf21cd93STycho Nightingale
487bf21cd93STycho Nightingale sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
488bf21cd93STycho Nightingale /* sc->vbsc_vq.vq_notify = we have no per-queue notify */
489bf21cd93STycho Nightingale
490bf21cd93STycho Nightingale /*
4916dc98349SAndy Fiddaman * If an explicit identifier is not given, create an
4926dc98349SAndy Fiddaman * identifier using parts of the md5 sum of the filename.
493bf21cd93STycho Nightingale */
4946dc98349SAndy Fiddaman bzero(sc->vbsc_ident, VTBLK_BLK_ID_BYTES);
4954b82e532SAndy Fiddaman if ((serial = get_config_value_node(nvl, "serial")) != NULL ||
4964b82e532SAndy Fiddaman (serial = get_config_value_node(nvl, "ser")) != NULL) {
4974b82e532SAndy Fiddaman strlcpy(sc->vbsc_ident, serial, VTBLK_BLK_ID_BYTES);
4986dc98349SAndy Fiddaman } else {
4996dc98349SAndy Fiddaman path = get_config_value_node(nvl, "path");
5006dc98349SAndy Fiddaman MD5Init(&mdctx);
5016dc98349SAndy Fiddaman MD5Update(&mdctx, path, strlen(path));
5026dc98349SAndy Fiddaman MD5Final(digest, &mdctx);
5036dc98349SAndy Fiddaman snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES,
5046dc98349SAndy Fiddaman "BHYVE-%02X%02X-%02X%02X-%02X%02X",
5056dc98349SAndy Fiddaman digest[0], digest[1], digest[2], digest[3], digest[4],
5066dc98349SAndy Fiddaman digest[5]);
5074b82e532SAndy Fiddaman }
5084b82e532SAndy Fiddaman
509bf21cd93STycho Nightingale /* setup virtio block config space */
510282a8ecbSJason King sc->vbsc_cfg.vbc_capacity = size / VTBLK_BSIZE; /* 512-byte units */
511bf21cd93STycho Nightingale sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */
5124c87aefeSPatrick Mooney
5134c87aefeSPatrick Mooney /*
5144c87aefeSPatrick Mooney * If Linux is presented with a seg_max greater than the virtio queue
5154c87aefeSPatrick Mooney * size, it can stumble into situations where it violates its own
5164c87aefeSPatrick Mooney * invariants and panics. For safety, we keep seg_max clamped, paying
5174c87aefeSPatrick Mooney * heed to the two extra descriptors needed for the header and status
5184c87aefeSPatrick Mooney * of a request.
5194c87aefeSPatrick Mooney */
5204c87aefeSPatrick Mooney sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX);
5214c87aefeSPatrick Mooney sc->vbsc_cfg.vbc_geometry.cylinders = 0; /* no geometry */
5224c87aefeSPatrick Mooney sc->vbsc_cfg.vbc_geometry.heads = 0;
5234c87aefeSPatrick Mooney sc->vbsc_cfg.vbc_geometry.sectors = 0;
5244c87aefeSPatrick Mooney sc->vbsc_cfg.vbc_blk_size = sectsz;
5254c87aefeSPatrick Mooney sc->vbsc_cfg.vbc_topology.physical_block_exp =
5264c87aefeSPatrick Mooney (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0;
5274c87aefeSPatrick Mooney sc->vbsc_cfg.vbc_topology.alignment_offset =
5284c87aefeSPatrick Mooney (sto != 0) ? ((sts - sto) / sectsz) : 0;
5294c87aefeSPatrick Mooney sc->vbsc_cfg.vbc_topology.min_io_size = 0;
5304c87aefeSPatrick Mooney sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
5314c87aefeSPatrick Mooney sc->vbsc_cfg.vbc_writeback = 0;
532282a8ecbSJason King sc->vbsc_cfg.max_discard_sectors = VTBLK_MAX_DISCARD_SECT;
533282a8ecbSJason King sc->vbsc_cfg.max_discard_seg = VTBLK_MAX_DISCARD_SEG;
5346960cd89SAndy Fiddaman sc->vbsc_cfg.discard_sector_alignment = MAX(sectsz, sts) / VTBLK_BSIZE;
535bf21cd93STycho Nightingale
536bf21cd93STycho Nightingale /*
537bf21cd93STycho Nightingale * Should we move some of this into virtio.c? Could
538bf21cd93STycho Nightingale * have the device, class, and subdev_0 as fields in
539bf21cd93STycho Nightingale * the virtio constants structure.
540bf21cd93STycho Nightingale */
541bf21cd93STycho Nightingale pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
542bf21cd93STycho Nightingale pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
543bf21cd93STycho Nightingale pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
5442b948146SAndy Fiddaman pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_BLOCK);
5454c87aefeSPatrick Mooney pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
546bf21cd93STycho Nightingale
5474c87aefeSPatrick Mooney if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
5484c87aefeSPatrick Mooney blockif_close(sc->bc);
5494c87aefeSPatrick Mooney free(sc);
550bf21cd93STycho Nightingale return (1);
5514c87aefeSPatrick Mooney }
552bf21cd93STycho Nightingale vi_set_io_bar(&sc->vbsc_vs, 0);
553b0de25cbSAndy Fiddaman blockif_register_resize_callback(sc->bc, pci_vtblk_resized, sc);
554bf21cd93STycho Nightingale return (0);
555bf21cd93STycho Nightingale }
556bf21cd93STycho Nightingale
557bf21cd93STycho Nightingale static int
pci_vtblk_cfgwrite(void * vsc __unused,int offset,int size __unused,uint32_t value __unused)55859d65d31SAndy Fiddaman pci_vtblk_cfgwrite(void *vsc __unused, int offset, int size __unused,
55959d65d31SAndy Fiddaman uint32_t value __unused)
560bf21cd93STycho Nightingale {
561bf21cd93STycho Nightingale
562154972afSPatrick Mooney DPRINTF(("vtblk: write to readonly reg %d", offset));
563bf21cd93STycho Nightingale return (1);
564bf21cd93STycho Nightingale }
565bf21cd93STycho Nightingale
566bf21cd93STycho Nightingale static int
pci_vtblk_cfgread(void * vsc,int offset,int size,uint32_t * retval)567bf21cd93STycho Nightingale pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
568bf21cd93STycho Nightingale {
569bf21cd93STycho Nightingale struct pci_vtblk_softc *sc = vsc;
570bf21cd93STycho Nightingale void *ptr;
571bf21cd93STycho Nightingale
572bf21cd93STycho Nightingale /* our caller has already verified offset and size */
573bf21cd93STycho Nightingale ptr = (uint8_t *)&sc->vbsc_cfg + offset;
574bf21cd93STycho Nightingale memcpy(retval, ptr, size);
575bf21cd93STycho Nightingale return (0);
576bf21cd93STycho Nightingale }
577bf21cd93STycho Nightingale
5784c87aefeSPatrick Mooney #ifndef __FreeBSD__
5794c87aefeSPatrick Mooney void
pci_vtblk_apply_feats(void * vsc,uint64_t caps)5804c87aefeSPatrick Mooney pci_vtblk_apply_feats(void *vsc, uint64_t caps)
5814c87aefeSPatrick Mooney {
5824c87aefeSPatrick Mooney struct pci_vtblk_softc *sc = vsc;
5834c87aefeSPatrick Mooney const int wce_next = ((caps & VTBLK_F_FLUSH) != 0) ? 1 : 0;
5844c87aefeSPatrick Mooney
5854c87aefeSPatrick Mooney if (sc->vbsc_wce != wce_next) {
5864c87aefeSPatrick Mooney (void) blockif_set_wce(sc->bc, wce_next);
5874c87aefeSPatrick Mooney sc->vbsc_wce = wce_next;
5884c87aefeSPatrick Mooney }
5894c87aefeSPatrick Mooney }
5904c87aefeSPatrick Mooney #endif /* __FreeBSD__ */
5914c87aefeSPatrick Mooney
5924f3f3e9aSAndy Fiddaman static const struct pci_devemu pci_de_vblk = {
593bf21cd93STycho Nightingale .pe_emu = "virtio-blk",
594bf21cd93STycho Nightingale .pe_init = pci_vtblk_init,
5952b948146SAndy Fiddaman .pe_legacy_config = blockif_legacy_config,
596bf21cd93STycho Nightingale .pe_barwrite = vi_pci_write,
5976dc98349SAndy Fiddaman .pe_barread = vi_pci_read,
598bf21cd93STycho Nightingale };
599bf21cd93STycho Nightingale PCI_EMUL_SET(pci_de_vblk);
600