xref: /illumos-gate/usr/src/boot/common/bcache.c (revision 22028508)
1c28006deSToomas Soome /*
2199767f8SToomas Soome  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
3199767f8SToomas Soome  * Copyright 2015 Toomas Soome <tsoome@me.com>
4199767f8SToomas Soome  * All rights reserved.
5199767f8SToomas Soome  *
6199767f8SToomas Soome  * Redistribution and use in source and binary forms, with or without
7199767f8SToomas Soome  * modification, are permitted provided that the following conditions
8199767f8SToomas Soome  * are met:
9199767f8SToomas Soome  * 1. Redistributions of source code must retain the above copyright
10199767f8SToomas Soome  *    notice, this list of conditions and the following disclaimer.
11199767f8SToomas Soome  * 2. Redistributions in binary form must reproduce the above copyright
12199767f8SToomas Soome  *    notice, this list of conditions and the following disclaimer in the
13199767f8SToomas Soome  *    documentation and/or other materials provided with the distribution.
14199767f8SToomas Soome  *
15199767f8SToomas Soome  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16199767f8SToomas Soome  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17199767f8SToomas Soome  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18199767f8SToomas Soome  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19199767f8SToomas Soome  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20199767f8SToomas Soome  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21199767f8SToomas Soome  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22199767f8SToomas Soome  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23199767f8SToomas Soome  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24199767f8SToomas Soome  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25199767f8SToomas Soome  * SUCH DAMAGE.
26199767f8SToomas Soome  */
27199767f8SToomas Soome 
28199767f8SToomas Soome #include <sys/cdefs.h>
29199767f8SToomas Soome #include <sys/param.h>
30199767f8SToomas Soome 
31199767f8SToomas Soome /*
32199767f8SToomas Soome  * Simple hashed block cache
33199767f8SToomas Soome  */
34199767f8SToomas Soome 
35199767f8SToomas Soome #include <sys/stdint.h>
36199767f8SToomas Soome 
37199767f8SToomas Soome #include <stand.h>
38199767f8SToomas Soome #include <string.h>
39199767f8SToomas Soome #include <strings.h>
40199767f8SToomas Soome 
41199767f8SToomas Soome #include "bootstrap.h"
42199767f8SToomas Soome 
/* #define BCACHE_DEBUG */

/*
 * DPRINTF() prints a trace line prefixed with the name of the calling
 * function when BCACHE_DEBUG is defined; otherwise it expands to a no-op
 * expression and its arguments are discarded.
 */
#ifndef BCACHE_DEBUG
#define	DPRINTF(fmt, args...)	((void)0)
#else
#define	DPRINTF(fmt, args...)	printf("%s: " fmt "\n", __func__, ## args)
#endif
50199767f8SToomas Soome 
51199767f8SToomas Soome struct bcachectl
52199767f8SToomas Soome {
53c28006deSToomas Soome 	daddr_t	bc_blkno;
54c28006deSToomas Soome 	int	bc_count;
55199767f8SToomas Soome };
56199767f8SToomas Soome 
57199767f8SToomas Soome /*
58199767f8SToomas Soome  * bcache per device node. cache is allocated on device first open and freed
59199767f8SToomas Soome  * on last close, to save memory. The issue there is the size; biosdisk
60199767f8SToomas Soome  * supports up to 31 (0x1f) devices. Classic setup would use single disk
61199767f8SToomas Soome  * to boot from, but this has changed with zfs.
62199767f8SToomas Soome  */
63199767f8SToomas Soome struct bcache {
64c28006deSToomas Soome 	struct bcachectl	*bcache_ctl;
65c28006deSToomas Soome 	caddr_t			bcache_data;
66c28006deSToomas Soome 	size_t			bcache_nblks;
67c28006deSToomas Soome 	size_t			ra;
68199767f8SToomas Soome };
69199767f8SToomas Soome 
70c28006deSToomas Soome static uint_t bcache_total_nblks;	/* set by bcache_init */
71c28006deSToomas Soome static uint_t bcache_blksize;		/* set by bcache_init */
72c28006deSToomas Soome static uint_t bcache_numdev;		/* set by bcache_add_dev */
73199767f8SToomas Soome /* statistics */
74c28006deSToomas Soome static uint_t bcache_units;	/* number of devices with cache */
75c28006deSToomas Soome static uint_t bcache_unit_nblks;	/* nblocks per unit */
76c28006deSToomas Soome static uint_t bcache_hits;
77c28006deSToomas Soome static uint_t bcache_misses;
78c28006deSToomas Soome static uint_t bcache_ops;
79c28006deSToomas Soome static uint_t bcache_bypasses;
80c28006deSToomas Soome static uint_t bcache_bcount;
81c28006deSToomas Soome static uint_t bcache_rablks;
82199767f8SToomas Soome 
/*
 * BHASH() maps a block number to its cache slot by masking with
 * (bcache_nblks - 1); this only works when bcache_nblks is a power of 2.
 *
 * BCACHE_LOOKUP() evaluates to non-zero when the block is NOT in the cache
 * (the slot holds some other block number), and to zero on a hit.
 */
#define	BHASH(bc, blkno)	((blkno) & ((bc)->bcache_nblks - 1))
#define	BCACHE_LOOKUP(bc, blkno)	\
	((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno))

/* upper and lower limits, in blocks, for the dynamic read-ahead hint */
#define	BCACHE_READAHEAD	256
#define	BCACHE_MINREADAHEAD	32
88199767f8SToomas Soome 
89199767f8SToomas Soome static void	bcache_invalidate(struct bcache *bc, daddr_t blkno);
90199767f8SToomas Soome static void	bcache_insert(struct bcache *bc, daddr_t blkno);
91199767f8SToomas Soome static void	bcache_free_instance(struct bcache *bc);
92199767f8SToomas Soome 
93199767f8SToomas Soome /*
94199767f8SToomas Soome  * Initialise the cache for (nblks) of (bsize).
95199767f8SToomas Soome  */
96199767f8SToomas Soome void
bcache_init(size_t nblks,size_t bsize)9707542513SToomas Soome bcache_init(size_t nblks, size_t bsize)
98199767f8SToomas Soome {
99c28006deSToomas Soome 	/* set up control data */
100c28006deSToomas Soome 	bcache_total_nblks = nblks;
101c28006deSToomas Soome 	bcache_blksize = bsize;
102199767f8SToomas Soome }
103199767f8SToomas Soome 
104199767f8SToomas Soome /*
105199767f8SToomas Soome  * add number of devices to bcache. we have to divide cache space
106199767f8SToomas Soome  * between the devices, so bcache_add_dev() can be used to set up the
107199767f8SToomas Soome  * number. The issue is, we need to get the number before actual allocations.
108199767f8SToomas Soome  * bcache_add_dev() is supposed to be called from device init() call, so the
109199767f8SToomas Soome  * assumption is, devsw dv_init is called for plain devices first, and
110199767f8SToomas Soome  * for zfs, last.
111199767f8SToomas Soome  */
112199767f8SToomas Soome void
bcache_add_dev(int devices)113199767f8SToomas Soome bcache_add_dev(int devices)
114199767f8SToomas Soome {
115c28006deSToomas Soome 	bcache_numdev += devices;
116199767f8SToomas Soome }
117199767f8SToomas Soome 
118199767f8SToomas Soome void *
bcache_allocate(void)119199767f8SToomas Soome bcache_allocate(void)
120199767f8SToomas Soome {
121c28006deSToomas Soome 	uint_t i;
122c28006deSToomas Soome 	struct bcache *bc = malloc(sizeof (struct bcache));
123c28006deSToomas Soome 	int disks = bcache_numdev;
124c28006deSToomas Soome 
125c28006deSToomas Soome 	if (disks == 0)
126c28006deSToomas Soome 		disks = 1;	/* safe guard */
127c28006deSToomas Soome 
128c28006deSToomas Soome 	if (bc == NULL) {
129c28006deSToomas Soome 		errno = ENOMEM;
130c28006deSToomas Soome 		return (bc);
131c28006deSToomas Soome 	}
132c28006deSToomas Soome 
133c28006deSToomas Soome 	/*
134c28006deSToomas Soome 	 * the bcache block count must be power of 2 for hash function
135c28006deSToomas Soome 	 */
136c28006deSToomas Soome 	i = fls(disks) - 1;		/* highbit - 1 */
137c28006deSToomas Soome 	if (disks > (1 << i))	/* next power of 2 */
138c28006deSToomas Soome 		i++;
139c28006deSToomas Soome 
140c28006deSToomas Soome 	bc->bcache_nblks = bcache_total_nblks >> i;
141c28006deSToomas Soome 	bcache_unit_nblks = bc->bcache_nblks;
142c28006deSToomas Soome 	bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
143c28006deSToomas Soome 	if (bc->bcache_data == NULL) {
144c28006deSToomas Soome 		/* dont error out yet. fall back to 32 blocks and try again */
145c28006deSToomas Soome 		bc->bcache_nblks = 32;
146c28006deSToomas Soome 		bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize +
147c28006deSToomas Soome 		    sizeof (uint32_t));
148c28006deSToomas Soome 	}
149199767f8SToomas Soome 
150c28006deSToomas Soome 	bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof (struct bcachectl));
151199767f8SToomas Soome 
152c28006deSToomas Soome 	if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) {
153c28006deSToomas Soome 		bcache_free_instance(bc);
154c28006deSToomas Soome 		errno = ENOMEM;
155c28006deSToomas Soome 		return (NULL);
156c28006deSToomas Soome 	}
157c28006deSToomas Soome 
158c28006deSToomas Soome 	/* Flush the cache */
159c28006deSToomas Soome 	for (i = 0; i < bc->bcache_nblks; i++) {
160c28006deSToomas Soome 		bc->bcache_ctl[i].bc_count = -1;
161c28006deSToomas Soome 		bc->bcache_ctl[i].bc_blkno = -1;
162c28006deSToomas Soome 	}
163c28006deSToomas Soome 	bcache_units++;
164c28006deSToomas Soome 	bc->ra = BCACHE_READAHEAD;	/* optimistic read ahead */
165199767f8SToomas Soome 	return (bc);
166199767f8SToomas Soome }
167199767f8SToomas Soome 
168199767f8SToomas Soome void
bcache_free(void * cache)169199767f8SToomas Soome bcache_free(void *cache)
170199767f8SToomas Soome {
171c28006deSToomas Soome 	struct bcache *bc = cache;
172199767f8SToomas Soome 
173c28006deSToomas Soome 	if (bc == NULL)
174c28006deSToomas Soome 		return;
175199767f8SToomas Soome 
176c28006deSToomas Soome 	bcache_free_instance(bc);
177c28006deSToomas Soome 	bcache_units--;
178199767f8SToomas Soome }
179199767f8SToomas Soome 
180199767f8SToomas Soome /*
181199767f8SToomas Soome  * Handle a write request; write directly to the disk, and populate the
182199767f8SToomas Soome  * cache with the new values.
183199767f8SToomas Soome  */
184199767f8SToomas Soome static int
write_strategy(void * devdata,int rw,daddr_t blk,size_t size,char * buf,size_t * rsize)18538dea910SToomas Soome write_strategy(void *devdata, int rw, daddr_t blk, size_t size,
18638dea910SToomas Soome     char *buf, size_t *rsize)
187199767f8SToomas Soome {
188c28006deSToomas Soome 	struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
189c28006deSToomas Soome 	struct bcache		*bc = dd->dv_cache;
190c28006deSToomas Soome 	daddr_t			i, nblk;
191199767f8SToomas Soome 
192c28006deSToomas Soome 	nblk = size / bcache_blksize;
193199767f8SToomas Soome 
194c28006deSToomas Soome 	/* Invalidate the blocks being written */
195c28006deSToomas Soome 	for (i = 0; i < nblk; i++) {
196c28006deSToomas Soome 		bcache_invalidate(bc, blk + i);
197c28006deSToomas Soome 	}
198199767f8SToomas Soome 
199c28006deSToomas Soome 	/* Write the blocks */
200c28006deSToomas Soome 	return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize));
201199767f8SToomas Soome }
202199767f8SToomas Soome 
203199767f8SToomas Soome /*
204199767f8SToomas Soome  * Handle a read request; fill in parts of the request that can
205199767f8SToomas Soome  * be satisfied by the cache, use the supplied strategy routine to do
2069a637b37SToomas Soome  * device I/O and then use the I/O results to populate the cache.
207199767f8SToomas Soome  */
208199767f8SToomas Soome static int
read_strategy(void * devdata,int rw,daddr_t blk,size_t size,char * buf,size_t * rsize)20938dea910SToomas Soome read_strategy(void *devdata, int rw, daddr_t blk, size_t size,
21038dea910SToomas Soome     char *buf, size_t *rsize)
211199767f8SToomas Soome {
212c28006deSToomas Soome 	struct bcache_devdata	*dd = devdata;
213c28006deSToomas Soome 	struct bcache		*bc = dd->dv_cache;
214c28006deSToomas Soome 	size_t			i, nblk, p_size, r_size, complete, ra;
215c28006deSToomas Soome 	int			result;
216c28006deSToomas Soome 	daddr_t			p_blk;
217c28006deSToomas Soome 	caddr_t			p_buf;
218c28006deSToomas Soome 
219c28006deSToomas Soome 	if (bc == NULL) {
220c28006deSToomas Soome 		errno = ENODEV;
221c28006deSToomas Soome 		return (-1);
222199767f8SToomas Soome 	}
223c28006deSToomas Soome 
224c28006deSToomas Soome 	if (rsize != NULL)
225c28006deSToomas Soome 		*rsize = 0;
226c28006deSToomas Soome 
227c28006deSToomas Soome 	nblk = size / bcache_blksize;
228c28006deSToomas Soome 	if (nblk == 0 && size != 0)
229c28006deSToomas Soome 		nblk++;
230c28006deSToomas Soome 	result = 0;
231c28006deSToomas Soome 	complete = 1;
232c28006deSToomas Soome 
233c28006deSToomas Soome 	/* Satisfy any cache hits up front, break on first miss */
234c28006deSToomas Soome 	for (i = 0; i < nblk; i++) {
235c28006deSToomas Soome 		if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) {
236c28006deSToomas Soome 			bcache_misses += (nblk - i);
237c28006deSToomas Soome 			complete = 0;
238c28006deSToomas Soome 			if (nblk - i > BCACHE_MINREADAHEAD &&
239c28006deSToomas Soome 			    bc->ra > BCACHE_MINREADAHEAD)
240c28006deSToomas Soome 				bc->ra >>= 1;	/* reduce read ahead */
241c28006deSToomas Soome 			break;
242c28006deSToomas Soome 		} else {
243c28006deSToomas Soome 			bcache_hits++;
244c28006deSToomas Soome 		}
245c28006deSToomas Soome 	}
246c28006deSToomas Soome 
247c28006deSToomas Soome 	if (complete) {	/* whole set was in cache, return it */
248c28006deSToomas Soome 		if (bc->ra < BCACHE_READAHEAD)
249c28006deSToomas Soome 			bc->ra <<= 1;	/* increase read ahead */
250c28006deSToomas Soome 		bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)),
251c28006deSToomas Soome 		    buf, size);
252c28006deSToomas Soome 		goto done;
253c28006deSToomas Soome 	}
254c28006deSToomas Soome 
255c28006deSToomas Soome 	/*
256c28006deSToomas Soome 	 * Fill in any misses. From check we have i pointing to first missing
257c28006deSToomas Soome 	 * block, read in all remaining blocks + readahead.
258c28006deSToomas Soome 	 * We have space at least for nblk - i before bcache wraps.
259c28006deSToomas Soome 	 */
260c28006deSToomas Soome 	p_blk = blk + i;
261c28006deSToomas Soome 	p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk));
262c28006deSToomas Soome 	r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */
263c28006deSToomas Soome 
264c28006deSToomas Soome 	p_size = MIN(r_size, nblk - i);	/* read at least those blocks */
265c28006deSToomas Soome 
266c28006deSToomas Soome 	/*
267c28006deSToomas Soome 	 * The read ahead size setup.
268c28006deSToomas Soome 	 * While the read ahead can save us IO, it also can complicate things:
269c28006deSToomas Soome 	 * 1. We do not want to read ahead by wrapping around the
270c28006deSToomas Soome 	 *	bcache end - this would complicate the cache management.
271c28006deSToomas Soome 	 * 2. We are using bc->ra as dynamic hint for read ahead size,
272c28006deSToomas Soome 	 *	detected cache hits will increase the read-ahead block count,
273c28006deSToomas Soome 	 *	and misses will decrease, see the code above.
274c28006deSToomas Soome 	 * 3. The bcache is sized by 512B blocks, however, the underlying device
275c28006deSToomas Soome 	 *	may have a larger sector size, and we should perform the IO by
276c28006deSToomas Soome 	 *	taking into account these larger sector sizes. We could solve
277c28006deSToomas Soome 	 *	this by passing the sector size to bcache_allocate(), or by
278c28006deSToomas Soome 	 *	using ioctl(), but in this version we are using the constant,
279c28006deSToomas Soome 	 *	16 blocks, and are rounding read ahead block count down to
280c28006deSToomas Soome 	 *	multiple of 16. Using the constant has two reasons, we are not
281c28006deSToomas Soome 	 *	entirely sure if the BIOS disk interface is providing the
282c28006deSToomas Soome 	 *	correct value for sector size. And secondly, this way we get
283c28006deSToomas Soome 	 *	the most conservative setup for the ra.
284c28006deSToomas Soome 	 *
285c28006deSToomas Soome 	 * The selection of multiple of 16 blocks (8KB) is quite arbitrary,
286c28006deSToomas Soome 	 * however, we want to cover CDs (2K) and 4K disks.
287c28006deSToomas Soome 	 * bcache_allocate() will always fall back to a minimum of 32 blocks.
288c28006deSToomas Soome 	 * Our choice of 16 read ahead blocks will always fit inside the bcache.
289c28006deSToomas Soome 	 */
290c28006deSToomas Soome 
291c28006deSToomas Soome 	if ((rw & F_NORA) == F_NORA)
292c28006deSToomas Soome 		ra = 0;
293199767f8SToomas Soome 	else
294c28006deSToomas Soome 		ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size);
295199767f8SToomas Soome 
296c28006deSToomas Soome 	if (ra != 0 && ra != bc->bcache_nblks) { /* do we have RA space? */
297c28006deSToomas Soome 		ra = MIN(bc->ra, ra - 1);
298c28006deSToomas Soome 		ra = rounddown(ra, 16);		/* multiple of 16 blocks */
299c28006deSToomas Soome 		p_size += ra;
300c28006deSToomas Soome 	}
301199767f8SToomas Soome 
302c28006deSToomas Soome 	/* invalidate bcache */
303c28006deSToomas Soome 	for (i = 0; i < p_size; i++) {
304c28006deSToomas Soome 		bcache_invalidate(bc, p_blk + i);
305c28006deSToomas Soome 	}
306199767f8SToomas Soome 
307c28006deSToomas Soome 	r_size = 0;
308c28006deSToomas Soome 	/*
309c28006deSToomas Soome 	 * with read-ahead, it may happen we are attempting to read past
310c28006deSToomas Soome 	 * disk end, as bcache has no information about disk size.
311c28006deSToomas Soome 	 * in such case we should get partial read if some blocks can be
312c28006deSToomas Soome 	 * read or error, if no blocks can be read.
313c28006deSToomas Soome 	 * in either case we should return the data in bcache and only
314c28006deSToomas Soome 	 * return error if there is no data.
315c28006deSToomas Soome 	 */
316c28006deSToomas Soome 	rw &= F_MASK;
317c28006deSToomas Soome 	result = dd->dv_strategy(dd->dv_devdata, rw, p_blk,
318c28006deSToomas Soome 	    p_size * bcache_blksize, p_buf, &r_size);
319c28006deSToomas Soome 
320c28006deSToomas Soome 	r_size /= bcache_blksize;
321c28006deSToomas Soome 	for (i = 0; i < r_size; i++)
322c28006deSToomas Soome 		bcache_insert(bc, p_blk + i);
323c28006deSToomas Soome 
324c28006deSToomas Soome 	/* update ra statistics */
325c28006deSToomas Soome 	if (r_size != 0) {
326c28006deSToomas Soome 		if (r_size < p_size)
327c28006deSToomas Soome 			bcache_rablks += (p_size - r_size);
328c28006deSToomas Soome 		else
329c28006deSToomas Soome 			bcache_rablks += ra;
330c28006deSToomas Soome 	}
331199767f8SToomas Soome 
332c28006deSToomas Soome 	/* check how much data can we copy */
333c28006deSToomas Soome 	for (i = 0; i < nblk; i++) {
334c28006deSToomas Soome 		if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i)))
335c28006deSToomas Soome 			break;
336c28006deSToomas Soome 	}
337c28006deSToomas Soome 
338c28006deSToomas Soome 	if (size > i * bcache_blksize)
339c28006deSToomas Soome 		size = i * bcache_blksize;
340c28006deSToomas Soome 
341c28006deSToomas Soome 	if (size != 0) {
342c28006deSToomas Soome 		bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)),
343c28006deSToomas Soome 		    buf, size);
344c28006deSToomas Soome 		result = 0;
345c28006deSToomas Soome 	}
346c28006deSToomas Soome 
347c28006deSToomas Soome done:
348c28006deSToomas Soome 	if ((result == 0) && (rsize != NULL))
349c28006deSToomas Soome 		*rsize = size;
350c28006deSToomas Soome 	return (result);
351199767f8SToomas Soome }
352199767f8SToomas Soome 
3539a637b37SToomas Soome /*
354199767f8SToomas Soome  * Requests larger than 1/2 cache size will be bypassed and go
355199767f8SToomas Soome  * directly to the disk.  XXX tune this.
356199767f8SToomas Soome  */
357199767f8SToomas Soome int
bcache_strategy(void * devdata,int rw,daddr_t blk,size_t size,char * buf,size_t * rsize)35838dea910SToomas Soome bcache_strategy(void *devdata, int rw, daddr_t blk, size_t size,
35938dea910SToomas Soome     char *buf, size_t *rsize)
360199767f8SToomas Soome {
361c28006deSToomas Soome 	struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
362c28006deSToomas Soome 	struct bcache		*bc = dd->dv_cache;
363c28006deSToomas Soome 	uint_t bcache_nblks = 0;
364c28006deSToomas Soome 	int nblk, cblk, ret;
365c28006deSToomas Soome 	size_t csize, isize, total;
366c28006deSToomas Soome 
367c28006deSToomas Soome 	bcache_ops++;
368c28006deSToomas Soome 
369c28006deSToomas Soome 	if (bc != NULL)
370c28006deSToomas Soome 		bcache_nblks = bc->bcache_nblks;
371c28006deSToomas Soome 
372c28006deSToomas Soome 	/* bypass large requests, or when the cache is inactive */
373c28006deSToomas Soome 	if (bc == NULL ||
374c28006deSToomas Soome 	    ((size * 2 / bcache_blksize) > bcache_nblks)) {
3752377faa9SEmmanuel Vadot 		DPRINTF("bypass %zu from %jd", size / bcache_blksize,
3762377faa9SEmmanuel Vadot 		    (intmax_t)blk);
377c28006deSToomas Soome 		bcache_bypasses++;
378c28006deSToomas Soome 		rw &= F_MASK;
379c28006deSToomas Soome 		return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf,
380c28006deSToomas Soome 		    rsize));
381199767f8SToomas Soome 	}
382199767f8SToomas Soome 
383c28006deSToomas Soome 	switch (rw & F_MASK) {
384c28006deSToomas Soome 	case F_READ:
385c28006deSToomas Soome 		nblk = size / bcache_blksize;
386c28006deSToomas Soome 		if (size != 0 && nblk == 0)
387c28006deSToomas Soome 			nblk++;	/* read at least one block */
388c28006deSToomas Soome 
389c28006deSToomas Soome 		ret = 0;
390c28006deSToomas Soome 		total = 0;
391c28006deSToomas Soome 		while (size) {
392c28006deSToomas Soome 			/* # of blocks left */
393c28006deSToomas Soome 			cblk = bcache_nblks - BHASH(bc, blk);
394c28006deSToomas Soome 			cblk = MIN(cblk, nblk);
395c28006deSToomas Soome 
396c28006deSToomas Soome 			if (size <= bcache_blksize)
397c28006deSToomas Soome 				csize = size;
398c28006deSToomas Soome 			else
399c28006deSToomas Soome 				csize = cblk * bcache_blksize;
400c28006deSToomas Soome 
401c28006deSToomas Soome 			ret = read_strategy(devdata, rw, blk, csize,
402c28006deSToomas Soome 			    buf + total, &isize);
403c28006deSToomas Soome 
404c28006deSToomas Soome 			/*
405c28006deSToomas Soome 			 * we may have error from read ahead, if we have read
406c28006deSToomas Soome 			 * some data return partial read.
407c28006deSToomas Soome 			 */
408c28006deSToomas Soome 			if (ret != 0 || isize == 0) {
409c28006deSToomas Soome 				if (total != 0)
410c28006deSToomas Soome 					ret = 0;
411c28006deSToomas Soome 				break;
412c28006deSToomas Soome 			}
413c28006deSToomas Soome 			blk += isize / bcache_blksize;
414c28006deSToomas Soome 			total += isize;
415c28006deSToomas Soome 			size -= isize;
416c28006deSToomas Soome 			nblk = size / bcache_blksize;
417c28006deSToomas Soome 		}
418c28006deSToomas Soome 
419c28006deSToomas Soome 		if (rsize)
420c28006deSToomas Soome 			*rsize = total;
421c28006deSToomas Soome 
422c28006deSToomas Soome 		return (ret);
423c28006deSToomas Soome 	case F_WRITE:
424c28006deSToomas Soome 		return (write_strategy(devdata, F_WRITE, blk, size, buf,
425c28006deSToomas Soome 		    rsize));
426c28006deSToomas Soome 	}
427c28006deSToomas Soome 	return (-1);
428199767f8SToomas Soome }
429199767f8SToomas Soome 
430199767f8SToomas Soome /*
431199767f8SToomas Soome  * Free allocated bcache instance
432199767f8SToomas Soome  */
433199767f8SToomas Soome static void
bcache_free_instance(struct bcache * bc)434199767f8SToomas Soome bcache_free_instance(struct bcache *bc)
435199767f8SToomas Soome {
436c28006deSToomas Soome 	if (bc != NULL) {
437c28006deSToomas Soome 		free(bc->bcache_ctl);
438c28006deSToomas Soome 		free(bc->bcache_data);
439c28006deSToomas Soome 		free(bc);
440c28006deSToomas Soome 	}
441199767f8SToomas Soome }
442199767f8SToomas Soome 
443199767f8SToomas Soome /*
444199767f8SToomas Soome  * Insert a block into the cache.
445199767f8SToomas Soome  */
446199767f8SToomas Soome static void
bcache_insert(struct bcache * bc,daddr_t blkno)447199767f8SToomas Soome bcache_insert(struct bcache *bc, daddr_t blkno)
448199767f8SToomas Soome {
449c28006deSToomas Soome 	uint_t	cand;
4509a637b37SToomas Soome 
451c28006deSToomas Soome 	cand = BHASH(bc, blkno);
452199767f8SToomas Soome 
4532377faa9SEmmanuel Vadot 	DPRINTF("insert blk %jd -> %u # %d", (intmax_t)blkno, cand,
4542377faa9SEmmanuel Vadot 	    bcache_bcount);
455c28006deSToomas Soome 	bc->bcache_ctl[cand].bc_blkno = blkno;
456c28006deSToomas Soome 	bc->bcache_ctl[cand].bc_count = bcache_bcount++;
457199767f8SToomas Soome }
458199767f8SToomas Soome 
459199767f8SToomas Soome /*
460199767f8SToomas Soome  * Invalidate a block from the cache.
461199767f8SToomas Soome  */
462199767f8SToomas Soome static void
bcache_invalidate(struct bcache * bc,daddr_t blkno)463199767f8SToomas Soome bcache_invalidate(struct bcache *bc, daddr_t blkno)
464199767f8SToomas Soome {
465c28006deSToomas Soome 	uint_t	i;
466c28006deSToomas Soome 
467c28006deSToomas Soome 	i = BHASH(bc, blkno);
468c28006deSToomas Soome 	if (bc->bcache_ctl[i].bc_blkno == blkno) {
469c28006deSToomas Soome 		bc->bcache_ctl[i].bc_count = -1;
470c28006deSToomas Soome 		bc->bcache_ctl[i].bc_blkno = -1;
4712377faa9SEmmanuel Vadot 		DPRINTF("invalidate blk %jd", (intmax_t)blkno);
472c28006deSToomas Soome 	}
473199767f8SToomas Soome }
474199767f8SToomas Soome 
475c28006deSToomas Soome COMMAND_SET(bcachestat, "bcachestat", "get disk block cache stats",
476c28006deSToomas Soome     command_bcache);
477199767f8SToomas Soome 
478199767f8SToomas Soome static int
command_bcache(int argc,char * argv[]__unused)479c28006deSToomas Soome command_bcache(int argc, char *argv[] __unused)
480199767f8SToomas Soome {
481c28006deSToomas Soome 	if (argc != 1) {
482c28006deSToomas Soome 		command_errmsg = "wrong number of arguments";
483c28006deSToomas Soome 		return (CMD_ERROR);
484c28006deSToomas Soome 	}
485c28006deSToomas Soome 
486c28006deSToomas Soome 	printf("\ncache blocks: %u\n", bcache_total_nblks);
487c28006deSToomas Soome 	printf("cache blocksz: %u\n", bcache_blksize);
488c28006deSToomas Soome 	printf("cache readahead: %u\n", bcache_rablks);
489c28006deSToomas Soome 	printf("unit cache blocks: %u\n", bcache_unit_nblks);
490c28006deSToomas Soome 	printf("cached units: %u\n", bcache_units);
491c28006deSToomas Soome 	printf("%u ops %u bypasses %u hits  %u misses\n", bcache_ops,
492c28006deSToomas Soome 	    bcache_bypasses, bcache_hits, bcache_misses);
493c28006deSToomas Soome 	return (CMD_OK);
494199767f8SToomas Soome }
495