xref: /illumos-gate/usr/src/uts/common/fs/zfs/sys/arc.h (revision e419e0b9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
24  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
25  * Copyright 2024 Bill Sommerfeld <sommerfeld@hamachi.org>
26  */
27 
28 #ifndef	_SYS_ARC_H
29 #define	_SYS_ARC_H
30 
31 #include <sys/zfs_context.h>
32 
33 #ifdef	__cplusplus
34 extern "C" {
35 #endif
36 
37 #include <sys/zio.h>
38 #include <sys/dmu.h>
39 #include <sys/spa.h>
40 
/*
 * Used by arc_flush() to inform arc_evict_state() that it should evict
 * all available buffers from the arc state being passed in.
 *
 * The value is an unsigned long long with every bit set.  The expansion is
 * parenthesized so that it is always treated as a single operand, whatever
 * expression the macro is used in.
 */
#define	ARC_EVICT_ALL	(-1ULL)
46 
/*
 * Store x into (hdr)->b_lsize, scaled down by SPA_MINBLOCKSHIFT.
 *
 * x must be a multiple of (1U << SPA_MINBLOCKSHIFT); this is checked with
 * ASSERT().  x is expanded twice (once in the assertion and once in the
 * assignment), so callers should not pass an expression with side effects.
 * Every use of x is parenthesized, as in HDR_SET_PSIZE().
 */
#define	HDR_SET_LSIZE(hdr, x) do { \
	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_lsize = ((x) >> SPA_MINBLOCKSHIFT); \
_NOTE(CONSTCOND) } while (0)
51 
/*
 * Store x into (hdr)->b_psize, scaled down by SPA_MINBLOCKSHIFT.
 *
 * x must be a multiple of (1U << SPA_MINBLOCKSHIFT); this is checked with
 * ASSERT().  x is expanded twice (once in the assertion and once in the
 * assignment), so callers should not pass an expression with side effects.
 */
#define	HDR_SET_PSIZE(hdr, x) do { \
	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
	(hdr)->b_psize = ((x) >> SPA_MINBLOCKSHIFT); \
_NOTE(CONSTCOND) } while (0)
56 
/*
 * Inverse of HDR_SET_LSIZE()/HDR_SET_PSIZE(): scale the stored b_lsize or
 * b_psize value back up by SPA_MINBLOCKSHIFT.
 *
 * NOTE(review): the shift is evaluated in the (promoted) type of the field,
 * which is not declared in this header -- confirm that type is wide enough
 * for the largest value that can be stored.
 */
#define	HDR_GET_LSIZE(hdr)	((hdr)->b_lsize << SPA_MINBLOCKSHIFT)
#define	HDR_GET_PSIZE(hdr)	((hdr)->b_psize << SPA_MINBLOCKSHIFT)
59 
/*
 * Forward declarations.  struct arc_buf is defined further down in this
 * header; struct arc_buf_hdr is not defined here and is only used through
 * pointers.
 */
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
62 
/*
 * Because the ARC can store encrypted data, errors (not due to bugs) may arise
 * while transforming data into its desired format - specifically, when
 * decrypting, the key may not be present, or the HMAC may not be correct
 * which signifies deliberate tampering with the on-disk state
 * (assuming that the checksum was correct). The "error" parameter will be
 * nonzero in this case, even if there is no associated zio.
 *
 * NOTE(review): neither callback type below has a parameter named "error";
 * the sentence above appears to describe a different signature.  Confirm
 * against the implementation how such failures are reported to the callback
 * and update this comment accordingly.
 *
 * arc_read_done_func_t is the type of the "done" callback taken by
 * arc_read().  arc_write_done_func_t is the type of the "ready",
 * "child_ready", "physdone" and "done" callbacks taken by arc_write().
 */
typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
    const blkptr_t *bp, arc_buf_t *buf, void *priv);
typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
74 
/*
 * Generic read-completion callbacks which you can use.  Both are declared
 * with the arc_read_done_func_t function type, so they have the signature
 * given by that typedef.
 */
arc_read_done_func_t arc_bcopy_func;
arc_read_done_func_t arc_getbuf_func;
78 
/*
 * Flag bits held in an arc_flags_t.  Bits 0-6 are the public flags, bits
 * 7-21 are private to the ARC, and bits 24-30 are placeholders for the
 * compression mode.  Bits 22 and 23 are not assigned by this enum.
 */
typedef enum arc_flags {
	/*
	 * Public flags: external consumers may pass these into the ARC.
	 */
	ARC_FLAG_WAIT			= 1 << 0,	/* synchronous I/O */
	ARC_FLAG_NOWAIT			= 1 << 1,	/* asynchronous I/O */
	ARC_FLAG_PREFETCH		= 1 << 2,	/* prefetch I/O */
	ARC_FLAG_CACHED			= 1 << 3,	/* was found in cache */
	ARC_FLAG_L2CACHE		= 1 << 4,	/* cache in the L2ARC */
	ARC_FLAG_PREDICTIVE_PREFETCH	= 1 << 5,	/* issued by zfetch */
	ARC_FLAG_PRESCIENT_PREFETCH	= 1 << 6,	/* long min lifespan */

	/*
	 * Private flags: these appear in b_flags of the arc_buf_hdr_t and
	 * should only ever be set by ARC code.
	 */
	ARC_FLAG_IN_HASH_TABLE		= 1 << 7,	/* buffer is hashed */
	ARC_FLAG_IO_IN_PROGRESS		= 1 << 8,	/* I/O is in progress */
	ARC_FLAG_IO_ERROR		= 1 << 9,	/* I/O on buf failed */
	ARC_FLAG_INDIRECT		= 1 << 10,	/* indirect block */
	/* The block was read with ASYNC priority. */
	ARC_FLAG_PRIO_ASYNC_READ	= 1 << 11,
	ARC_FLAG_L2_WRITING		= 1 << 12,	/* write in progress */
	ARC_FLAG_L2_EVICTED		= 1 << 13,	/* evicted during I/O */
	ARC_FLAG_L2_WRITE_HEAD		= 1 << 14,	/* head of write list */
	/*
	 * Encrypted or authenticated on disk (may be plaintext in memory).
	 * A header with this flag has b_crypt_hdr allocated.  Indirect
	 * blocks with checksums of MACs are not included, even though they
	 * also have their X (encrypted) bit set in the bp.
	 */
	ARC_FLAG_PROTECTED		= 1 << 15,
	/* The data has not been authenticated yet. */
	ARC_FLAG_NOAUTH			= 1 << 16,
	/* The buffer contains metadata (when clear, it contains data). */
	ARC_FLAG_BUFC_METADATA		= 1 << 17,

	/* Which optional hdr struct fields are defined. */
	ARC_FLAG_HAS_L1HDR		= 1 << 18,
	ARC_FLAG_HAS_L2HDR		= 1 << 19,

	/*
	 * The arc_buf_hdr_t's b_pdata matches the on-disk data.  This lets
	 * the l2arc verify the data with the blkptr's checksum, without
	 * having to store the checksum in the hdr.
	 */
	ARC_FLAG_COMPRESSED_ARC		= 1 << 20,
	ARC_FLAG_SHARED_DATA		= 1 << 21,

	/*
	 * The arc buffer's compression mode is stored in the top 7 bits of
	 * the flags field; these dummy flags exist so that MDB can
	 * interpret the enum properly.
	 */
	ARC_FLAG_COMPRESS_0		= 1 << 24,
	ARC_FLAG_COMPRESS_1		= 1 << 25,
	ARC_FLAG_COMPRESS_2		= 1 << 26,
	ARC_FLAG_COMPRESS_3		= 1 << 27,
	ARC_FLAG_COMPRESS_4		= 1 << 28,
	ARC_FLAG_COMPRESS_5		= 1 << 29,
	ARC_FLAG_COMPRESS_6		= 1 << 30
} arc_flags_t;
144 
/*
 * Flag bits held in an arc_buf_flags_t (the b_flags member of arc_buf_t).
 */
typedef enum arc_buf_flags {
	ARC_BUF_FLAG_SHARED		= 1 << 0,
	ARC_BUF_FLAG_COMPRESSED		= 1 << 1,
	/*
	 * Set when this arc_buf_t itself is encrypted, whatever the state
	 * of the data on disk.
	 */
	ARC_BUF_FLAG_ENCRYPTED		= 1 << 2
} arc_buf_flags_t;
154 
/*
 * The arc_buf_t structure that the functions declared in this header take
 * and return.  Member order is part of the structure layout; do not reorder.
 */
struct arc_buf {
	/* the arc_buf_hdr_t this buffer refers to */
	arc_buf_hdr_t		*b_hdr;
	/* link to another arc_buf_t; presumably the next buf of b_hdr -- confirm */
	arc_buf_t		*b_next;
	/* mutex; NOTE(review): exactly what it protects is not visible here */
	kmutex_t		b_evict_lock;
	/* untyped pointer to the buffer's contents */
	void			*b_data;
	/* ARC_BUF_FLAG_* bits */
	arc_buf_flags_t		b_flags;
};
162 
/*
 * What an ARC buffer holds.
 */
typedef enum arc_buf_contents {
	ARC_BUFC_INVALID = 0,			/* not a valid type */
	ARC_BUFC_DATA,				/* holds data */
	ARC_BUFC_METADATA,			/* holds metadata */
	ARC_BUFC_NUMTYPES			/* count of the above; keep last */
} arc_buf_contents_t;
169 
/*
 * The following breakdowns of arc_size exist for kstat only.
 */
typedef enum arc_space_type {
	ARC_SPACE_DATA = 0,
	ARC_SPACE_META,
	ARC_SPACE_HDRS,
	ARC_SPACE_L2HDRS,
	ARC_SPACE_OTHER,
	ARC_SPACE_BONUS,
	ARC_SPACE_NUMTYPES	/* count of the categories above; keep last */
} arc_space_type_t;
182 
/*
 * Identifiers for the ARC states, numbered consecutively from zero.
 */
typedef enum arc_state_type {
	ARC_STATE_ANON = 0,
	ARC_STATE_MRU,
	ARC_STATE_MRU_GHOST,
	ARC_STATE_MFU,
	ARC_STATE_MFU_GHOST,
	ARC_STATE_L2C_ONLY,
	ARC_STATE_NUMTYPES	/* count of the states above; keep last */
} arc_state_type_t;
192 
/*
 * Space accounting.  Both calls take an amount and an arc_space_type_t
 * category.
 * NOTE(review): "space" is presumably a byte count -- confirm against the
 * implementation.
 */
void arc_space_consume(uint64_t space, arc_space_type_t type);
void arc_space_return(uint64_t space, arc_space_type_t type);

/* Queries about an existing buffer. */
boolean_t arc_is_metadata(arc_buf_t *buf);
boolean_t arc_is_encrypted(arc_buf_t *buf);
boolean_t arc_is_unauthenticated(arc_buf_t *buf);
enum zio_compress arc_get_compression(arc_buf_t *buf);

/*
 * Raw-parameter access and buffer transformation.  arc_get_raw_params()
 * takes non-const pointers for byteorder, salt, iv and mac, i.e. they are
 * outputs; arc_convert_to_raw() takes the same items as inputs (by value or
 * as const pointers).
 * NOTE(review): arc_untransform() returns int, presumably 0 on success and
 * an error number otherwise -- confirm against the implementation.
 */
void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
    uint8_t *iv, uint8_t *mac);
int arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
    boolean_t in_place);
void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
    dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
    const uint8_t *mac);
/*
 * Buffer allocation.  Every function here returns an arc_buf_t pointer.
 * The "_compressed" variants take psize, lsize and a compression type in
 * place of a single size; the "_raw" variants additionally take dsobj,
 * byteorder, salt, iv, mac and an object type.  The arc_alloc_*() forms take
 * a caller-supplied tag; the arc_loan_*() forms do not.
 *
 * NOTE(review): the size argument is int32_t in arc_alloc_buf() but int in
 * arc_loan_buf(), while the other variants use uint64_t sizes.
 */
arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
    int32_t size);
arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
    uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj,
    boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
    const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
    enum zio_compress compression_type);
arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
    enum zio_compress compression_type);
arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
    dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
    enum zio_compress compression_type);
/*
 * Operations on an existing buffer.  Where present, "tag" is an untyped
 * pointer supplied by the caller.
 * NOTE(review): tag presumably identifies the holder of the buffer and has
 * to match the tag used when it was obtained -- confirm against the
 * implementation.
 */
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_destroy(arc_buf_t *buf, void *tag);
/* Size queries; both return int. */
int arc_buf_size(arc_buf_t *buf);
int arc_buf_lsize(arc_buf_t *buf);
void arc_buf_access(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
/* Returns int; presumably nonzero when buf has been released -- confirm. */
int arc_released(arc_buf_t *buf);
void arc_buf_freeze(arc_buf_t *buf);
void arc_buf_thaw(arc_buf_t *buf);
/* arc_referenced() is only declared when ZFS_DEBUG is defined. */
#ifdef ZFS_DEBUG
int arc_referenced(arc_buf_t *buf);
#endif
234 
235 int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
236     arc_read_done_func_t *done, void *private, zio_priority_t priority,
237     int flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
238 zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
239     blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
240     arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
241     arc_write_done_func_t *physdone, arc_write_done_func_t *done,
242     void *private, zio_priority_t priority, int zio_flags,
243     const zbookmark_phys_t *zb);
244 void arc_freed(spa_t *spa, const blkptr_t *bp);
245 
/*
 * arc_flush() is the caller that uses ARC_EVICT_ALL to have
 * arc_evict_state() evict all available buffers (see the comment on
 * ARC_EVICT_ALL in this header).
 */
void arc_flush(spa_t *spa, boolean_t retry);
/*
 * Temporary reservations.
 * NOTE(review): arc_tempreserve_space() returns int, presumably 0 on
 * success and an error number otherwise, and arc_tempreserve_clear()
 * presumably undoes a prior reservation of the same size -- confirm.
 */
void arc_tempreserve_clear(uint64_t reserve);
int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg);

/* Memory queries; none take arguments. */
boolean_t arc_memory_is_low(void);
uint64_t arc_all_memory(void);
uint64_t arc_max_bytes(void);
/* Presumably one-time setup and teardown of the ARC -- confirm. */
void arc_init(void);
void arc_fini(void);
255 
256 /*
257  * Level 2 ARC
258  */
259 
/* Per-vdev operations; each takes the vdev_t concerned. */
void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
void l2arc_remove_vdev(vdev_t *vd);
boolean_t l2arc_vdev_present(vdev_t *vd);
void l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen);
/*
 * NOTE(review): presumably reports whether "check" falls within the range
 * delimited by "bottom" and "top"; the exact bounds handling is not visible
 * here -- confirm against the implementation.
 */
boolean_t l2arc_range_check_overlap(uint64_t bottom, uint64_t top,
    uint64_t check);
/* Presumably subsystem setup/teardown and start/stop pairs -- confirm. */
void l2arc_init(void);
void l2arc_fini(void);
void l2arc_start(void);
void l2arc_stop(void);
void l2arc_spa_rebuild_start(spa_t *spa);
271 
/*
 * These variables are only declared when _KERNEL is not defined, i.e. in
 * non-kernel builds.  They are defined elsewhere.
 */
#ifndef _KERNEL
extern boolean_t arc_watch;
extern int arc_procfd;
#endif
276 
277 #ifdef	__cplusplus
278 }
279 #endif
280 
281 #endif /* _SYS_ARC_H */
282