xref: /illumos-gate/usr/src/uts/common/fs/zfs/arc.c (revision f43aa5fa)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5033f9833Sek  * Common Development and Distribution License (the "License").
6033f9833Sek  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
223f9d6ad7SLin Ling  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23af1d63abSPaul Dagnelie  * Copyright (c) 2019, Joyent, Inc.
24fa98e487SMatthew Ahrens  * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
2571cb1b74SSaso Kiselkov  * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
2601a059eeSRoman Strashkin  * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
27f0a05239SGeorge Amanakis  * Copyright (c) 2011, 2019, Delphix. All rights reserved.
28f0a05239SGeorge Amanakis  * Copyright (c) 2020, George Amanakis. All rights reserved.
29*9e3493cbSJason King  * Copyright (c) 2020, The FreeBSD Foundation [1]
30*9e3493cbSJason King  *
31*9e3493cbSJason King  * [1] Portions of this software were developed by Allan Jude
32*9e3493cbSJason King  *     under sponsorship from the FreeBSD Foundation.
33fa9e4066Sahrens  */
35fa9e4066Sahrens /*
3644cb6abcSbmc  * DVA-based Adjustable Replacement Cache
37fa9e4066Sahrens  *
38ea8dc4b6Seschrock  * While much of the theory of operation used here is
39ea8dc4b6Seschrock  * based on the self-tuning, low overhead replacement cache
40fa9e4066Sahrens  * presented by Megiddo and Modha at FAST 2003, there are some
41fa9e4066Sahrens  * significant differences:
42fa9e4066Sahrens  *
43fa9e4066Sahrens  * 1. The Megiddo and Modha model assumes any page is evictable.
44fa9e4066Sahrens  * Pages in its cache cannot be "locked" into memory.  This makes
45fa9e4066Sahrens  * the eviction algorithm simple: evict the last page in the list.
46fa9e4066Sahrens  * This also makes the performance characteristics easy to reason
47fa9e4066Sahrens  * about.  Our cache is not so simple.  At any given moment, some
48fa9e4066Sahrens  * subset of the blocks in the cache are un-evictable because we
49fa9e4066Sahrens  * have handed out a reference to them.  Blocks are only evictable
50fa9e4066Sahrens  * when there are no external references active.  This makes
51fa9e4066Sahrens  * eviction far more problematic:  we choose to evict the evictable
52fa9e4066Sahrens  * blocks that are the "lowest" in the list.
53fa9e4066Sahrens  *
54fa9e4066Sahrens  * There are times when it is not possible to evict the requested
55fa9e4066Sahrens  * space.  In these circumstances we are unable to adjust the cache
56fa9e4066Sahrens  * size.  To prevent the cache growing unbounded at these times we
57fa94a07fSbrendan  * implement a "cache throttle" that slows the flow of new data
58fa94a07fSbrendan  * into the cache until we can make space available.
59fa9e4066Sahrens  *
60fa9e4066Sahrens  * 2. The Megiddo and Modha model assumes a fixed cache size.
61fa9e4066Sahrens  * Pages are evicted when the cache is full and there is a cache
62fa9e4066Sahrens  * miss.  Our model has a variable sized cache.  It grows with
63fa94a07fSbrendan  * high use, but also tries to react to memory pressure from the
64fa9e4066Sahrens  * operating system: decreasing its size when system memory is
65fa9e4066Sahrens  * tight.
66fa9e4066Sahrens  *
67fa9e4066Sahrens  * 3. The Megiddo and Modha model assumes a fixed page size. All
68f7170741SWill Andrews  * elements of the cache are therefore exactly the same size.  So
69fa9e4066Sahrens  * when adjusting the cache size following a cache miss, it's simply
70fa9e4066Sahrens  * a matter of choosing a single page to evict.  In our model, we
71fa9e4066Sahrens  * have variable sized cache blocks (ranging from 512 bytes to
72f7170741SWill Andrews  * 128K bytes).  We therefore choose a set of blocks to evict to make
73fa9e4066Sahrens  * space for a cache miss that approximates as closely as possible
74fa9e4066Sahrens  * the space used by the new block.
75fa9e4066Sahrens  *
76fa9e4066Sahrens  * See also:  "ARC: A Self-Tuning, Low Overhead Replacement Cache"
77fa9e4066Sahrens  * by N. Megiddo & D. Modha, FAST 2003
78fa9e4066Sahrens  */
80fa9e4066Sahrens /*
81fa9e4066Sahrens  * The locking model:
82fa9e4066Sahrens  *
83fa9e4066Sahrens  * A new reference to a cache buffer can be obtained in two
84fa9e4066Sahrens  * ways: 1) via a hash table lookup using the DVA as a key,
85fa94a07fSbrendan  * or 2) via one of the ARC lists.  The arc_read() interface
865602294fSDan Kimmel  * uses method 1, while the internal ARC algorithms for
87f7170741SWill Andrews  * adjusting the cache use method 2.  We therefore provide two
88fa9e4066Sahrens  * types of locks: 1) the hash table lock array, and 2) the
895602294fSDan Kimmel  * ARC list locks.
90fa9e4066Sahrens  *
91fc98fea5SBart Coddens  * Buffers do not have their own mutexes, rather they rely on the
92fc98fea5SBart Coddens  * hash table mutexes for the bulk of their protection (i.e. most
93fc98fea5SBart Coddens  * fields in the arc_buf_hdr_t are protected by these mutexes).
94fa9e4066Sahrens  *
95fa9e4066Sahrens  * buf_hash_find() returns the appropriate mutex (held) when it
96fa9e4066Sahrens  * locates the requested buffer in the hash table.  It returns
97fa9e4066Sahrens  * NULL for the mutex if the buffer was not in the table.
98fa9e4066Sahrens  *
99fa9e4066Sahrens  * buf_hash_remove() expects the appropriate hash mutex to be
100fa9e4066Sahrens  * already held before it is invoked.
101fa9e4066Sahrens  *
1025602294fSDan Kimmel  * Each ARC state also has a mutex which is used to protect the
103fa9e4066Sahrens  * buffer list associated with the state.  When attempting to
1045602294fSDan Kimmel  * obtain a hash table lock while holding an ARC list lock you
105fa9e4066Sahrens  * must use: mutex_tryenter() to avoid deadlock.  Also note that
10644eda4d7Smaybee  * the active state mutex must be held before the ghost state mutex.
107fa9e4066Sahrens  *
108fa9e4066Sahrens  * Note that the majority of the performance stats are manipulated
109fa9e4066Sahrens  * with atomic operations.
110fa94a07fSbrendan  *
11189c86e32SChris Williamson  * The L2ARC uses the l2ad_mtx on each vdev for the following:
112fa94a07fSbrendan  *
113fa94a07fSbrendan  *	- L2ARC buflist creation
114fa94a07fSbrendan  *	- L2ARC buflist eviction
115fa94a07fSbrendan  *	- L2ARC write completion, which walks L2ARC buflists
116fa94a07fSbrendan  *	- ARC header destruction, as it removes from L2ARC buflists
117fa94a07fSbrendan  *	- ARC header release, as it removes from L2ARC buflists
118fa9e4066Sahrens  */
120dcbf3bd6SGeorge Wilson /*
121dcbf3bd6SGeorge Wilson  * ARC operation:
122dcbf3bd6SGeorge Wilson  *
123dcbf3bd6SGeorge Wilson  * Every block that is in the ARC is tracked by an arc_buf_hdr_t structure.
124dcbf3bd6SGeorge Wilson  * This structure can point either to a block that is still in the cache or to
125dcbf3bd6SGeorge Wilson  * one that is only accessible in an L2 ARC device, or it can provide
126dcbf3bd6SGeorge Wilson  * information about a block that was recently evicted. If a block is
127dcbf3bd6SGeorge Wilson  * only accessible in the L2ARC, then the arc_buf_hdr_t only has enough
128dcbf3bd6SGeorge Wilson  * information to retrieve it from the L2ARC device. This information is
129dcbf3bd6SGeorge Wilson  * stored in the l2arc_buf_hdr_t sub-structure of the arc_buf_hdr_t. A block
130dcbf3bd6SGeorge Wilson  * that is in this state cannot access the data directly.
131dcbf3bd6SGeorge Wilson  *
132dcbf3bd6SGeorge Wilson  * Blocks that are actively being referenced or have not been evicted
133dcbf3bd6SGeorge Wilson  * are cached in the L1ARC. The L1ARC (l1arc_buf_hdr_t) is a structure within
134dcbf3bd6SGeorge Wilson  * the arc_buf_hdr_t that will point to the data block in memory. A block can
135dcbf3bd6SGeorge Wilson  * only be read by a consumer if it has an l1arc_buf_hdr_t. The L1ARC
1365602294fSDan Kimmel  * caches data in two ways -- in a list of ARC buffers (arc_buf_t) and
137770499e1SDan Kimmel  * also in the arc_buf_hdr_t's private physical data block pointer (b_pabd).
1385602294fSDan Kimmel  *
1395602294fSDan Kimmel  * The L1ARC's data pointer may or may not be uncompressed. The ARC has the
140770499e1SDan Kimmel  * ability to store the physical data (b_pabd) associated with the DVA of the
141770499e1SDan Kimmel  * arc_buf_hdr_t. Since the b_pabd is a copy of the on-disk physical block,
1425602294fSDan Kimmel  * it will match its on-disk compression characteristics. This behavior can be
1435602294fSDan Kimmel  * disabled by setting 'zfs_compressed_arc_enabled' to B_FALSE. When the
144770499e1SDan Kimmel  * compressed ARC functionality is disabled, the b_pabd will point to an
1455602294fSDan Kimmel  * uncompressed version of the on-disk data.
1465602294fSDan Kimmel  *
1475602294fSDan Kimmel  * Data in the L1ARC is not accessed by consumers of the ARC directly. Each
1485602294fSDan Kimmel  * arc_buf_hdr_t can have multiple ARC buffers (arc_buf_t) which reference it.
1495602294fSDan Kimmel  * Each ARC buffer (arc_buf_t) is being actively accessed by a specific ARC
1505602294fSDan Kimmel  * consumer. The ARC will provide references to this data and will keep it
1515602294fSDan Kimmel  * cached until it is no longer in use. The ARC caches only the L1ARC's physical
1525602294fSDan Kimmel  * data block and will evict any arc_buf_t that is no longer referenced. The
1535602294fSDan Kimmel  * amount of memory consumed by the arc_buf_ts' data buffers can be seen via the
154dcbf3bd6SGeorge Wilson  * "overhead_size" kstat.
155dcbf3bd6SGeorge Wilson  *
1565602294fSDan Kimmel  * Depending on the consumer, an arc_buf_t can be requested in uncompressed or
1575602294fSDan Kimmel  * compressed form. The typical case is that consumers will want uncompressed
1585602294fSDan Kimmel  * data, and when that happens a new data buffer is allocated where the data is
1595602294fSDan Kimmel  * decompressed for them to use. Currently the only consumer who wants
1605602294fSDan Kimmel  * compressed arc_buf_t's is "zfs send", when it streams data exactly as it
1615602294fSDan Kimmel  * exists on disk. When this happens, the arc_buf_t's data buffer is shared
1625602294fSDan Kimmel  * with the arc_buf_hdr_t.
163dcbf3bd6SGeorge Wilson  *
1645602294fSDan Kimmel  * Here is a diagram showing an arc_buf_hdr_t referenced by two arc_buf_t's. The
1655602294fSDan Kimmel  * first one is owned by a compressed send consumer (and therefore references
1665602294fSDan Kimmel  * the same compressed data buffer as the arc_buf_hdr_t) and the second could be
1675602294fSDan Kimmel  * used by any other consumer (and has its own uncompressed copy of the data
1685602294fSDan Kimmel  * buffer).
169dcbf3bd6SGeorge Wilson  *
1705602294fSDan Kimmel  *   arc_buf_hdr_t
1715602294fSDan Kimmel  *   +-----------+
1725602294fSDan Kimmel  *   | fields    |
1735602294fSDan Kimmel  *   | common to |
1745602294fSDan Kimmel  *   | L1- and   |
1755602294fSDan Kimmel  *   | L2ARC     |
1765602294fSDan Kimmel  *   +-----------+
1775602294fSDan Kimmel  *   | l2arc_buf_hdr_t
1785602294fSDan Kimmel  *   |           |
1795602294fSDan Kimmel  *   +-----------+
1805602294fSDan Kimmel  *   | l1arc_buf_hdr_t
1815602294fSDan Kimmel  *   |           |              arc_buf_t
1825602294fSDan Kimmel  *   | b_buf     +------------>+-----------+      arc_buf_t
183770499e1SDan Kimmel  *   | b_pabd    +-+           |b_next     +---->+-----------+
1845602294fSDan Kimmel  *   +-----------+ |           |-----------|     |b_next     +-->NULL
1855602294fSDan Kimmel  *                 |           |b_comp = T |     +-----------+
1865602294fSDan Kimmel  *                 |           |b_data     +-+   |b_comp = F |
1875602294fSDan Kimmel  *                 |           +-----------+ |   |b_data     +-+
1885602294fSDan Kimmel  *                 +->+------+               |   +-----------+ |
1895602294fSDan Kimmel  *        compressed  |      |               |                 |
1905602294fSDan Kimmel  *           data     |      |<--------------+                 | uncompressed
1915602294fSDan Kimmel  *                    +------+          compressed,            |     data
1925602294fSDan Kimmel  *                                        shared               +-->+------+
1935602294fSDan Kimmel  *                                         data                    |      |
1945602294fSDan Kimmel  *                                                                 |      |
1955602294fSDan Kimmel  *                                                                 +------+
196dcbf3bd6SGeorge Wilson  *
197dcbf3bd6SGeorge Wilson  * When a consumer reads a block, the ARC must first look to see if the
1985602294fSDan Kimmel  * arc_buf_hdr_t is cached. If the hdr is cached then the ARC allocates a new
1995602294fSDan Kimmel  * arc_buf_t and either copies uncompressed data into a new data buffer from an
200770499e1SDan Kimmel  * existing uncompressed arc_buf_t, decompresses the hdr's b_pabd buffer into a
201770499e1SDan Kimmel  * new data buffer, or shares the hdr's b_pabd buffer, depending on whether the
2025602294fSDan Kimmel  * hdr is compressed and the desired compression characteristics of the
2035602294fSDan Kimmel  * arc_buf_t consumer. If the arc_buf_t ends up sharing data with the
2045602294fSDan Kimmel  * arc_buf_hdr_t and both of them are uncompressed then the arc_buf_t must be
2055602294fSDan Kimmel  * the last buffer in the hdr's b_buf list, however a shared compressed buf can
2065602294fSDan Kimmel  * be anywhere in the hdr's list.
207dcbf3bd6SGeorge Wilson  *
208dcbf3bd6SGeorge Wilson  * The diagram below shows an example of an uncompressed ARC hdr that is
2095602294fSDan Kimmel  * sharing its data with an arc_buf_t (note that the shared uncompressed buf is
2105602294fSDan Kimmel  * the last element in the buf list):
211dcbf3bd6SGeorge Wilson  *
212dcbf3bd6SGeorge Wilson  *                arc_buf_hdr_t
213dcbf3bd6SGeorge Wilson  *                +-----------+
214dcbf3bd6SGeorge Wilson  *                |           |
215dcbf3bd6SGeorge Wilson  *                |           |
216dcbf3bd6SGeorge Wilson  *                |           |
217dcbf3bd6SGeorge Wilson  *                +-----------+
218dcbf3bd6SGeorge Wilson  * l2arc_buf_hdr_t|           |
219dcbf3bd6SGeorge Wilson  *                |           |
220dcbf3bd6SGeorge Wilson  *                +-----------+
221dcbf3bd6SGeorge Wilson  * l1arc_buf_hdr_t|           |
222dcbf3bd6SGeorge Wilson  *                |           |                 arc_buf_t    (shared)
223dcbf3bd6SGeorge Wilson  *                |    b_buf  +------------>+---------+      arc_buf_t
224dcbf3bd6SGeorge Wilson  *                |           |             |b_next   +---->+---------+
225770499e1SDan Kimmel  *                |  b_pabd   +-+           |---------|     |b_next   +-->NULL
226dcbf3bd6SGeorge Wilson  *                +-----------+ |           |         |     +---------+
227dcbf3bd6SGeorge Wilson  *                              |           |b_data   +-+   |         |
228dcbf3bd6SGeorge Wilson  *                              |           +---------+ |   |b_data   +-+
229dcbf3bd6SGeorge Wilson  *                              +->+------+             |   +---------+ |
230dcbf3bd6SGeorge Wilson  *                                 |      |             |               |
231dcbf3bd6SGeorge Wilson  *                   uncompressed  |      |             |               |
232dcbf3bd6SGeorge Wilson  *                        data     +------+             |               |
233dcbf3bd6SGeorge Wilson  *                                    ^                 +->+------+     |
234dcbf3bd6SGeorge Wilson  *                                    |       uncompressed |      |     |
235dcbf3bd6SGeorge Wilson  *                                    |           data     |      |     |
236dcbf3bd6SGeorge Wilson  *                                    |                    +------+     |
237dcbf3bd6SGeorge Wilson  *                                    +---------------------------------+
238dcbf3bd6SGeorge Wilson  *
239770499e1SDan Kimmel  * Writing to the ARC requires that the ARC first discard the hdr's b_pabd
240dcbf3bd6SGeorge Wilson  * since the physical block is about to be rewritten. The new data contents
2415602294fSDan Kimmel  * will be contained in the arc_buf_t. As the I/O pipeline performs the write,
2425602294fSDan Kimmel  * it may compress the data before writing it to disk. The ARC will be called
2435602294fSDan Kimmel  * with the transformed data and will bcopy the transformed on-disk block into
244770499e1SDan Kimmel  * a newly allocated b_pabd. Writes are always done into buffers which have
2455602294fSDan Kimmel  * either been loaned (and hence are new and don't have other readers) or
2465602294fSDan Kimmel  * buffers which have been released (and hence have their own hdr, if there
2475602294fSDan Kimmel  * were originally other readers of the buf's original hdr). This ensures that
2485602294fSDan Kimmel  * the ARC only needs to update a single buf and its hdr after a write occurs.
249dcbf3bd6SGeorge Wilson  *
250770499e1SDan Kimmel  * When the L2ARC is in use, it will also take advantage of the b_pabd. The
251770499e1SDan Kimmel  * L2ARC will always write the contents of b_pabd to the L2ARC. This means
2525602294fSDan Kimmel  * that when compressed ARC is enabled, the L2ARC blocks are identical
253dcbf3bd6SGeorge Wilson  * to the on-disk block in the main data pool. This provides a significant
254dcbf3bd6SGeorge Wilson  * advantage since the ARC can leverage the bp's checksum when reading from the
255dcbf3bd6SGeorge Wilson  * L2ARC to determine if the contents are valid. However, if the compressed
2565602294fSDan Kimmel  * ARC is disabled, then the L2ARC's block must be transformed to look
257dcbf3bd6SGeorge Wilson  * like the physical block in the main data pool before comparing the
258dcbf3bd6SGeorge Wilson  * checksum and determining its validity.
259eb633035STom Caputi  *
260eb633035STom Caputi  * The L1ARC has a slightly different system for storing encrypted data.
261eb633035STom Caputi  * Raw (encrypted + possibly compressed) data has a few subtle differences from
262eb633035STom Caputi  * data that is just compressed. The biggest difference is that it is not
263eb633035STom Caputi  * possible to decrypt encrypted data (or vice versa) if the keys aren't loaded.
264eb633035STom Caputi  * The other difference is that encryption cannot be treated as a suggestion.
265eb633035STom Caputi  * If a caller would prefer compressed data, but they actually wind up with
266eb633035STom Caputi  * uncompressed data the worst thing that could happen is there might be a
267eb633035STom Caputi  * performance hit. If the caller requests encrypted data, however, we must be
268eb633035STom Caputi  * sure they actually get it or else secret information could be leaked. Raw
269eb633035STom Caputi  * data is stored in hdr->b_crypt_hdr.b_rabd. An encrypted header, therefore,
270eb633035STom Caputi  * may have both an encrypted version and a decrypted version of its data at
271eb633035STom Caputi  * once. When a caller needs a raw arc_buf_t, it is allocated and the data is
272eb633035STom Caputi  * copied out of this header. To avoid complications with b_pabd, raw buffers
273eb633035STom Caputi  * cannot be shared.
274dcbf3bd6SGeorge Wilson  */
275dcbf3bd6SGeorge Wilson 
276fa9e4066Sahrens #include <sys/spa.h>
277fa9e4066Sahrens #include <sys/zio.h>
278dcbf3bd6SGeorge Wilson #include <sys/spa_impl.h>
279aad02571SSaso Kiselkov #include <sys/zio_compress.h>
280dcbf3bd6SGeorge Wilson #include <sys/zio_checksum.h>
281fa9e4066Sahrens #include <sys/zfs_context.h>
282fa9e4066Sahrens #include <sys/arc.h>
283fa9e4066Sahrens #include <sys/refcount.h>
284c5904d13Seschrock #include <sys/vdev.h>
285573ca77eSGeorge Wilson #include <sys/vdev_impl.h>
28669962b56SMatthew Ahrens #include <sys/dsl_pool.h>
287770499e1SDan Kimmel #include <sys/zio_checksum.h>
288244781f1SPrakash Surya #include <sys/multilist.h>
289770499e1SDan Kimmel #include <sys/abd.h>
290eb633035STom Caputi #include <sys/zil.h>
291eb633035STom Caputi #include <sys/fm/fs/zfs.h>
292fa9e4066Sahrens #ifdef _KERNEL
293fa9e4066Sahrens #include <sys/vmsystm.h>
294fa9e4066Sahrens #include <vm/anon.h>
295fa9e4066Sahrens #include <sys/fs/swapnode.h>
296033f9833Sek #include <sys/dnlc.h>
297fa9e4066Sahrens #endif
298fa9e4066Sahrens #include <sys/callb.h>
29944cb6abcSbmc #include <sys/kstat.h>
300de753e34SBrad Lewis #include <sys/zthr.h>
301b24ab676SJeff Bonwick #include <zfs_fletcher.h>
302f0a05239SGeorge Amanakis #include <sys/arc_impl.h>
3033a2d8a1bSPaul Dagnelie #include <sys/aggsum.h>
3043a2d8a1bSPaul Dagnelie #include <sys/cityhash.h>
305af1d63abSPaul Dagnelie #include <sys/param.h>
307cd1c8b85SMatthew Ahrens #ifndef _KERNEL
/* The two variables below exist only when _KERNEL is not defined. */
308cd1c8b85SMatthew Ahrens /* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
309cd1c8b85SMatthew Ahrens boolean_t arc_watch = B_FALSE;
/*
 * NOTE(review): presumably the file descriptor used to program the
 * watchpoints enabled by arc_watch; confirm against its users.
 */
310cd1c8b85SMatthew Ahrens int arc_procfd;
311cd1c8b85SMatthew Ahrens #endif
312cd1c8b85SMatthew Ahrens 
313de753e34SBrad Lewis /*
314de753e34SBrad Lewis  * This thread's job is to keep enough free memory in the system, by
315de753e34SBrad Lewis  * calling arc_kmem_reap_soon() plus arc_shrink(), which improves
316de753e34SBrad Lewis  * arc_available_memory().
317de753e34SBrad Lewis  */
318de753e34SBrad Lewis static zthr_t		*arc_reap_zthr;
319de753e34SBrad Lewis 
320de753e34SBrad Lewis /*
321de753e34SBrad Lewis  * This thread's job is to keep arc_size under arc_c, by calling
322de753e34SBrad Lewis  * arc_adjust(), which improves arc_is_overflowing().
323de753e34SBrad Lewis  */
324de753e34SBrad Lewis static zthr_t		*arc_adjust_zthr;
325de753e34SBrad Lewis 
/*
 * NOTE(review): going by the names, this mutex, condition variable and
 * flag coordinate threads that wait for the arc_adjust zthr to make
 * progress; confirm the exact protocol against their users.
 */
326de753e34SBrad Lewis static kmutex_t		arc_adjust_lock;
327de753e34SBrad Lewis static kcondvar_t	arc_adjust_waiters_cv;
328de753e34SBrad Lewis static boolean_t	arc_adjust_needed = B_FALSE;
329244781f1SPrakash Surya 
/*
 * NOTE(review): presumably the percentage by which the DNLC is asked to
 * shrink when the ARC reclaims memory; confirm against its users.
 */
3302ec99e3eSMatthew Ahrens uint_t arc_reduce_dnlc_percent = 3;
33269962b56SMatthew Ahrens /*
333244781f1SPrakash Surya  * The number of headers to evict in arc_evict_state_impl() before
334244781f1SPrakash Surya  * dropping the sublist lock and evicting from another sublist. A lower
335244781f1SPrakash Surya  * value means we're more likely to evict the "correct" header (i.e. the
336244781f1SPrakash Surya  * oldest header in the arc state), but comes with higher overhead
337244781f1SPrakash Surya  * (i.e. more invocations of arc_evict_state_impl()).
338244781f1SPrakash Surya  */
339244781f1SPrakash Surya int zfs_arc_evict_batch_limit = 10;
340244781f1SPrakash Surya 
341fa9e4066Sahrens /* number of seconds before growing cache again */
342de753e34SBrad Lewis int arc_grow_retry = 60;
344de753e34SBrad Lewis /*
345de753e34SBrad Lewis  * Minimum time between calls to arc_kmem_reap_soon().  Note that this will
346de753e34SBrad Lewis  * be converted to ticks, so with the default hz=100, a setting of 15 ms
347de753e34SBrad Lewis  * will actually wait 2 ticks, or 20ms.
348de753e34SBrad Lewis  */
349de753e34SBrad Lewis int arc_kmem_cache_reap_retry_ms = 1000;
35036a64e62STim Kordas 
351770499e1SDan Kimmel /* shift of arc_c for calculating overflow limit in arc_get_data_impl */
/*
 * NOTE(review): if the shift is applied as arc_c >> shift, the default of
 * 8 yields 1/256 of arc_c; confirm in arc_get_data_impl.
 */
352de753e34SBrad Lewis int zfs_arc_overflow_shift = 8;
353244781f1SPrakash Surya 
3545a98e54bSBrendan Gregg - Sun Microsystems /* shift of arc_c for calculating both min and max arc_p */
355de753e34SBrad Lewis int arc_p_min_shift = 4;
3565a98e54bSBrendan Gregg - Sun Microsystems 
3575a98e54bSBrendan Gregg - Sun Microsystems /* log2(fraction of arc to reclaim) */
/* With the default of 7 the fraction is 1/128. */
358de753e34SBrad Lewis int arc_shrink_shift = 7;
3592ec99e3eSMatthew Ahrens 
3602ec99e3eSMatthew Ahrens /*
3612ec99e3eSMatthew Ahrens  * log2(fraction of ARC which must be free to allow growing).
3622ec99e3eSMatthew Ahrens  * I.e. If there is less than arc_c >> arc_no_grow_shift free memory,
3632ec99e3eSMatthew Ahrens  * when reading a new block into the ARC, we will evict an equal-sized block
3642ec99e3eSMatthew Ahrens  * from the ARC.
3652ec99e3eSMatthew Ahrens  *
3662ec99e3eSMatthew Ahrens  * This must be less than arc_shrink_shift, so that when we shrink the ARC,
3672ec99e3eSMatthew Ahrens  * we will still not allow it to grow.
3682ec99e3eSMatthew Ahrens  */
/*
 * The compiled-in defaults satisfy the constraint above: 5 here versus 7
 * for arc_shrink_shift, i.e. arc_c/32 versus arc_c/128.
 */
3692ec99e3eSMatthew Ahrens int			arc_no_grow_shift = 5;
3702ec99e3eSMatthew Ahrens 
3715a98e54bSBrendan Gregg - Sun Microsystems 
37213506d1eSmaybee /*
373b19a79ecSperrin  * minimum lifespan of a prefetch block in clock ticks
374b19a79ecSperrin  * (initialized in arc_init())
37513506d1eSmaybee  */
/*
 * NOTE(review): the comment above says "clock ticks" and "initialized in
 * arc_init()", but both variables carry an _ms suffix and are statically
 * initialized here.  Confirm the unit against the code that reads them
 * and update the comment accordingly.
 */
376a3874b8bSToomas Soome static int		zfs_arc_min_prefetch_ms = 1;
377a3874b8bSToomas Soome static int		zfs_arc_min_prescient_prefetch_ms = 6;
37969962b56SMatthew Ahrens /*
38069962b56SMatthew Ahrens  * If this percent of memory is free, don't throttle.
38169962b56SMatthew Ahrens  */
38269962b56SMatthew Ahrens int arc_lotsfree_percent = 10;
38369962b56SMatthew Ahrens 
/*
 * NOTE(review): presumably set once ARC initialization has completed and
 * cleared on teardown; confirm where it is written.
 */
384de753e34SBrad Lewis static boolean_t arc_initialized;
3863a737e0dSbrendan /*
3873a737e0dSbrendan  * The arc has filled available memory and has now warmed up.
3883a737e0dSbrendan  */
3893a737e0dSbrendan static boolean_t arc_warm;
3910dd053d7SPrakash Surya /*
3920dd053d7SPrakash Surya  * log2 fraction of the zio arena to keep free.
3930dd053d7SPrakash Surya  */
/* With the default of 2 the fraction is 1/4. */
3940dd053d7SPrakash Surya int arc_zio_arena_free_shift = 2;
3950dd053d7SPrakash Surya 
396a2eea2e1Sahrens /*
397a2eea2e1Sahrens  * These tunables are for performance analysis.
398a2eea2e1Sahrens  */
/*
 * NOTE(review): zfs_arc_grow_retry, zfs_arc_shrink_shift and
 * zfs_arc_p_min_shift share their names with arc_grow_retry,
 * arc_shrink_shift and arc_p_min_shift declared earlier in this file.
 * Presumably a non-zero value here overrides the corresponding default,
 * and 0 (or no initializer) means "not set"; confirm where these
 * variables are consumed.
 */
399a2eea2e1Sahrens uint64_t zfs_arc_max;
400a2eea2e1Sahrens uint64_t zfs_arc_min;
4011116048bSek uint64_t zfs_arc_meta_limit = 0;
4023a5286a1SMatthew Ahrens uint64_t zfs_arc_meta_min = 0;
4035a98e54bSBrendan Gregg - Sun Microsystems int zfs_arc_grow_retry = 0;
4045a98e54bSBrendan Gregg - Sun Microsystems int zfs_arc_shrink_shift = 0;
4055a98e54bSBrendan Gregg - Sun Microsystems int zfs_arc_p_min_shift = 0;
40663e911b6SMatthew Ahrens int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
408abe1fd01SDon Brady /*
409abe1fd01SDon Brady  * ARC dirty data constraints for arc_tempreserve_space() throttle
410abe1fd01SDon Brady  */
411abe1fd01SDon Brady uint_t zfs_arc_dirty_limit_percent = 50;	/* total dirty data limit */
412abe1fd01SDon Brady uint_t zfs_arc_anon_limit_percent = 25;		/* anon block dirty limit */
413abe1fd01SDon Brady uint_t zfs_arc_pool_dirty_percent = 20;		/* each pool's anon allowance */
414abe1fd01SDon Brady 
/*
 * Controls the compressed ARC described in the "ARC operation" comment
 * near the top of this file.  When B_TRUE (the default), a hdr's b_pabd
 * matches the on-disk compression characteristics of the block; when
 * B_FALSE, b_pabd points to an uncompressed version of the on-disk data.
 */
415dcbf3bd6SGeorge Wilson boolean_t zfs_compressed_arc_enabled = B_TRUE;
416dcbf3bd6SGeorge Wilson 
417fa94a07fSbrendan /* The 6 states: */
/*
 * NOTE(review): going by the names and the header comments, the *_ghost
 * states appear to track recently evicted blocks, and ARC_l2c_only
 * headers whose data is only reachable through an L2ARC device; confirm
 * against the state-transition code.
 */
418fa9e4066Sahrens static arc_state_t ARC_anon;
419ea8dc4b6Seschrock static arc_state_t ARC_mru;
420ea8dc4b6Seschrock static arc_state_t ARC_mru_ghost;
421ea8dc4b6Seschrock static arc_state_t ARC_mfu;
422ea8dc4b6Seschrock static arc_state_t ARC_mfu_ghost;
423fa94a07fSbrendan static arc_state_t ARC_l2c_only;
425f0a05239SGeorge Amanakis arc_stats_t arc_stats = {
42644cb6abcSbmc 	{ "hits",			KSTAT_DATA_UINT64 },
42744cb6abcSbmc 	{ "misses",			KSTAT_DATA_UINT64 },
42844cb6abcSbmc 	{ "demand_data_hits",		KSTAT_DATA_UINT64 },
42944cb6abcSbmc 	{ "demand_data_misses",		KSTAT_DATA_UINT64 },
43044cb6abcSbmc 	{ "demand_metadata_hits",	KSTAT_DATA_UINT64 },
43144cb6abcSbmc 	{ "demand_metadata_misses",	KSTAT_DATA_UINT64 },
43244cb6abcSbmc 	{ "prefetch_data_hits",		KSTAT_DATA_UINT64 },
43344cb6abcSbmc 	{ "prefetch_data_misses",	KSTAT_DATA_UINT64 },
43444cb6abcSbmc 	{ "prefetch_metadata_hits",	KSTAT_DATA_UINT64 },
43544cb6abcSbmc 	{ "prefetch_metadata_misses",	KSTAT_DATA_UINT64 },
43644cb6abcSbmc 	{ "mru_hits",			KSTAT_DATA_UINT64 },
43744cb6abcSbmc 	{ "mru_ghost_hits",		KSTAT_DATA_UINT64 },
43844cb6abcSbmc 	{ "mfu_hits",			KSTAT_DATA_UINT64 },
43944cb6abcSbmc 	{ "mfu_ghost_hits",		KSTAT_DATA_UINT64 },
44044cb6abcSbmc 	{ "deleted",			KSTAT_DATA_UINT64 },
44144cb6abcSbmc 	{ "mutex_miss",			KSTAT_DATA_UINT64 },
4427b38fab6SAlexander Motin 	{ "access_skip",		KSTAT_DATA_UINT64 },
44344cb6abcSbmc 	{ "evict_skip",			KSTAT_DATA_UINT64 },
444244781f1SPrakash Surya 	{ "evict_not_enough",		KSTAT_DATA_UINT64 },
4455ea40c06SBrendan Gregg - Sun Microsystems 	{ "evict_l2_cached",		KSTAT_DATA_UINT64 },
4465ea40c06SBrendan Gregg - Sun Microsystems 	{ "evict_l2_eligible",		KSTAT_DATA_UINT64 },
447*9e3493cbSJason King 	{ "evict_l2_eligible_mfu",	KSTAT_DATA_UINT64 },
448*9e3493cbSJason King 	{ "evict_l2_eligible_mru",	KSTAT_DATA_UINT64 },
4495ea40c06SBrendan Gregg - Sun Microsystems 	{ "evict_l2_ineligible",	KSTAT_DATA_UINT64 },
450244781f1SPrakash Surya 	{ "evict_l2_skip",		KSTAT_DATA_UINT64 },
45144cb6abcSbmc 	{ "hash_elements",		KSTAT_DATA_UINT64 },
45244cb6abcSbmc 	{ "hash_elements_max",		KSTAT_DATA_UINT64 },
45344cb6abcSbmc 	{ "hash_collisions",		KSTAT_DATA_UINT64 },
45444cb6abcSbmc 	{ "hash_chains",		KSTAT_DATA_UINT64 },
45544cb6abcSbmc 	{ "hash_chain_max",		KSTAT_DATA_UINT64 },
45644cb6abcSbmc 	{ "p",				KSTAT_DATA_UINT64 },
45744cb6abcSbmc 	{ "c",				KSTAT_DATA_UINT64 },
45844cb6abcSbmc 	{ "c_min",			KSTAT_DATA_UINT64 },
45944cb6abcSbmc 	{ "c_max",			KSTAT_DATA_UINT64 },
460fa94a07fSbrendan 	{ "size",			KSTAT_DATA_UINT64 },
461dcbf3bd6SGeorge Wilson 	{ "compressed_size",		KSTAT_DATA_UINT64 },
462dcbf3bd6SGeorge Wilson 	{ "uncompressed_size",		KSTAT_DATA_UINT64 },
463dcbf3bd6SGeorge Wilson 	{ "overhead_size",		KSTAT_DATA_UINT64 },
464fa94a07fSbrendan 	{ "hdr_size",			KSTAT_DATA_UINT64 },
4655a98e54bSBrendan Gregg - Sun Microsystems 	{ "data_size",			KSTAT_DATA_UINT64 },
4664076b1bfSPrakash Surya 	{ "metadata_size",		KSTAT_DATA_UINT64 },
4675a98e54bSBrendan Gregg - Sun Microsystems 	{ "other_size",			KSTAT_DATA_UINT64 },
4684076b1bfSPrakash Surya 	{ "anon_size",			KSTAT_DATA_UINT64 },
4694076b1bfSPrakash Surya 	{ "anon_evictable_data",	KSTAT_DATA_UINT64 },
4704076b1bfSPrakash Surya 	{ "anon_evictable_metadata",	KSTAT_DATA_UINT64 },
4714076b1bfSPrakash Surya 	{ "mru_size",			KSTAT_DATA_UINT64 },
4724076b1bfSPrakash Surya 	{ "mru_evictable_data",		KSTAT_DATA_UINT64 },
4734076b1bfSPrakash Surya 	{ "mru_evictable_metadata",	KSTAT_DATA_UINT64 },
4744076b1bfSPrakash Surya 	{ "mru_ghost_size",		KSTAT_DATA_UINT64 },
4754076b1bfSPrakash Surya 	{ "mru_ghost_evictable_data",	KSTAT_DATA_UINT64 },
4764076b1bfSPrakash Surya 	{ "mru_ghost_evictable_metadata", KSTAT_DATA_UINT64 },
4774076b1bfSPrakash Surya 	{ "mfu_size",			KSTAT_DATA_UINT64 },
4784076b1bfSPrakash Surya 	{ "mfu_evictable_data",		KSTAT_DATA_UINT64 },
4794076b1bfSPrakash Surya 	{ "mfu_evictable_metadata",	KSTAT_DATA_UINT64 },
4804076b1bfSPrakash Surya 	{ "mfu_ghost_size",		KSTAT_DATA_UINT64 },
4814076b1bfSPrakash Surya 	{ "mfu_ghost_evictable_data",	KSTAT_DATA_UINT64 },
4824076b1bfSPrakash Surya 	{ "mfu_ghost_evictable_metadata", KSTAT_DATA_UINT64 },
483fa94a07fSbrendan 	{ "l2_hits",			KSTAT_DATA_UINT64 },
484fa94a07fSbrendan 	{ "l2_misses",			KSTAT_DATA_UINT64 },
485*9e3493cbSJason King 	{ "l2_prefetch_asize",		KSTAT_DATA_UINT64 },
486*9e3493cbSJason King 	{ "l2_mru_asize",		KSTAT_DATA_UINT64 },
487*9e3493cbSJason King 	{ "l2_mfu_asize",		KSTAT_DATA_UINT64 },
488*9e3493cbSJason King 	{ "l2_bufc_data_asize",		KSTAT_DATA_UINT64 },
489*9e3493cbSJason King 	{ "l2_bufc_metadata_asize",	KSTAT_DATA_UINT64 },
490fa94a07fSbrendan 	{ "l2_feeds",			KSTAT_DATA_UINT64 },
491fa94a07fSbrendan 	{ "l2_rw_clash",		KSTAT_DATA_UINT64 },
4925a98e54bSBrendan Gregg - Sun Microsystems 	{ "l2_read_bytes",		KSTAT_DATA_UINT64 },
4935a98e54bSBrendan Gregg - Sun Microsystems 	{ "l2_write_bytes",		KSTAT_DATA_UINT64 },
494fa94a07fSbrendan 	{ "l2_writes_sent",		KSTAT_DATA_UINT64 },
495fa94a07fSbrendan 	{ "l2_writes_done",		KSTAT_DATA_UINT64 },
496fa94a07fSbrendan 	{ "l2_writes_error",		KSTAT_DATA_UINT64 },
497244781f1SPrakash Surya 	{ "l2_writes_lock_retry",	KSTAT_DATA_UINT64 },
498fa94a07fSbrendan 	{ "l2_evict_lock_retry",	KSTAT_DATA_UINT64 },
499fa94a07fSbrendan 	{ "l2_evict_reading",		KSTAT_DATA_UINT64 },
50089c86e32SChris Williamson 	{ "l2_evict_l1cached",		KSTAT_DATA_UINT64 },
501fa94a07fSbrendan 	{ "l2_free_on_write",		KSTAT_DATA_UINT64 },
502fa94a07fSbrendan 	{ "l2_abort_lowmem",		KSTAT_DATA_UINT64 },
503fa94a07fSbrendan 	{ "l2_cksum_bad",		KSTAT_DATA_UINT64 },
504fa94a07fSbrendan 	{ "l2_io_error",		KSTAT_DATA_UINT64 },
505fa94a07fSbrendan 	{ "l2_size",			KSTAT_DATA_UINT64 },
506aad02571SSaso Kiselkov 	{ "l2_asize",			KSTAT_DATA_UINT64 },
5071ab7f2deSmaybee 	{ "l2_hdr_size",		KSTAT_DATA_UINT64 },
508f0a05239SGeorge Amanakis 	{ "l2_log_blk_writes",		KSTAT_DATA_UINT64 },
509f0a05239SGeorge Amanakis 	{ "l2_log_blk_avg_asize",	KSTAT_DATA_UINT64 },
510f0a05239SGeorge Amanakis 	{ "l2_log_blk_asize",		KSTAT_DATA_UINT64 },
511f0a05239SGeorge Amanakis 	{ "l2_log_blk_count",		KSTAT_DATA_UINT64 },
512f0a05239SGeorge Amanakis 	{ "l2_data_to_meta_ratio",	KSTAT_DATA_UINT64 },
513f0a05239SGeorge Amanakis 	{ "l2_rebuild_success",		KSTAT_DATA_UINT64 },
514f0a05239SGeorge Amanakis 	{ "l2_rebuild_unsupported",	KSTAT_DATA_UINT64 },
515f0a05239SGeorge Amanakis 	{ "l2_rebuild_io_errors",	KSTAT_DATA_UINT64 },
516f0a05239SGeorge Amanakis 	{ "l2_rebuild_dh_errors",	KSTAT_DATA_UINT64 },
517f0a05239SGeorge Amanakis 	{ "l2_rebuild_cksum_lb_errors",	KSTAT_DATA_UINT64 },
518f0a05239SGeorge Amanakis 	{ "l2_rebuild_lowmem",		KSTAT_DATA_UINT64 },
519f0a05239SGeorge Amanakis 	{ "l2_rebuild_size",		KSTAT_DATA_UINT64 },
520f0a05239SGeorge Amanakis 	{ "l2_rebuild_asize",		KSTAT_DATA_UINT64 },
521f0a05239SGeorge Amanakis 	{ "l2_rebuild_bufs",		KSTAT_DATA_UINT64 },
522f0a05239SGeorge Amanakis 	{ "l2_rebuild_bufs_precached",	KSTAT_DATA_UINT64 },
523f0a05239SGeorge Amanakis 	{ "l2_rebuild_log_blks",	KSTAT_DATA_UINT64 },
5249253d63dSGeorge Wilson 	{ "memory_throttle_count",	KSTAT_DATA_UINT64 },
52520128a08SGeorge Wilson 	{ "arc_meta_used",		KSTAT_DATA_UINT64 },
52620128a08SGeorge Wilson 	{ "arc_meta_limit",		KSTAT_DATA_UINT64 },
5273a5286a1SMatthew Ahrens 	{ "arc_meta_max",		KSTAT_DATA_UINT64 },
528cf6106c8SMatthew Ahrens 	{ "arc_meta_min",		KSTAT_DATA_UINT64 },
529a3874b8bSToomas Soome 	{ "async_upgrade_sync",		KSTAT_DATA_UINT64 },
530cf6106c8SMatthew Ahrens 	{ "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
531a3874b8bSToomas Soome 	{ "demand_hit_prescient_prefetch", KSTAT_DATA_UINT64 },
53244cb6abcSbmc };
/*
 * Raise the kstat arc_stats.<stat> to `val' when `val' is larger than the
 * value currently stored, so that the kstat tracks a running maximum.  The
 * store goes through atomic_cas_64() and is retried for as long as `val' is
 * still larger and the compare-and-swap does not return the value that was
 * just read.
 *
 * `val' is evaluated on every pass of the retry loop (ARCSTAT_MAXSTAT passes
 * another kstat here), so it must be free of side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by a
 * semicolon is exactly one statement and can be the unbraced body of an
 * if/else.  The scratch variable has a macro-specific name so that a `val'
 * expression mentioning a caller variable called `m' is not captured.
 */
#define	ARCSTAT_MAX(stat, val)						\
	do {								\
		uint64_t arcstat_max_cur;				\
		while ((val) > (arcstat_max_cur =			\
		    arc_stats.stat.value.ui64) &&			\
		    (arcstat_max_cur != atomic_cas_64(			\
		    &arc_stats.stat.value.ui64, arcstat_max_cur, (val)))) \
			continue;					\
	} while (0)
/*
 * Fold the current value of kstat `stat' into its companion maximum kstat,
 * whose name is `stat' with "_max" appended.
 */
#define	ARCSTAT_MAXSTAT(stat)						\
	ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64)
/*
 * We define a macro to allow ARC hits/misses to be easily broken down by
 * two separate conditions, giving a total of four different subtypes for
 * each of hits and misses (so eight statistics total).
 *
 * Exactly one counter is bumped per use.  Its name is pasted together as
 *
 *	arcstat_<stat1 | notstat1>_<stat2 | notstat2>_<stat>
 *
 * using stat1 when cond1 holds (notstat1 otherwise) and stat2 when cond2
 * holds (notstat2 otherwise); the result is handed to ARCSTAT_BUMP().  Each
 * condition is evaluated once.
 *
 * The if/else is wrapped in do { } while (0) so that the macro followed by a
 * semicolon is exactly one statement; a bare if/else expansion cannot be
 * used as the unbraced body of an outer if that has its own else.
 */
#define	ARCSTAT_CONDSTAT(cond1, stat1, notstat1, cond2, stat2, notstat2, stat) \
	do {								\
		if (cond1) {						\
			if (cond2) {					\
				ARCSTAT_BUMP(				\
				    arcstat_##stat1##_##stat2##_##stat); \
			} else {					\
				ARCSTAT_BUMP(				\
				    arcstat_##stat1##_##notstat2##_##stat); \
			}						\
		} else {						\
			if (cond2) {					\
				ARCSTAT_BUMP(				\
				    arcstat_##notstat1##_##stat2##_##stat); \
			} else {					\
				ARCSTAT_BUMP(				\
				    arcstat_##notstat1##_##notstat2##_##stat); \
			}						\
		}							\
	} while (0)
/*
 * Maintain a kstat as a floating average.  On every update the stored value
 * gives up 1/ARCSTAT_F_AVG_FACTOR of itself and gains 1/ARCSTAT_F_AVG_FACTOR
 * of the new sample, which shrinks the contribution of any single sample to
 * the overall average.
 *
 * The update is a plain read-modify-write of the kstat.  It assumes that
 * integer loads and stores are atomic, but it is not safe for multiple
 * writers updating the same kstat in parallel (only the last writer's
 * update will remain).
 */
#define	ARCSTAT_F_AVG_FACTOR	3
#define	ARCSTAT_F_AVG(stat, value) \
	do { \
		uint64_t _favg = ARCSTAT(stat); \
		_favg -= _favg / ARCSTAT_F_AVG_FACTOR; \
		_favg += (value) / ARCSTAT_F_AVG_FACTOR; \
		ARCSTAT(stat) = _favg; \
		_NOTE(CONSTCOND) \
	} while (0)
581f0a05239SGeorge Amanakis 
58244cb6abcSbmc kstat_t			*arc_ksp;
583b24ab676SJeff Bonwick static arc_state_t	*arc_anon;
58444cb6abcSbmc static arc_state_t	*arc_mru;
58544cb6abcSbmc static arc_state_t	*arc_mru_ghost;
58644cb6abcSbmc static arc_state_t	*arc_mfu;
58744cb6abcSbmc static arc_state_t	*arc_mfu_ghost;
588fa94a07fSbrendan static arc_state_t	*arc_l2c_only;
5903a2d8a1bSPaul Dagnelie /*
5913a2d8a1bSPaul Dagnelie  * There are also some ARC variables that we want to export, but that are
5923a2d8a1bSPaul Dagnelie  * updated so often that having the canonical representation be the statistic
5933a2d8a1bSPaul Dagnelie  * variable causes a performance bottleneck. We want to use aggsum_t's for these
5943a2d8a1bSPaul Dagnelie  * instead, but still be able to export the kstat in the same way as before.
5953a2d8a1bSPaul Dagnelie  * The solution is to always use the aggsum version, except in the kstat update
5963a2d8a1bSPaul Dagnelie  * callback.
5973a2d8a1bSPaul Dagnelie  */
5983a2d8a1bSPaul Dagnelie aggsum_t arc_size;
5993a2d8a1bSPaul Dagnelie aggsum_t arc_meta_used;
6003a2d8a1bSPaul Dagnelie aggsum_t astat_data_size;
6013a2d8a1bSPaul Dagnelie aggsum_t astat_metadata_size;
6023a2d8a1bSPaul Dagnelie aggsum_t astat_hdr_size;
6033a2d8a1bSPaul Dagnelie aggsum_t astat_other_size;
6043a2d8a1bSPaul Dagnelie aggsum_t astat_l2_hdr_size;
6053a2d8a1bSPaul Dagnelie 
60644cb6abcSbmc static int		arc_no_grow;	/* Don't try to grow cache size */
607de753e34SBrad Lewis static hrtime_t		arc_growtime;
60844cb6abcSbmc static uint64_t		arc_tempreserve;
6092fdbea25SAleksandr Guzovskiy static uint64_t		arc_loaned_bytes;
/*
 * Non-zero when `state' is arc_mru_ghost, arc_mfu_ghost or arc_l2c_only.
 * `state' may be evaluated up to three times.
 */
#define	GHOST_STATE(state)						\
	((state) == arc_mru_ghost ||					\
	(state) == arc_mfu_ghost ||					\
	(state) == arc_l2c_only)
/*
 * Header flag predicates.  Each one masks a single ARC_FLAG_* bit out of
 * hdr->b_flags, so the result is the bit itself (non-zero when set) rather
 * than a normalized 0 or 1.
 */
#define	HDR_IN_HASH_TABLE(hdr)		\
	((hdr)->b_flags & ARC_FLAG_IN_HASH_TABLE)
#define	HDR_IO_IN_PROGRESS(hdr)		\
	((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS)
#define	HDR_IO_ERROR(hdr)		\
	((hdr)->b_flags & ARC_FLAG_IO_ERROR)
#define	HDR_PREFETCH(hdr)		\
	((hdr)->b_flags & ARC_FLAG_PREFETCH)
#define	HDR_PRESCIENT_PREFETCH(hdr)	\
	((hdr)->b_flags & ARC_FLAG_PRESCIENT_PREFETCH)
#define	HDR_COMPRESSION_ENABLED(hdr)	\
	((hdr)->b_flags & ARC_FLAG_COMPRESSED_ARC)
62389c86e32SChris Williamson 
/*
 * L2ARC, protection and data-sharing predicates on hdr->b_flags.  Apart
 * from HDR_L2_READING these evaluate to the masked flag bit.
 */
#define	HDR_L2CACHE(hdr)		\
	((hdr)->b_flags & ARC_FLAG_L2CACHE)
/* 1 when both the I/O-in-progress and has-L2-header flags are set, else 0. */
#define	HDR_L2_READING(hdr)		\
	(HDR_IO_IN_PROGRESS(hdr) && HDR_HAS_L2HDR(hdr))
#define	HDR_L2_WRITING(hdr)		\
	((hdr)->b_flags & ARC_FLAG_L2_WRITING)
#define	HDR_L2_EVICTED(hdr)		\
	((hdr)->b_flags & ARC_FLAG_L2_EVICTED)
#define	HDR_L2_WRITE_HEAD(hdr)		\
	((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD)
#define	HDR_PROTECTED(hdr)		\
	((hdr)->b_flags & ARC_FLAG_PROTECTED)
#define	HDR_NOAUTH(hdr)			\
	((hdr)->b_flags & ARC_FLAG_NOAUTH)
#define	HDR_SHARED_DATA(hdr)		\
	((hdr)->b_flags & ARC_FLAG_SHARED_DATA)
/*
 * Buffer content type.  A header is metadata when ARC_FLAG_BUFC_METADATA is
 * set in b_flags; HDR_ISTYPE_DATA() is simply the negation of that test.
 */
#define	HDR_ISTYPE_METADATA(hdr)	\
	((hdr)->b_flags & ARC_FLAG_BUFC_METADATA)
#define	HDR_ISTYPE_DATA(hdr)		\
	(!HDR_ISTYPE_METADATA(hdr))
63889c86e32SChris Williamson 
/*
 * HDR_HAS_L1HDR() and HDR_HAS_L2HDR() test the ARC_FLAG_HAS_L1HDR and
 * ARC_FLAG_HAS_L2HDR bits in b_flags.
 */
#define	HDR_HAS_L1HDR(hdr)		\
	((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
#define	HDR_HAS_L2HDR(hdr)		\
	((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
/*
 * Non-zero only when the L1-header and protected flags are both set and
 * b_crypt_hdr.b_rabd is non-NULL.  The flags are tested first, so b_rabd is
 * read only when both of them are set.
 */
#define	HDR_HAS_RABD(hdr)					\
	(HDR_HAS_L1HDR(hdr) &&					\
	HDR_PROTECTED(hdr) &&					\
	(hdr)->b_crypt_hdr.b_rabd != NULL)