xref: /illumos-gate/usr/src/uts/common/fs/zfs/sys/btree.h (revision 4d7988d6)
1 /*
2  * CDDL HEADER START
3  *
4  * This file and its contents are supplied under the terms of the
5  * Common Development and Distribution License ("CDDL"), version 1.0.
6  * You may only use this file in accordance with the terms of version
7  * 1.0 of the CDDL.
8  *
9  * A full copy of the text of the CDDL should have accompanied this
10  * source.  A copy of the CDDL is also available via the Internet at
11  * http://www.illumos.org/license/CDDL.
12  *
13  * CDDL HEADER END
14  */
15 /*
16  * Copyright (c) 2019 by Delphix. All rights reserved.
17  */
18 
19 #ifndef	_BTREE_H
20 #define	_BTREE_H
21 
22 #ifdef	__cplusplus
23 extern "C" {
24 #endif
25 
26 #include	<sys/zfs_context.h>
27 
28 /*
29  * This file defines the interface for a B-Tree implementation for ZFS. The
30  * tree can be used to store arbitrary sortable data types with low overhead
31  * and good operation performance. In addition the tree intelligently
32  * optimizes bulk in-order insertions to improve memory use and performance.
33  *
34  * Note that for all B-Tree functions, the values returned are pointers to the
35  * internal copies of the data in the tree. The internal data can only be
36  * safely mutated if the changes cannot change the ordering of the element
37  * with respect to any other elements in the tree.
38  *
39  * The major drawback of the B-Tree is that any returned elements or indexes
40  * are only valid until a side-effectful operation occurs, since these can
41  * result in reallocation or relocation of data. Side-effectful operations are
42  * defined as insertion, removal, and zfs_btree_destroy_nodes.
43  *
44  * The B-Tree has two types of nodes: core nodes, and leaf nodes. Core
45  * nodes have an array of children pointing to other nodes, and an array of
46  * elements that act as separators between the elements of the subtrees rooted
47  * at its children. Leaf nodes only contain data elements, and form the bottom
48  * layer of the tree. Unlike B+ Trees, in this B-Tree implementation the
49  * elements in the core nodes are not copies of or references to leaf node
50  * elements.  Each element occurs only once in the tree, no matter what kind
51  * of node it is in.
52  *
53  * The tree's height is the same throughout, unlike many other forms of search
54  * tree. Each node (except for the root) must be between half minus one and
55  * completely full of elements (and children) at all times. Any operation that
56  * would put the node outside of that range results in a rebalancing operation
57  * (taking, merging, or splitting).
58  *
59  * This tree was implemented using descriptions from Wikipedia's articles on
60  * B-Trees and B+ Trees.
61  */
62 
63 /*
64  * Decreasing these values results in smaller memmove operations, but more of
65  * them, and increased memory overhead. Increasing these values results in
66  * higher variance in operation time, and reduces memory overhead.
 *
 * BTREE_CORE_ELEMS bounds the number of elements in a core node:
 * zfs_btree_core_t has room for BTREE_CORE_ELEMS + 1 child pointers, and a
 * core node with N elements has N + 1 children.
 *
 * NOTE(review): BTREE_LEAF_SIZE is presumably the size in bytes of a leaf
 * node; it is not used in this header -- confirm against the implementation.
67  */
68 #define	BTREE_CORE_ELEMS	128
69 #define	BTREE_LEAF_SIZE		4096
70 
/*
 * Header common to both kinds of node; it is the first member of both
 * zfs_btree_core_t and zfs_btree_leaf_t.
 */
71 typedef struct zfs_btree_hdr {
	/*
	 * The core node that is this node's parent.
	 * NOTE(review): presumably NULL for the root node -- confirm.
	 */
72 	struct zfs_btree_core	*bth_parent;
	/*
	 * NOTE(review): presumably B_TRUE for core nodes and B_FALSE for
	 * leaf nodes -- confirm against the implementation.
	 */
73 	boolean_t		bth_core;
74 	/*
75 	 * For both leaf and core nodes, represents the number of elements in
76 	 * the node. A core node has bth_count + 1 children.
77 	 */
78 	uint32_t		bth_count;
79 } zfs_btree_hdr_t;
80 
/*
 * A core (interior) node: the common header, an array of child pointers,
 * and the node's separator elements stored as raw bytes.
 */
81 typedef struct zfs_btree_core {
82 	zfs_btree_hdr_t	btc_hdr;
	/* Room for one more child than the maximum number of elements. */
83 	zfs_btree_hdr_t	*btc_children[BTREE_CORE_ELEMS + 1];
	/*
	 * Flexible array member holding the elements as untyped bytes.
	 * NOTE(review): the storage is presumably sized at allocation time
	 * from the tree's bt_elem_size -- confirm against the implementation.
	 */
84 	uint8_t		btc_elems[];
85 } zfs_btree_core_t;
86 
/*
 * A leaf node: the common header followed by the node's data elements
 * stored as raw bytes. Leaf nodes carry no child pointers.
 */
87 typedef struct zfs_btree_leaf {
88 	zfs_btree_hdr_t	btl_hdr;
	/*
	 * Flexible array member holding the elements as untyped bytes.
	 * NOTE(review): the storage is presumably sized at allocation time
	 * (see BTREE_LEAF_SIZE) -- confirm against the implementation.
	 */
89 	uint8_t		btl_elems[];
90 } zfs_btree_leaf_t;
91 
/*
 * Identifies a position in the tree as a node plus an offset within that
 * node. The lookup, insertion, iteration, and removal routines declared
 * below take or fill in one of these.
 */
92 typedef struct zfs_btree_index {
93 	zfs_btree_hdr_t	*bti_node;
94 	uint64_t	bti_offset;
95 	/*
96 	 * True if the location is before the listed offset, false if it's at
97 	 * the listed offset.
98 	 */
99 	boolean_t	bti_before;
100 } zfs_btree_index_t;
101 
/*
 * Per-tree state. Callers pass a pointer to one of these to every
 * zfs_btree_*() routine that operates on a tree.
 */
102 typedef struct btree {
	/* Header of the tree's root node. */
103 	zfs_btree_hdr_t		*bt_root;
	/*
	 * NOTE(review): the base of the height count (e.g. the value for an
	 * empty or single-leaf tree) is not visible in this header -- confirm.
	 */
104 	int64_t			bt_height;
	/*
	 * Size of one element.
	 * NOTE(review): presumably the size passed to zfs_btree_create() --
	 * confirm against the implementation.
	 */
105 	size_t			bt_elem_size;
	/* Element and node counts for the tree. */
106 	uint64_t		bt_num_elems;
107 	uint64_t		bt_num_nodes;
108 	zfs_btree_leaf_t	*bt_bulk; /* non-null if bulk loading */
	/*
	 * Element comparison function. As documented for zfs_btree_create(),
	 * a comparator must return exactly -1, 0, or +1.
	 */
109 	int (*bt_compar) (const void *, const void *);
110 } zfs_btree_t;
111 
112 /*
113  * Allocate and deallocate caches for btree nodes.
114  */
115 void zfs_btree_init(void);
116 void zfs_btree_fini(void);
117 
118 /*
119  * Initialize a B-Tree. Arguments are:
120  *
121  * tree   - the tree to be initialized
122  * compar - function to compare two nodes, it must return exactly: -1, 0, or +1
123  *          -1 for <, 0 for ==, and +1 for >
124  * size   - the value of sizeof(struct my_type)
125  */
126 void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *),
127     size_t);
128 
129 /*
130  * Find a node with a matching value in the tree. Returns the matching node
131  * found. If not found, it returns NULL and then if "where" is not NULL it sets
132  * "where" for use with zfs_btree_insert() or zfs_btree_nearest().
133  *
134  * node   - node that has the value being looked for
135  * where  - position for use with zfs_btree_nearest() or zfs_btree_insert(),
136  *          may be NULL
137  */
138 void *zfs_btree_find(zfs_btree_t *, const void *, zfs_btree_index_t *);
139 
140 /*
141  * Insert a node into the tree.
142  *
143  * node   - the node to insert
144  * where  - position as returned from zfs_btree_find()
145  */
146 void zfs_btree_insert(zfs_btree_t *, const void *, const zfs_btree_index_t *);
147 
148 /*
149  * Return the first or last valued node in the tree. Will return NULL
150  * if the tree is empty.
151  */
152 void *zfs_btree_first(zfs_btree_t *, zfs_btree_index_t *);
153 void *zfs_btree_last(zfs_btree_t *, zfs_btree_index_t *);
154 
155 /*
156  * Return the next or previous valued node in the tree.
157  */
158 void *zfs_btree_next(zfs_btree_t *, const zfs_btree_index_t *,
159     zfs_btree_index_t *);
160 void *zfs_btree_prev(zfs_btree_t *, const zfs_btree_index_t *,
161     zfs_btree_index_t *);
162 
163 /*
164  * Get a value from a tree and an index.
165  */
166 void *zfs_btree_get(zfs_btree_t *, zfs_btree_index_t *);
167 
168 /*
169  * Add a single value to the tree. The value must not compare equal to any
170  * other node already in the tree.
171  */
172 void zfs_btree_add(zfs_btree_t *, const void *);
173 
174 /*
175  * Remove a single value from the tree.  The value must be in the tree. The
176  * pointer passed in may be a pointer into a tree-controlled buffer, but it
177  * need not be.
178  */
179 void zfs_btree_remove(zfs_btree_t *, const void *);
180 
181 /*
182  * Remove the value at the given location from the tree.
183  */
184 void zfs_btree_remove_from(zfs_btree_t *, zfs_btree_index_t *);
185 
186 /*
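187  * Return the number of nodes in the tree.
 * NOTE(review): other comments in this header use "node" to mean a stored
 * value, and zfs_btree_t tracks both bt_num_elems and bt_num_nodes; confirm
 * which of the two this function reports.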
188  */
189 ulong_t zfs_btree_numnodes(zfs_btree_t *);
190 
191 /*
192  * Used to destroy any remaining nodes in a tree. The cookie argument should
193  * be initialized to NULL before the first call. Returns a node that has been
194  * removed from the tree and may be free()'d. Returns NULL when the tree is
195  * empty.
196  *
197  * Once you call zfs_btree_destroy_nodes(), you can only continue calling it
198  * and finally zfs_btree_destroy(). No other B-Tree routines will be valid.
199  *
200  * cookie - an index used to save state between calls to
201  * zfs_btree_destroy_nodes()
202  *
203  * EXAMPLE:
204  *	zfs_btree_t *tree;
205  *	struct my_data *node;
206  *	zfs_btree_index_t *cookie;
207  *
208  *	cookie = NULL;
209  *	while ((node = zfs_btree_destroy_nodes(tree, &cookie)) != NULL)
210  *		data_destroy(node);
211  *	zfs_btree_destroy(tree);
212  */
213 void *zfs_btree_destroy_nodes(zfs_btree_t *, zfs_btree_index_t **);
214 
215 /*
216  * Destroys all nodes in the tree quickly. This doesn't give the caller an
217  * opportunity to iterate over each node and do its own cleanup; for that, use
218  * zfs_btree_destroy_nodes().
219  */
220 void zfs_btree_clear(zfs_btree_t *);
221 
222 /*
223  * Final destroy of a B-Tree. Arguments are:
224  *
225  * tree   - the empty tree to destroy
226  */
227 void zfs_btree_destroy(zfs_btree_t *tree);
228 
229 /* Runs a variety of self-checks on the btree to verify integrity. */
230 void zfs_btree_verify(zfs_btree_t *tree);
231 
232 #ifdef	__cplusplus
233 }
234 #endif
235 
236 #endif	/* _BTREE_H */
237