xref: /illumos-gate/usr/src/uts/common/fs/zfs/sys/sa_impl.h (revision 0a586cea3ceec7e5e50e7e54c745082a7a333ac2)
1*0a586ceaSMark Shellenbaum /*
2*0a586ceaSMark Shellenbaum  * CDDL HEADER START
3*0a586ceaSMark Shellenbaum  *
4*0a586ceaSMark Shellenbaum  * The contents of this file are subject to the terms of the
5*0a586ceaSMark Shellenbaum  * Common Development and Distribution License (the "License").
6*0a586ceaSMark Shellenbaum  * You may not use this file except in compliance with the License.
7*0a586ceaSMark Shellenbaum  *
8*0a586ceaSMark Shellenbaum  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*0a586ceaSMark Shellenbaum  * or http://www.opensolaris.org/os/licensing.
10*0a586ceaSMark Shellenbaum  * See the License for the specific language governing permissions
11*0a586ceaSMark Shellenbaum  * and limitations under the License.
12*0a586ceaSMark Shellenbaum  *
13*0a586ceaSMark Shellenbaum  * When distributing Covered Code, include this CDDL HEADER in each
14*0a586ceaSMark Shellenbaum  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*0a586ceaSMark Shellenbaum  * If applicable, add the following below this CDDL HEADER, with the
16*0a586ceaSMark Shellenbaum  * fields enclosed by brackets "[]" replaced with your own identifying
17*0a586ceaSMark Shellenbaum  * information: Portions Copyright [yyyy] [name of copyright owner]
18*0a586ceaSMark Shellenbaum  *
19*0a586ceaSMark Shellenbaum  * CDDL HEADER END
20*0a586ceaSMark Shellenbaum  */
21*0a586ceaSMark Shellenbaum /*
22*0a586ceaSMark Shellenbaum  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23*0a586ceaSMark Shellenbaum  * Use is subject to license terms.
24*0a586ceaSMark Shellenbaum  */
25*0a586ceaSMark Shellenbaum 
26*0a586ceaSMark Shellenbaum #ifndef	_SYS_SA_IMPL_H
27*0a586ceaSMark Shellenbaum #define	_SYS_SA_IMPL_H
28*0a586ceaSMark Shellenbaum 
29*0a586ceaSMark Shellenbaum #include <sys/dmu.h>
30*0a586ceaSMark Shellenbaum #include <sys/refcount.h>
31*0a586ceaSMark Shellenbaum #include <sys/list.h>
32*0a586ceaSMark Shellenbaum 
33*0a586ceaSMark Shellenbaum /*
34*0a586ceaSMark Shellenbaum  * Array of known attributes and their
35*0a586ceaSMark Shellenbaum  * various characteristics.
36*0a586ceaSMark Shellenbaum  */
37*0a586ceaSMark Shellenbaum typedef struct sa_attr_table {
38*0a586ceaSMark Shellenbaum 	sa_attr_type_t	sa_attr;
39*0a586ceaSMark Shellenbaum 	uint8_t sa_registered;
40*0a586ceaSMark Shellenbaum 	uint16_t sa_length;
41*0a586ceaSMark Shellenbaum 	sa_bswap_type_t sa_byteswap;
42*0a586ceaSMark Shellenbaum 	char *sa_name;
43*0a586ceaSMark Shellenbaum } sa_attr_table_t;
44*0a586ceaSMark Shellenbaum 
/*
 * Zap attribute format for attribute registration
 *
 * 64      56      48      40      32      24      16      8       0
 * +-------+-------+-------+-------+-------+-------+-------+-------+
 * |        unused         |      len      | bswap |   attr num    |
 * +-------+-------+-------+-------+-------+-------+-------+-------+
 *
 * Zap attribute format for layout information.
 *
 * Layout information is stored as an array of attribute numbers.
 * The name of the attribute is the layout number (0, 1, 2, ...)
 *
 * 16       0
 * +--------+
 * | attr # |
 * +--------+
 * | attr # |
 * +--------+
 *  ......
 *
 */

/*
 * Accessors for the 64-bit registration value drawn above.
 *
 * The len field occupies bits 24-39 and therefore crosses the 32-bit
 * boundary, so the decoders use the 64-bit bitfield helper to mirror
 * ATTR_ENCODE(), which builds the value with BF64_SET().
 *
 * ATTR_ENCODE() expands to a brace-enclosed statement group and modifies
 * x in place; x must be a modifiable lvalue.
 */
#define	ATTR_BSWAP(x)	BF64_GET(x, 16, 8)
#define	ATTR_LENGTH(x)	BF64_GET(x, 24, 16)
#define	ATTR_NUM(x)	BF64_GET(x, 0, 16)
#define	ATTR_ENCODE(x, attr, length, bswap) \
{ \
	BF64_SET(x, 24, 16, length); \
	BF64_SET(x, 16, 8, bswap); \
	BF64_SET(x, 0, 16, attr); \
}
77*0a586ceaSMark Shellenbaum 
/*
 * Table-of-contents entry as written by TOC_ATTR_ENCODE():
 *
 * 32  31         24                      0
 * +---+----------+-----------------------+
 * | P | len idx  |        offset         |
 * +---+----------+-----------------------+
 *
 * P (bit 31) is the "attribute present" flag, bits 24-30 hold the length
 * index and bits 0-23 hold the offset.  The decoders read exactly the
 * field widths the encoder writes, so every value that can be encoded
 * reads back unchanged.
 *
 * TOC_ATTR_ENCODE() expands to a brace-enclosed statement group and
 * modifies x in place; x must be a modifiable lvalue.
 */
#define	TOC_OFF(x)		BF32_GET(x, 0, 24)
#define	TOC_ATTR_PRESENT(x)	BF32_GET(x, 31, 1)
#define	TOC_LEN_IDX(x)		BF32_GET(x, 24, 7)
#define	TOC_ATTR_ENCODE(x, len_idx, offset) \
{ \
	BF32_SET(x, 31, 1, 1); \
	BF32_SET(x, 24, 7, len_idx); \
	BF32_SET(x, 0, 24, offset); \
}
87*0a586ceaSMark Shellenbaum 
88*0a586ceaSMark Shellenbaum #define	SA_LAYOUTS	"LAYOUTS"
89*0a586ceaSMark Shellenbaum #define	SA_REGISTRY	"REGISTRY"
90*0a586ceaSMark Shellenbaum 
91*0a586ceaSMark Shellenbaum /*
92*0a586ceaSMark Shellenbaum  * Each unique layout will have their own table
93*0a586ceaSMark Shellenbaum  * sa_lot (layout_table)
94*0a586ceaSMark Shellenbaum  */
95*0a586ceaSMark Shellenbaum typedef struct sa_lot {
96*0a586ceaSMark Shellenbaum 	avl_node_t lot_num_node;
97*0a586ceaSMark Shellenbaum 	avl_node_t lot_hash_node;
98*0a586ceaSMark Shellenbaum 	uint64_t lot_num;
99*0a586ceaSMark Shellenbaum 	uint64_t lot_hash;
100*0a586ceaSMark Shellenbaum 	sa_attr_type_t *lot_attrs;	/* array of attr #'s */
101*0a586ceaSMark Shellenbaum 	uint32_t lot_var_sizes;	/* how many aren't fixed size */
102*0a586ceaSMark Shellenbaum 	uint32_t lot_attr_count;	/* total attr count */
103*0a586ceaSMark Shellenbaum 	list_t 	lot_idx_tab;	/* should be only a couple of entries */
104*0a586ceaSMark Shellenbaum 	int	lot_instance;	/* used with lot_hash to identify entry */
105*0a586ceaSMark Shellenbaum } sa_lot_t;
106*0a586ceaSMark Shellenbaum 
107*0a586ceaSMark Shellenbaum /* index table of offsets */
108*0a586ceaSMark Shellenbaum typedef struct sa_idx_tab {
109*0a586ceaSMark Shellenbaum 	list_node_t	sa_next;
110*0a586ceaSMark Shellenbaum 	sa_lot_t	*sa_layout;
111*0a586ceaSMark Shellenbaum 	uint16_t	*sa_variable_lengths;
112*0a586ceaSMark Shellenbaum 	refcount_t	sa_refcount;
113*0a586ceaSMark Shellenbaum 	uint32_t	*sa_idx_tab;	/* array of offsets */
114*0a586ceaSMark Shellenbaum } sa_idx_tab_t;
115*0a586ceaSMark Shellenbaum 
116*0a586ceaSMark Shellenbaum /*
117*0a586ceaSMark Shellenbaum  * Since the offset/index information into the actual data
118*0a586ceaSMark Shellenbaum  * will usually be identical we can share that information with
119*0a586ceaSMark Shellenbaum  * all handles that have the exact same offsets.
120*0a586ceaSMark Shellenbaum  *
121*0a586ceaSMark Shellenbaum  * You would typically only have a large number of different table of
122*0a586ceaSMark Shellenbaum  * contents if you had a several variable sized attributes.
123*0a586ceaSMark Shellenbaum  *
124*0a586ceaSMark Shellenbaum  * Two AVL trees are used to track the attribute layout numbers.
125*0a586ceaSMark Shellenbaum  * one is keyed by number and will be consulted when a DMU_OT_SA
126*0a586ceaSMark Shellenbaum  * object is first read.  The second tree is keyed by the hash signature
127*0a586ceaSMark Shellenbaum  * of the attributes and will be consulted when an attribute is added
128*0a586ceaSMark Shellenbaum  * to determine if we already have an instance of that layout.  Both
129*0a586ceaSMark Shellenbaum  * of these tree's are interconnected.  The only difference is that
130*0a586ceaSMark Shellenbaum  * when an entry is found in the "hash" tree the list of attributes will
131*0a586ceaSMark Shellenbaum  * need to be compared against the list of attributes you have in hand.
132*0a586ceaSMark Shellenbaum  * The assumption is that typically attributes will just be updated and
133*0a586ceaSMark Shellenbaum  * adding a completely new attribute is a very rare operation.
134*0a586ceaSMark Shellenbaum  */
135*0a586ceaSMark Shellenbaum struct sa_os {
136*0a586ceaSMark Shellenbaum 	kmutex_t 	sa_lock;
137*0a586ceaSMark Shellenbaum 	boolean_t	sa_need_attr_registration;
138*0a586ceaSMark Shellenbaum 	boolean_t	sa_force_spill;
139*0a586ceaSMark Shellenbaum 	uint64_t	sa_master_obj;
140*0a586ceaSMark Shellenbaum 	uint64_t	sa_reg_attr_obj;
141*0a586ceaSMark Shellenbaum 	uint64_t	sa_layout_attr_obj;
142*0a586ceaSMark Shellenbaum 	int		sa_num_attrs;
143*0a586ceaSMark Shellenbaum 	sa_attr_table_t *sa_attr_table;	 /* private attr table */
144*0a586ceaSMark Shellenbaum 	sa_update_cb_t	*sa_update_cb;
145*0a586ceaSMark Shellenbaum 	avl_tree_t	sa_layout_num_tree;  /* keyed by layout number */
146*0a586ceaSMark Shellenbaum 	avl_tree_t	sa_layout_hash_tree; /* keyed by layout hash value */
147*0a586ceaSMark Shellenbaum 	int		sa_user_table_sz;
148*0a586ceaSMark Shellenbaum 	sa_attr_type_t	*sa_user_table; /* user name->attr mapping table */
149*0a586ceaSMark Shellenbaum };
150*0a586ceaSMark Shellenbaum 
/*
 * Header for all bonus and spill buffers.
 * The header has a fixed portion followed by a variable number
 * of "lengths", depending on the number of variable-sized
 * attributes, which is determined by the "layout number".
 */

#define	SA_MAGIC	0x2F505A  /* ZFS SA */
typedef struct sa_hdr_phys {
	uint32_t	sa_magic;
	/* Encoded with hdrsize and layout number */
	uint16_t	sa_layout_info;
	/* optional sizes for variable length attrs */
	uint16_t	sa_lengths[1];
	/* ... Data follows the lengths.  */
} sa_hdr_phys_t;
165*0a586ceaSMark Shellenbaum 
/*
 * sa_hdr_phys -> sa_layout_info
 *
 * 16      10       0
 * +--------+-------+
 * | hdrsz  |layout |
 * +--------+-------+
 *
 * Bits 0-9 are the layout number.
 * Bits 10-15 are the size of the header.
 * The hdrsize is the number * 8
 *
 * For example.
 * hdrsz of 1 ==> 8 byte header
 *          2 ==> 16 byte header
 *
 */

/*
 * SA_HDR_LAYOUT_NUM() and SA_HDR_SIZE() take a pointer to a header;
 * SA_HDR_SIZE() returns the header size in bytes (stored value << 3).
 * The decoder reads the same 6-bit field that the encoder writes.
 *
 * SA_HDR_LAYOUT_INFO_ENCODE() takes the sa_layout_info lvalue itself,
 * expands to a brace-enclosed statement group and modifies x in place.
 * size is given in bytes and is stored shifted right by 3.
 */
#define	SA_HDR_LAYOUT_NUM(hdr) BF32_GET((hdr)->sa_layout_info, 0, 10)
#define	SA_HDR_SIZE(hdr) BF32_GET_SB((hdr)->sa_layout_info, 10, 6, 3, 0)
#define	SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \
{ \
	BF32_SET_SB(x, 10, 6, 3, 0, size); \
	BF32_SET(x, 0, 10, num); \
}
191*0a586ceaSMark Shellenbaum 
/*
 * Selects one of a handle's two buffers; SA_GET_DB() and
 * SA_IDX_TAB_GET() pick the bonus members for SA_BONUS and the
 * spill members otherwise.
 */
typedef enum sa_buf_type {
	SA_BONUS = 1,	/* bonus buffer */
	SA_SPILL = 2	/* spill buffer */
} sa_buf_type_t;
196*0a586ceaSMark Shellenbaum 
/*
 * Operation codes.  The enumerators are numbered consecutively
 * starting at 0; the values are spelled out for easy reference.
 */
typedef enum sa_data_op {
	SA_LOOKUP = 0,
	SA_UPDATE = 1,
	SA_ADD = 2,
	SA_REPLACE = 3,
	SA_REMOVE = 4
} sa_data_op_t;
204*0a586ceaSMark Shellenbaum 
205*0a586ceaSMark Shellenbaum /*
206*0a586ceaSMark Shellenbaum  * Opaque handle used for most sa functions
207*0a586ceaSMark Shellenbaum  *
208*0a586ceaSMark Shellenbaum  * This needs to be kept as small as possible.
209*0a586ceaSMark Shellenbaum  */
210*0a586ceaSMark Shellenbaum 
211*0a586ceaSMark Shellenbaum struct sa_handle {
212*0a586ceaSMark Shellenbaum 	kmutex_t	sa_lock;
213*0a586ceaSMark Shellenbaum 	dmu_buf_t	*sa_bonus;
214*0a586ceaSMark Shellenbaum 	dmu_buf_t	*sa_spill;
215*0a586ceaSMark Shellenbaum 	objset_t	*sa_os;
216*0a586ceaSMark Shellenbaum 	void 		*sa_userp;
217*0a586ceaSMark Shellenbaum 	sa_idx_tab_t	*sa_bonus_tab;	 /* idx of bonus */
218*0a586ceaSMark Shellenbaum 	sa_idx_tab_t	*sa_spill_tab; /* only present if spill activated */
219*0a586ceaSMark Shellenbaum };
220*0a586ceaSMark Shellenbaum 
/*
 * Buffer accessors for a handle.  In each macro, hdl is a pointer to a
 * handle and type is SA_BONUS to select the bonus buffer; any other
 * value selects the spill buffer.
 *
 * Arguments and expansions are fully parenthesized so that expression
 * arguments (e.g. "&h" or "cond ? SA_BONUS : SA_SPILL") and uses such
 * as SA_GET_DB(...)->field group as intended.
 */

/* The selected buffer, cast to dmu_buf_impl_t *. */
#define	SA_GET_DB(hdl, type)	\
	((dmu_buf_impl_t *)(((type) == SA_BONUS) ? \
	(hdl)->sa_bonus : (hdl)->sa_spill))

/* The selected buffer's db.db_data, viewed as an sa_hdr_phys_t *. */
#define	SA_GET_HDR(hdl, type) \
	((sa_hdr_phys_t *)(SA_GET_DB(hdl, type))->db.db_data)

/* The index table that belongs to the selected buffer. */
#define	SA_IDX_TAB_GET(hdl, type) \
	(((type) == SA_BONUS) ? (hdl)->sa_bonus_tab : (hdl)->sa_spill_tab)
230*0a586ceaSMark Shellenbaum 
/* B_TRUE when the bonus type a is DMU_OT_SA, B_FALSE otherwise. */
#define	IS_SA_BONUSTYPE(a)	\
	(((a) == DMU_OT_SA) ? B_TRUE : B_FALSE)

/* Bonus type of the dnode reached through db (a dmu_buf_impl_t pointer). */
#define	SA_BONUSTYPE_FROM_DB(db) \
	(((dmu_buf_impl_t *)(db))->db_dnode->dn_bonustype)

#define	SA_BLKPTR_SPACE	(DN_MAX_BONUSLEN - sizeof (blkptr_t))

/*
 * Layout number for header x given its bonus type:
 *   - 0 when type is not DMU_OT_SA (x is not evaluated in that case);
 *   - 1 when the header encodes layout number 0;
 *   - otherwise the layout number encoded in the header.
 */
#define	SA_LAYOUT_NUM(x, type) \
	(!IS_SA_BONUSTYPE(type) ? 0 : \
	((SA_HDR_LAYOUT_NUM((x)) == 0) ? 1 : SA_HDR_LAYOUT_NUM((x))))
242*0a586ceaSMark Shellenbaum 
243*0a586ceaSMark Shellenbaum 
/* Length recorded for attr in the attribute table of sa (a pointer). */
#define	SA_REGISTERED_LEN(sa, attr) ((sa)->sa_attr_table[attr].sa_length)

/*
 * Length of attr: the registered length when that is non-zero, otherwise
 * the entry of hdr->sa_lengths[] selected by the length index stored in
 * idx->sa_idx_tab[attr].  sa, idx and hdr are pointers.
 */
#define	SA_ATTR_LEN(sa, idx, attr, hdr) \
	((SA_REGISTERED_LEN(sa, attr) == 0) ? \
	(hdr)->sa_lengths[TOC_LEN_IDX((idx)->sa_idx_tab[attr])] : \
	SA_REGISTERED_LEN(sa, attr))
249*0a586ceaSMark Shellenbaum 
/*
 * Initialize the header that hdr points to: store SA_MAGIC and encode
 * the layout number num and the header size into sa_layout_info.
 * Expands to a brace-enclosed statement group.
 */
#define	SA_SET_HDR(hdr, num, size) \
	{ \
		(hdr)->sa_magic = SA_MAGIC; \
		SA_HDR_LAYOUT_INFO_ENCODE((hdr)->sa_layout_info, num, size); \
	}
255*0a586ceaSMark Shellenbaum 
/*
 * Fill in bulk (an lvalue with sa_size, sa_buftype and sa_addr members)
 * for attribute attr:
 *   sa_size    <- SA_ATTR_LEN(sa, idx, attr, hdr)
 *   sa_buftype <- type
 *   sa_addr    <- hdr plus the offset stored in idx->sa_idx_tab[attr]
 * The hdl argument is accepted but not used by the expansion.
 * Expands to a brace-enclosed statement group.
 */
#define	SA_ATTR_INFO(sa, idx, hdr, attr, bulk, type, hdl) \
	{ \
		(bulk).sa_size = SA_ATTR_LEN(sa, idx, attr, hdr); \
		(bulk).sa_buftype = (type); \
		(bulk).sa_addr = \
		    (void *)((uintptr_t)TOC_OFF((idx)->sa_idx_tab[attr]) + \
		    (uintptr_t)(hdr)); \
	}
264*0a586ceaSMark Shellenbaum 
/*
 * True when the size encoded in header hdr equals the size implied by
 * layout table entry tb: sizeof (sa_hdr_phys_t), which already holds one
 * length slot, plus room for the remaining (lot_var_sizes - 1) 16-bit
 * lengths rounded up to a multiple of 8 bytes.  hdr and tb are pointers.
 */
#define	SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) \
	(SA_HDR_SIZE((hdr)) == (sizeof (sa_hdr_phys_t) + \
	((tb)->lot_var_sizes > 1 ? P2ROUNDUP(((tb)->lot_var_sizes - 1) * \
	sizeof (uint16_t), 8) : 0)))
269*0a586ceaSMark Shellenbaum 
270*0a586ceaSMark Shellenbaum int sa_add_impl(sa_handle_t *, sa_attr_type_t,
271*0a586ceaSMark Shellenbaum     uint32_t, sa_data_locator_t, void *, dmu_tx_t *);
272*0a586ceaSMark Shellenbaum 
273*0a586ceaSMark Shellenbaum void sa_register_update_callback_locked(objset_t *, sa_update_cb_t *);
274*0a586ceaSMark Shellenbaum int sa_size_locked(sa_handle_t *, sa_attr_type_t, int *);
275*0a586ceaSMark Shellenbaum 
276*0a586ceaSMark Shellenbaum void sa_default_locator(void **, uint32_t *, uint32_t, boolean_t, void *);
277*0a586ceaSMark Shellenbaum int sa_attr_size(sa_os_t *, sa_idx_tab_t *, sa_attr_type_t,
278*0a586ceaSMark Shellenbaum     uint16_t *, sa_hdr_phys_t *);
279*0a586ceaSMark Shellenbaum 
280*0a586ceaSMark Shellenbaum #ifdef	__cplusplus
281*0a586ceaSMark Shellenbaum extern "C" {
282*0a586ceaSMark Shellenbaum #endif
283*0a586ceaSMark Shellenbaum 
284*0a586ceaSMark Shellenbaum #ifdef	__cplusplus
285*0a586ceaSMark Shellenbaum }
286*0a586ceaSMark Shellenbaum #endif
287*0a586ceaSMark Shellenbaum 
288*0a586ceaSMark Shellenbaum #endif	/* _SYS_SA_IMPL_H */
289