10a586ceMark Shellenbaum/*
20a586ceMark Shellenbaum * CDDL HEADER START
30a586ceMark Shellenbaum *
40a586ceMark Shellenbaum * The contents of this file are subject to the terms of the
50a586ceMark Shellenbaum * Common Development and Distribution License (the "License").
60a586ceMark Shellenbaum * You may not use this file except in compliance with the License.
70a586ceMark Shellenbaum *
80a586ceMark Shellenbaum * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90a586ceMark Shellenbaum * or http://www.opensolaris.org/os/licensing.
100a586ceMark Shellenbaum * See the License for the specific language governing permissions
110a586ceMark Shellenbaum * and limitations under the License.
120a586ceMark Shellenbaum *
130a586ceMark Shellenbaum * When distributing Covered Code, include this CDDL HEADER in each
140a586ceMark Shellenbaum * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150a586ceMark Shellenbaum * If applicable, add the following below this CDDL HEADER, with the
160a586ceMark Shellenbaum * fields enclosed by brackets "[]" replaced with your own identifying
170a586ceMark Shellenbaum * information: Portions Copyright [yyyy] [name of copyright owner]
180a586ceMark Shellenbaum *
190a586ceMark Shellenbaum * CDDL HEADER END
200a586ceMark Shellenbaum */
210a586ceMark Shellenbaum/*
22744947dTom Erickson * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
2369962b5Matthew Ahrens * Copyright (c) 2013 by Delphix. All rights reserved.
24bc9014eJustin Gibbs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
250a586ceMark Shellenbaum */
260a586ceMark Shellenbaum
270a586ceMark Shellenbaum#ifndef	_SYS_SA_IMPL_H
280a586ceMark Shellenbaum#define	_SYS_SA_IMPL_H
290a586ceMark Shellenbaum
300a586ceMark Shellenbaum#include <sys/dmu.h>
310a586ceMark Shellenbaum#include <sys/refcount.h>
320a586ceMark Shellenbaum#include <sys/list.h>
330a586ceMark Shellenbaum
340a586ceMark Shellenbaum/*
350a586ceMark Shellenbaum * Array of known attributes and their
360a586ceMark Shellenbaum * various characteristics.
370a586ceMark Shellenbaum */
380a586ceMark Shellenbaumtypedef struct sa_attr_table {
390a586ceMark Shellenbaum	sa_attr_type_t	sa_attr;
400a586ceMark Shellenbaum	uint8_t sa_registered;
410a586ceMark Shellenbaum	uint16_t sa_length;
420a586ceMark Shellenbaum	sa_bswap_type_t sa_byteswap;
430a586ceMark Shellenbaum	char *sa_name;
440a586ceMark Shellenbaum} sa_attr_table_t;
450a586ceMark Shellenbaum
/*
 * ZAP attribute format for attribute registration
 *
 * 64      56      48      40      32      24      16      8       0
 * +-------+-------+-------+-------+-------+-------+-------+-------+
 * |        unused         |      len      | bswap |   attr num    |
 * +-------+-------+-------+-------+-------+-------+-------+-------+
 *
 * ZAP attribute format for layout information
 *
 * Layout information is stored as an array of attribute numbers.
 * The name of the attribute is the layout number (0, 1, 2, ...).
 *
 * 16       0
 * +--------+
 * | attr # |
 * +--------+
 * | attr # |
 * +--------+
 *  ......
 *
 */
680a586ceMark Shellenbaum
/*
 * Accessors for an attribute registration word: attribute number in bits
 * 0-15, byteswap type in bits 16-23 and length in bits 24-39.  The length
 * field extends past bit 31, so the word is decoded with the 64-bit
 * helpers, matching the width ATTR_ENCODE() uses to build it.
 *
 * ATTR_ENCODE() updates `x` in place, so `x` must be a modifiable lvalue.
 */
#define	ATTR_BSWAP(x)	BF64_GET(x, 16, 8)
#define	ATTR_LENGTH(x)	BF64_GET(x, 24, 16)
#define	ATTR_NUM(x)	BF64_GET(x, 0, 16)
#define	ATTR_ENCODE(x, attr, length, bswap) \
{ \
	BF64_SET(x, 24, 16, length); \
	BF64_SET(x, 16, 8, bswap); \
	BF64_SET(x, 0, 16, attr); \
}
780a586ceMark Shellenbaum
/*
 * Table-of-contents entry (32 bits), as written by TOC_ATTR_ENCODE():
 *
 *   bit  31     attribute-present flag
 *   bits 24-30  index into the header's sa_lengths[] array
 *   bits 0-23   offset, added to the header address by SA_ATTR_INFO()
 *
 * The accessors read back exactly the field widths the encoder writes, so
 * a length index above 15 or an offset using bit 23 round-trips intact.
 *
 * TOC_ATTR_ENCODE() updates `x` in place, so `x` must be a modifiable
 * lvalue.
 */
#define	TOC_OFF(x)		BF32_GET(x, 0, 24)
#define	TOC_ATTR_PRESENT(x)	BF32_GET(x, 31, 1)
#define	TOC_LEN_IDX(x)		BF32_GET(x, 24, 7)
#define	TOC_ATTR_ENCODE(x, len_idx, offset) \
{ \
	BF32_SET(x, 31, 1, 1); \
	BF32_SET(x, 24, 7, len_idx); \
	BF32_SET(x, 0, 24, offset); \
}
880a586ceMark Shellenbaum
/*
 * NOTE(review): presumably the names under which the layout and
 * registration ZAP data are stored -- confirm against the users in sa.c.
 */
#define	SA_LAYOUTS	"LAYOUTS"
#define	SA_REGISTRY	"REGISTRY"
910a586ceMark Shellenbaum
920a586ceMark Shellenbaum/*
930a586ceMark Shellenbaum * Each unique layout will have their own table
940a586ceMark Shellenbaum * sa_lot (layout_table)
950a586ceMark Shellenbaum */
960a586ceMark Shellenbaumtypedef struct sa_lot {
970a586ceMark Shellenbaum	avl_node_t lot_num_node;
980a586ceMark Shellenbaum	avl_node_t lot_hash_node;
990a586ceMark Shellenbaum	uint64_t lot_num;
1000a586ceMark Shellenbaum	uint64_t lot_hash;
1010a586ceMark Shellenbaum	sa_attr_type_t *lot_attrs;	/* array of attr #'s */
1020a586ceMark Shellenbaum	uint32_t lot_var_sizes;	/* how many aren't fixed size */
1030a586ceMark Shellenbaum	uint32_t lot_attr_count;	/* total attr count */
10454811daToomas Soome	list_t	lot_idx_tab;	/* should be only a couple of entries */
1050a586ceMark Shellenbaum	int	lot_instance;	/* used with lot_hash to identify entry */
1060a586ceMark Shellenbaum} sa_lot_t;
1070a586ceMark Shellenbaum
1080a586ceMark Shellenbaum/* index table of offsets */
1090a586ceMark Shellenbaumtypedef struct sa_idx_tab {
1100a586ceMark Shellenbaum	list_node_t	sa_next;
1110a586ceMark Shellenbaum	sa_lot_t	*sa_layout;
1120a586ceMark Shellenbaum	uint16_t	*sa_variable_lengths;
113e914aceTim Schumacher	zfs_refcount_t	sa_refcount;
1140a586ceMark Shellenbaum	uint32_t	*sa_idx_tab;	/* array of offsets */
1150a586ceMark Shellenbaum} sa_idx_tab_t;
1160a586ceMark Shellenbaum
1170a586ceMark Shellenbaum/*
1180a586ceMark Shellenbaum * Since the offset/index information into the actual data
1190a586ceMark Shellenbaum * will usually be identical we can share that information with
1200a586ceMark Shellenbaum * all handles that have the exact same offsets.
1210a586ceMark Shellenbaum *
1220a586ceMark Shellenbaum * You would typically only have a large number of different table of
1230a586ceMark Shellenbaum * contents if you had a several variable sized attributes.
1240a586ceMark Shellenbaum *
1250a586ceMark Shellenbaum * Two AVL trees are used to track the attribute layout numbers.
1260a586ceMark Shellenbaum * one is keyed by number and will be consulted when a DMU_OT_SA
1270a586ceMark Shellenbaum * object is first read.  The second tree is keyed by the hash signature
1280a586ceMark Shellenbaum * of the attributes and will be consulted when an attribute is added
1290a586ceMark Shellenbaum * to determine if we already have an instance of that layout.  Both
1300a586ceMark Shellenbaum * of these tree's are interconnected.  The only difference is that
1310a586ceMark Shellenbaum * when an entry is found in the "hash" tree the list of attributes will
1320a586ceMark Shellenbaum * need to be compared against the list of attributes you have in hand.
1330a586ceMark Shellenbaum * The assumption is that typically attributes will just be updated and
1340a586ceMark Shellenbaum * adding a completely new attribute is a very rare operation.
1350a586ceMark Shellenbaum */
1360a586ceMark Shellenbaumstruct sa_os {
13754811daToomas Soome	kmutex_t	sa_lock;
1380a586ceMark Shellenbaum	boolean_t	sa_need_attr_registration;
1390a586ceMark Shellenbaum	boolean_t	sa_force_spill;
1400a586ceMark Shellenbaum	uint64_t	sa_master_obj;
1410a586ceMark Shellenbaum	uint64_t	sa_reg_attr_obj;
1420a586ceMark Shellenbaum	uint64_t	sa_layout_attr_obj;
1430a586ceMark Shellenbaum	int		sa_num_attrs;
1440a586ceMark Shellenbaum	sa_attr_table_t *sa_attr_table;	 /* private attr table */
1450a586ceMark Shellenbaum	sa_update_cb_t	*sa_update_cb;
1460a586ceMark Shellenbaum	avl_tree_t	sa_layout_num_tree;  /* keyed by layout number */
1470a586ceMark Shellenbaum	avl_tree_t	sa_layout_hash_tree; /* keyed by layout hash value */
1480a586ceMark Shellenbaum	int		sa_user_table_sz;
1490a586ceMark Shellenbaum	sa_attr_type_t	*sa_user_table; /* user name->attr mapping table */
1500a586ceMark Shellenbaum};
1510a586ceMark Shellenbaum
/*
 * Header for all bonus and spill buffers.
 *
 * The header has a fixed portion followed by a variable number of
 * "lengths", depending on the number of variable-sized attributes, which
 * is determined by the "layout number".
 */

#define	SA_MAGIC	0x2F505A  /* ZFS SA */
typedef struct sa_hdr_phys {
	uint32_t sa_magic;
	/* BEGIN CSTYLED */
	/*
	 * Encoded with hdrsize and layout number as follows:
	 * 16      10       0
	 * +--------+-------+
	 * | hdrsz  |layout |
	 * +--------+-------+
	 *
	 * Bits 0-9 are the layout number (10 bits).
	 * Bits 10-15 are the size of the header (6 bits).
	 * The hdrsize is the stored number * 8.
	 *
	 * For example:
	 * hdrsz of 1 ==> 8 byte header
	 *          2 ==> 16 byte header
	 *
	 */
	/* END CSTYLED */
	uint16_t sa_layout_info;
	/*
	 * Optional sizes for variable length attrs.  Deliberately declared
	 * with one element rather than as a flexible array member:
	 * SA_HDR_SIZE_MATCH_LAYOUT() relies on sizeof (sa_hdr_phys_t)
	 * already including the first length slot.
	 */
	uint16_t sa_lengths[1];
	/* ... Data follows the lengths.  */
} sa_hdr_phys_t;
1850a586ceMark Shellenbaum
/*
 * Decode/encode the 16-bit sa_layout_info word: the layout number lives in
 * the low 10 bits and the header size in the next 6 bits, stored shifted
 * right by 3 (i.e. in units of 8 bytes).
 *
 * `hdr` is a pointer expression; it is parenthesized so that arguments such
 * as `&hdr_struct` expand correctly.  SA_HDR_LAYOUT_INFO_ENCODE() updates
 * `x` in place, so `x` must be a modifiable lvalue.
 */
#define	SA_HDR_LAYOUT_NUM(hdr) BF32_GET((hdr)->sa_layout_info, 0, 10)
#define	SA_HDR_SIZE(hdr) BF32_GET_SB((hdr)->sa_layout_info, 10, 6, 3, 0)
#define	SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \
{ \
	BF32_SET_SB(x, 10, 6, 3, 0, size); \
	BF32_SET(x, 0, 10, num); \
}
1930a586ceMark Shellenbaum
/*
 * Selects which of a handle's two buffers an operation refers to; see
 * SA_GET_DB() and SA_IDX_TAB_GET().
 */
typedef enum sa_buf_type {
	SA_BONUS = 1,
	SA_SPILL = 2
} sa_buf_type_t;
1980a586ceMark Shellenbaum
/*
 * Kinds of attribute data operation.  Values are assigned implicitly,
 * starting at 0 in declaration order.
 */
typedef enum sa_data_op {
	SA_LOOKUP,
	SA_UPDATE,
	SA_ADD,
	SA_REPLACE,
	SA_REMOVE
} sa_data_op_t;
2060a586ceMark Shellenbaum
2070a586ceMark Shellenbaum/*
2080a586ceMark Shellenbaum * Opaque handle used for most sa functions
2090a586ceMark Shellenbaum *
2100a586ceMark Shellenbaum * This needs to be kept as small as possible.
2110a586ceMark Shellenbaum */
2120a586ceMark Shellenbaum
2130a586ceMark Shellenbaumstruct sa_handle {
214bc9014eJustin Gibbs	dmu_buf_user_t	sa_dbu;
2150a586ceMark Shellenbaum	kmutex_t	sa_lock;
2160a586ceMark Shellenbaum	dmu_buf_t	*sa_bonus;
2170a586ceMark Shellenbaum	dmu_buf_t	*sa_spill;
2180a586ceMark Shellenbaum	objset_t	*sa_os;
219bc9014eJustin Gibbs	void		*sa_userp;
2200a586ceMark Shellenbaum	sa_idx_tab_t	*sa_bonus_tab;	 /* idx of bonus */
2210a586ceMark Shellenbaum	sa_idx_tab_t	*sa_spill_tab; /* only present if spill activated */
2220a586ceMark Shellenbaum};
2230a586ceMark Shellenbaum
/*
 * Handle accessors.  `hdl` is a pointer to a struct sa_handle and `type`
 * is compared against SA_BONUS; any other value selects the spill side.
 *
 * SA_GET_DB()      - the selected buffer, cast to dmu_buf_impl_t *.
 * SA_GET_HDR()     - db.db_data of that buffer, cast to sa_hdr_phys_t *.
 * SA_IDX_TAB_GET() - the selected index table.
 *
 * Each expansion is fully parenthesized so a result can be used directly
 * as a postfix operand (e.g. SA_GET_DB(h, t)->db) and so arguments that
 * are themselves expressions expand correctly.
 */
#define	SA_GET_DB(hdl, type)	\
	((dmu_buf_impl_t *)(((type) == SA_BONUS) ? \
	(hdl)->sa_bonus : (hdl)->sa_spill))

#define	SA_GET_HDR(hdl, type) \
	((sa_hdr_phys_t *)((SA_GET_DB(hdl, type))->db.db_data))

#define	SA_IDX_TAB_GET(hdl, type) \
	(((type) == SA_BONUS) ? (hdl)->sa_bonus_tab : (hdl)->sa_spill_tab)
2330a586ceMark Shellenbaum
/* B_TRUE when bonus type `a` is DMU_OT_SA, B_FALSE otherwise. */
#define	IS_SA_BONUSTYPE(a)	\
	(((a) == DMU_OT_SA) ? B_TRUE : B_FALSE)

/* Bonus type of `db`, as reported by dmu_get_bonustype(). */
#define	SA_BONUSTYPE_FROM_DB(db) \
	(dmu_get_bonustype((dmu_buf_t *)(db)))

/* What is left of DN_OLD_MAX_BONUSLEN after one blkptr_t. */
#define	SA_BLKPTR_SPACE	(DN_OLD_MAX_BONUSLEN - sizeof (blkptr_t))

/*
 * Layout number for header `x` whose buffer has bonus type `type`:
 *   - 0 when the bonus type is not DMU_OT_SA;
 *   - 1 when the header's stored layout number is 0;
 *   - the stored layout number otherwise.
 */
#define	SA_LAYOUT_NUM(x, type) \
	(!IS_SA_BONUSTYPE(type) ? 0 : \
	((SA_HDR_LAYOUT_NUM((x)) == 0) ? 1 : SA_HDR_LAYOUT_NUM((x))))
2450a586ceMark Shellenbaum
2460a586ceMark Shellenbaum
/* Registered length of attribute number `attr` in `sa`'s attribute table. */
#define	SA_REGISTERED_LEN(sa, attr) ((sa)->sa_attr_table[(attr)].sa_length)

/*
 * Length of attribute `attr`.  A registered length of 0 means the length
 * is taken from the header's sa_lengths[] array, at the index recorded in
 * the attribute's table-of-contents word; otherwise the registered length
 * is used.
 */
#define	SA_ATTR_LEN(sa, idx, attr, hdr) \
	((SA_REGISTERED_LEN(sa, attr) == 0) ? \
	(hdr)->sa_lengths[TOC_LEN_IDX((idx)->sa_idx_tab[(attr)])] : \
	SA_REGISTERED_LEN(sa, attr))
2520a586ceMark Shellenbaum
/*
 * Initialize header `hdr` (a pointer): store SA_MAGIC and encode layout
 * number `num` and header size `size` into sa_layout_info.
 */
#define	SA_SET_HDR(hdr, num, size) \
	{ \
		(hdr)->sa_magic = SA_MAGIC; \
		SA_HDR_LAYOUT_INFO_ENCODE((hdr)->sa_layout_info, num, size); \
	}
2580a586ceMark Shellenbaum
/*
 * Fill in `bulk` (an lvalue with sa_size, sa_buftype and sa_addr members)
 * for attribute `attr`: its length, the buffer type `type`, and its
 * address, computed as the header address plus the offset stored in the
 * attribute's table-of-contents word.
 *
 * `hdl` is accepted but not used by the expansion.
 */
#define	SA_ATTR_INFO(sa, idx, hdr, attr, bulk, type, hdl) \
	{ \
		(bulk).sa_size = SA_ATTR_LEN((sa), (idx), (attr), (hdr)); \
		(bulk).sa_buftype = (type); \
		(bulk).sa_addr = \
		    (void *)((uintptr_t)TOC_OFF((idx)->sa_idx_tab[(attr)]) + \
		    (uintptr_t)(hdr)); \
	}
2670a586ceMark Shellenbaum
/*
 * True when the size encoded in header `hdr` equals the size implied by
 * layout `tb`: sizeof (sa_hdr_phys_t), which already holds one length
 * slot, plus room for the remaining (lot_var_sizes - 1) uint16_t lengths
 * rounded up to a multiple of 8.
 */
#define	SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) \
	(SA_HDR_SIZE((hdr)) == (sizeof (sa_hdr_phys_t) + \
	((tb)->lot_var_sizes > 1 ? P2ROUNDUP(((tb)->lot_var_sizes - 1) * \
	sizeof (uint16_t), 8) : 0)))
2720a586ceMark Shellenbaum
2730a586ceMark Shellenbaumint sa_add_impl(sa_handle_t *, sa_attr_type_t,
2740a586ceMark Shellenbaum    uint32_t, sa_data_locator_t, void *, dmu_tx_t *);
2750a586ceMark Shellenbaum
2760a586ceMark Shellenbaumvoid sa_register_update_callback_locked(objset_t *, sa_update_cb_t *);
2770a586ceMark Shellenbaumint sa_size_locked(sa_handle_t *, sa_attr_type_t, int *);
2780a586ceMark Shellenbaum
2790a586ceMark Shellenbaumvoid sa_default_locator(void **, uint32_t *, uint32_t, boolean_t, void *);
2800a586ceMark Shellenbaumint sa_attr_size(sa_os_t *, sa_idx_tab_t *, sa_attr_type_t,
2810a586ceMark Shellenbaum    uint16_t *, sa_hdr_phys_t *);
2820a586ceMark Shellenbaum
2830a586ceMark Shellenbaum#ifdef	__cplusplus
2840a586ceMark Shellenbaumextern "C" {
2850a586ceMark Shellenbaum#endif
2860a586ceMark Shellenbaum
2870a586ceMark Shellenbaum#ifdef	__cplusplus
2880a586ceMark Shellenbaum}
2890a586ceMark Shellenbaum#endif
2900a586ceMark Shellenbaum
2910a586ceMark Shellenbaum#endif	/* _SYS_SA_IMPL_H */