10a586ceMark Shellenbaum/*
20a586ceMark Shellenbaum * CDDL HEADER START
30a586ceMark Shellenbaum *
40a586ceMark Shellenbaum * The contents of this file are subject to the terms of the
50a586ceMark Shellenbaum * Common Development and Distribution License (the "License").
60a586ceMark Shellenbaum * You may not use this file except in compliance with the License.
70a586ceMark Shellenbaum *
80a586ceMark Shellenbaum * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90a586ceMark Shellenbaum * or http://www.opensolaris.org/os/licensing.
100a586ceMark Shellenbaum * See the License for the specific language governing permissions
110a586ceMark Shellenbaum * and limitations under the License.
120a586ceMark Shellenbaum *
130a586ceMark Shellenbaum * When distributing Covered Code, include this CDDL HEADER in each
140a586ceMark Shellenbaum * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150a586ceMark Shellenbaum * If applicable, add the following below this CDDL HEADER, with the
160a586ceMark Shellenbaum * fields enclosed by brackets "[]" replaced with your own identifying
170a586ceMark Shellenbaum * information: Portions Copyright [yyyy] [name of copyright owner]
180a586ceMark Shellenbaum *
190a586ceMark Shellenbaum * CDDL HEADER END
200a586ceMark Shellenbaum */
21ad135b5Christopher Siden
220a586ceMark Shellenbaum/*
2306e0070Mark Shellenbaum * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24383e7c7Xin Li * Portions Copyright 2011 iXsystems, Inc
257f0bdb4Matthew Ahrens * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
26bc9014eJustin Gibbs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
27c3d26abMatthew Ahrens * Copyright (c) 2014 Integros [integros.com]
28f67950bNasf-Fan * Copyright 2019 Joyent, Inc.
290a586ceMark Shellenbaum */
300a586ceMark Shellenbaum
310a586ceMark Shellenbaum#include <sys/zfs_context.h>
320a586ceMark Shellenbaum#include <sys/types.h>
330a586ceMark Shellenbaum#include <sys/param.h>
340a586ceMark Shellenbaum#include <sys/systm.h>
350a586ceMark Shellenbaum#include <sys/sysmacros.h>
360a586ceMark Shellenbaum#include <sys/dmu.h>
370a586ceMark Shellenbaum#include <sys/dmu_impl.h>
380a586ceMark Shellenbaum#include <sys/dmu_objset.h>
3954811daToomas Soome#include <sys/dmu_tx.h>
400a586ceMark Shellenbaum#include <sys/dbuf.h>
410a586ceMark Shellenbaum#include <sys/dnode.h>
420a586ceMark Shellenbaum#include <sys/zap.h>
430a586ceMark Shellenbaum#include <sys/sa.h>
440a586ceMark Shellenbaum#include <sys/sunddi.h>
450a586ceMark Shellenbaum#include <sys/sa_impl.h>
460a586ceMark Shellenbaum#include <sys/dnode.h>
470a586ceMark Shellenbaum#include <sys/errno.h>
480a586ceMark Shellenbaum#include <sys/zfs_context.h>
490a586ceMark Shellenbaum
50f67950bNasf-Fan#ifdef _KERNEL
51f67950bNasf-Fan#include <sys/zfs_znode.h>
52f67950bNasf-Fan#endif
53f67950bNasf-Fan
540a586ceMark Shellenbaum/*
550a586ceMark Shellenbaum * ZFS System attributes:
560a586ceMark Shellenbaum *
570a586ceMark Shellenbaum * A generic mechanism to allow for arbitrary attributes
580a586ceMark Shellenbaum * to be stored in a dnode.  The data will be stored in the bonus buffer of
590a586ceMark Shellenbaum * the dnode and if necessary a special "spill" block will be used to handle
600a586ceMark Shellenbaum * overflow situations.  The spill block will be sized to fit the data
610a586ceMark Shellenbaum * from 512 - 128K.  When a spill block is used the BP (blkptr_t) for the
620a586ceMark Shellenbaum * spill block is stored at the end of the current bonus buffer.  Any
630a586ceMark Shellenbaum * attributes that would be in the way of the blkptr_t will be relocated
640a586ceMark Shellenbaum * into the spill block.
650a586ceMark Shellenbaum *
660a586ceMark Shellenbaum * Attribute registration:
670a586ceMark Shellenbaum *
680a586ceMark Shellenbaum * Stored persistently on a per dataset basis
690a586ceMark Shellenbaum * a mapping between attribute "string" names and their actual attribute
700a586ceMark Shellenbaum * numeric values, length, and byteswap function.  The names are only used
 * during registration.  All attributes are known by their unique attribute
720a586ceMark Shellenbaum * id value.  If an attribute can have a variable size then the value
730a586ceMark Shellenbaum * 0 will be used to indicate this.
740a586ceMark Shellenbaum *
750a586ceMark Shellenbaum * Attribute Layout:
760a586ceMark Shellenbaum *
770a586ceMark Shellenbaum * Attribute layouts are a way to compactly store multiple attributes, but
780a586ceMark Shellenbaum * without taking the overhead associated with managing each attribute
790a586ceMark Shellenbaum * individually.  Since you will typically have the same set of attributes
800a586ceMark Shellenbaum * stored in the same order a single table will be used to represent that
810a586ceMark Shellenbaum * layout.  The ZPL for example will usually have only about 10 different
820a586ceMark Shellenbaum * layouts (regular files, device files, symlinks,
830a586ceMark Shellenbaum * regular files + scanstamp, files/dir with extended attributes, and then
840a586ceMark Shellenbaum * you have the possibility of all of those minus ACL, because it would
850a586ceMark Shellenbaum * be kicked out into the spill block)
860a586ceMark Shellenbaum *
870a586ceMark Shellenbaum * Layouts are simply an array of the attributes and their
880a586ceMark Shellenbaum * ordering i.e. [0, 1, 4, 5, 2]
890a586ceMark Shellenbaum *
 * Each distinct layout is given a unique layout number and that is what is
910a586ceMark Shellenbaum * stored in the header at the beginning of the SA data buffer.
920a586ceMark Shellenbaum *
930a586ceMark Shellenbaum * A layout only covers a single dbuf (bonus or spill).  If a set of
940a586ceMark Shellenbaum * attributes is split up between the bonus buffer and a spill buffer then
950a586ceMark Shellenbaum * two different layouts will be used.  This allows us to byteswap the
960a586ceMark Shellenbaum * spill without looking at the bonus buffer and keeps the on disk format of
970a586ceMark Shellenbaum * the bonus and spill buffer the same.
980a586ceMark Shellenbaum *
990a586ceMark Shellenbaum * Adding a single attribute will cause the entire set of attributes to
1000a586ceMark Shellenbaum * be rewritten and could result in a new layout number being constructed
1010a586ceMark Shellenbaum * as part of the rewrite if no such layout exists for the new set of
 * attributes.  The new attribute will be appended to the end of the already
1030a586ceMark Shellenbaum * existing attributes.
1040a586ceMark Shellenbaum *
1050a586ceMark Shellenbaum * Both the attribute registration and attribute layout information are
 * stored in normal ZAP attributes.  There should be a small number of
1070a586ceMark Shellenbaum * known layouts and the set of attributes is assumed to typically be quite
1080a586ceMark Shellenbaum * small.
1090a586ceMark Shellenbaum *
1100a586ceMark Shellenbaum * The registered attributes and layout "table" information is maintained
1110a586ceMark Shellenbaum * in core and a special "sa_os_t" is attached to the objset_t.
1120a586ceMark Shellenbaum *
1130a586ceMark Shellenbaum * A special interface is provided to allow for quickly applying
1140a586ceMark Shellenbaum * a large set of attributes at once.  sa_replace_all_by_template() is
1150a586ceMark Shellenbaum * used to set an array of attributes.  This is used by the ZPL when
1160a586ceMark Shellenbaum * creating a brand new file.  The template that is passed into the function
1170a586ceMark Shellenbaum * specifies the attribute, size for variable length attributes, location of
1180a586ceMark Shellenbaum * data and special "data locator" function if the data isn't in a contiguous
1190a586ceMark Shellenbaum * location.
1200a586ceMark Shellenbaum *
1210a586ceMark Shellenbaum * Byteswap implications:
122f717074Will Andrews *
1230a586ceMark Shellenbaum * Since the SA attributes are not entirely self describing we can't do
1240a586ceMark Shellenbaum * the normal byteswap processing.  The special ZAP layout attribute and
1250a586ceMark Shellenbaum * attribute registration attributes define the byteswap function and the
1260a586ceMark Shellenbaum * size of the attributes, unless it is variable sized.
1270a586ceMark Shellenbaum * The normal ZFS byteswapping infrastructure assumes you don't need
1280a586ceMark Shellenbaum * to read any objects in order to do the necessary byteswapping.  Whereas
1290a586ceMark Shellenbaum * SA attributes can only be properly byteswapped if the dataset is opened
1300a586ceMark Shellenbaum * and the layout/attribute ZAP attributes are available.  Because of this
1310a586ceMark Shellenbaum * the SA attributes will be byteswapped when they are first accessed by
1320a586ceMark Shellenbaum * the SA code that will read the SA data.
1330a586ceMark Shellenbaum */
1340a586ceMark Shellenbaum
1350a586ceMark Shellenbaumtypedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t,
1360a586ceMark Shellenbaum    uint16_t length, int length_idx, boolean_t, void *userp);
1370a586ceMark Shellenbaum
1380a586ceMark Shellenbaumstatic int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype);
1390a586ceMark Shellenbaumstatic void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab);
1407f0bdb4Matthew Ahrensstatic sa_idx_tab_t *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype,
1417f0bdb4Matthew Ahrens    sa_hdr_phys_t *hdr);
1420a586ceMark Shellenbaumstatic void sa_idx_tab_rele(objset_t *os, void *arg);
1430a586ceMark Shellenbaumstatic void sa_copy_data(sa_data_locator_t *func, void *start, void *target,
1440a586ceMark Shellenbaum    int buflen);
1450a586ceMark Shellenbaumstatic int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
1460a586ceMark Shellenbaum    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
1470a586ceMark Shellenbaum    uint16_t buflen, dmu_tx_t *tx);
1480a586ceMark Shellenbaum
1490a586ceMark Shellenbaumarc_byteswap_func_t *sa_bswap_table[] = {
1500a586ceMark Shellenbaum	byteswap_uint64_array,
1510a586ceMark Shellenbaum	byteswap_uint32_array,
1520a586ceMark Shellenbaum	byteswap_uint16_array,
1530a586ceMark Shellenbaum	byteswap_uint8_array,
1540a586ceMark Shellenbaum	zfs_acl_byteswap,
1550a586ceMark Shellenbaum};
1560a586ceMark Shellenbaum
/*
 * Copy l bytes of attribute data from s to t.
 *
 * When no data locator is supplied (f == NULL) the copy is done inline:
 * the common 8 and 16 byte sizes are moved as one or two 64-bit stores,
 * anything else goes through bcopy().  When a locator is supplied the
 * work is handed to sa_copy_data().
 *
 * Every argument is parenthesized at each use so that expressions such as
 * "buf + off" bind correctly.  Arguments are still evaluated more than
 * once, so they must not have side effects.
 */
#define	SA_COPY_DATA(f, s, t, l) \
	{ \
		if ((f) == NULL) { \
			if ((l) == 8) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
			} else if ((l) == 16) { \
				*(uint64_t *)(t) = *(uint64_t *)(s); \
				*(uint64_t *)((uintptr_t)(t) + 8) = \
				    *(uint64_t *)((uintptr_t)(s) + 8); \
			} else { \
				bcopy((s), (t), (l)); \
			} \
		} else { \
			sa_copy_data((f), (s), (t), (l)); \
		} \
	}
1720a586ceMark Shellenbaum
1730a586ceMark Shellenbaum/*
1740a586ceMark Shellenbaum * This table is fixed and cannot be changed.  Its purpose is to
1750a586ceMark Shellenbaum * allow the SA code to work with both old/new ZPL file systems.
1760a586ceMark Shellenbaum * It contains the list of legacy attributes.  These attributes aren't
1770a586ceMark Shellenbaum * stored in the "attribute" registry zap objects, since older ZPL file systems
1780a586ceMark Shellenbaum * won't have the registry.  Only objsets of type ZFS_TYPE_FILESYSTEM will
1790a586ceMark Shellenbaum * use this static table.
1800a586ceMark Shellenbaum */
1810a586ceMark Shellenbaumsa_attr_reg_t sa_legacy_attrs[] = {
1820a586ceMark Shellenbaum	{"ZPL_ATIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 0},
1830a586ceMark Shellenbaum	{"ZPL_MTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 1},
1840a586ceMark Shellenbaum	{"ZPL_CTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 2},
1850a586ceMark Shellenbaum	{"ZPL_CRTIME", sizeof (uint64_t) * 2, SA_UINT64_ARRAY, 3},
1860a586ceMark Shellenbaum	{"ZPL_GEN", sizeof (uint64_t), SA_UINT64_ARRAY, 4},
1870a586ceMark Shellenbaum	{"ZPL_MODE", sizeof (uint64_t), SA_UINT64_ARRAY, 5},
1880a586ceMark Shellenbaum	{"ZPL_SIZE", sizeof (uint64_t), SA_UINT64_ARRAY, 6},
1890a586ceMark Shellenbaum	{"ZPL_PARENT", sizeof (uint64_t), SA_UINT64_ARRAY, 7},
1900a586ceMark Shellenbaum	{"ZPL_LINKS", sizeof (uint64_t), SA_UINT64_ARRAY, 8},
1910a586ceMark Shellenbaum	{"ZPL_XATTR", sizeof (uint64_t), SA_UINT64_ARRAY, 9},
1920a586ceMark Shellenbaum	{"ZPL_RDEV", sizeof (uint64_t), SA_UINT64_ARRAY, 10},
1930a586ceMark Shellenbaum	{"ZPL_FLAGS", sizeof (uint64_t), SA_UINT64_ARRAY, 11},
1940a586ceMark Shellenbaum	{"ZPL_UID", sizeof (uint64_t), SA_UINT64_ARRAY, 12},
1950a586ceMark Shellenbaum	{"ZPL_GID", sizeof (uint64_t), SA_UINT64_ARRAY, 13},
1960a586ceMark Shellenbaum	{"ZPL_PAD", sizeof (uint64_t) * 4, SA_UINT64_ARRAY, 14},
1970a586ceMark Shellenbaum	{"ZPL_ZNODE_ACL", 88, SA_UINT8_ARRAY, 15},
1980a586ceMark Shellenbaum};
1990a586ceMark Shellenbaum
2000a586ceMark Shellenbaum/*
2010a586ceMark Shellenbaum * This is only used for objects of type DMU_OT_ZNODE
2020a586ceMark Shellenbaum */
2030a586ceMark Shellenbaumsa_attr_type_t sa_legacy_zpl_layout[] = {
2040a586ceMark Shellenbaum    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
2050a586ceMark Shellenbaum};
2060a586ceMark Shellenbaum
2070a586ceMark Shellenbaum/*
2080a586ceMark Shellenbaum * Special dummy layout used for buffers with no attributes.
2090a586ceMark Shellenbaum */
2100a586ceMark Shellenbaumsa_attr_type_t sa_dummy_zpl_layout[] = { 0 };
2110a586ceMark Shellenbaum
2120a586ceMark Shellenbaumstatic int sa_legacy_attr_count = 16;
2130a586ceMark Shellenbaumstatic kmem_cache_t *sa_cache = NULL;
2140a586ceMark Shellenbaum
2150a586ceMark Shellenbaum/*ARGSUSED*/
2160a586ceMark Shellenbaumstatic int
2170a586ceMark Shellenbaumsa_cache_constructor(void *buf, void *unused, int kmflag)
2180a586ceMark Shellenbaum{
2190a586ceMark Shellenbaum	sa_handle_t *hdl = buf;
2200a586ceMark Shellenbaum
2210a586ceMark Shellenbaum	mutex_init(&hdl->sa_lock, NULL, MUTEX_DEFAULT, NULL);
2220a586ceMark Shellenbaum	return (0);
2230a586ceMark Shellenbaum}
2240a586ceMark Shellenbaum
2250a586ceMark Shellenbaum/*ARGSUSED*/
2260a586ceMark Shellenbaumstatic void
2270a586ceMark Shellenbaumsa_cache_destructor(void *buf, void *unused)
2280a586ceMark Shellenbaum{
2290a586ceMark Shellenbaum	sa_handle_t *hdl = buf;
2300a586ceMark Shellenbaum	mutex_destroy(&hdl->sa_lock);
2310a586ceMark Shellenbaum}
2320a586ceMark Shellenbaum
2330a586ceMark Shellenbaumvoid
2340a586ceMark Shellenbaumsa_cache_init(void)
2350a586ceMark Shellenbaum{
2360a586ceMark Shellenbaum	sa_cache = kmem_cache_create("sa_cache",
2370a586ceMark Shellenbaum	    sizeof (sa_handle_t), 0, sa_cache_constructor,
2380a586ceMark Shellenbaum	    sa_cache_destructor, NULL, NULL, NULL, 0);
2390a586ceMark Shellenbaum}
2400a586ceMark Shellenbaum
2410a586ceMark Shellenbaumvoid
2420a586ceMark Shellenbaumsa_cache_fini(void)
2430a586ceMark Shellenbaum{
2440a586ceMark Shellenbaum	if (sa_cache)
2450a586ceMark Shellenbaum		kmem_cache_destroy(sa_cache);
2460a586ceMark Shellenbaum}
2470a586ceMark Shellenbaum
2480a586ceMark Shellenbaumstatic int
2490a586ceMark Shellenbaumlayout_num_compare(const void *arg1, const void *arg2)
2500a586ceMark Shellenbaum{
251c4ab0d3Gvozden Neskovic	const sa_lot_t *node1 = (const sa_lot_t *)arg1;
252c4ab0d3Gvozden Neskovic	const sa_lot_t *node2 = (const sa_lot_t *)arg2;
2530a586ceMark Shellenbaum
2544d7988dPaul Dagnelie	return (TREE_CMP(node1->lot_num, node2->lot_num));
2550a586ceMark Shellenbaum}
2560a586ceMark Shellenbaum
2570a586ceMark Shellenbaumstatic int
2580a586ceMark Shellenbaumlayout_hash_compare(const void *arg1, const void *arg2)
2590a586ceMark Shellenbaum{
260c4ab0d3Gvozden Neskovic	const sa_lot_t *node1 = (const sa_lot_t *)arg1;
261c4ab0d3Gvozden Neskovic	const sa_lot_t *node2 = (const sa_lot_t *)arg2;
2620a586ceMark Shellenbaum
2634d7988dPaul Dagnelie	int cmp = TREE_CMP(node1->lot_hash, node2->lot_hash);
264c4ab0d3Gvozden Neskovic	if (likely(cmp))
265c4ab0d3Gvozden Neskovic		return (cmp);
266c4ab0d3Gvozden Neskovic
2674d7988dPaul Dagnelie	return (TREE_CMP(node1->lot_instance, node2->lot_instance));
2680a586ceMark Shellenbaum}
2690a586ceMark Shellenbaum
2700a586ceMark Shellenbaumboolean_t
2710a586ceMark Shellenbaumsa_layout_equal(sa_lot_t *tbf, sa_attr_type_t *attrs, int count)
2720a586ceMark Shellenbaum{
2730a586ceMark Shellenbaum	int i;
2740a586ceMark Shellenbaum
2750a586ceMark Shellenbaum	if (count != tbf->lot_attr_count)
2760a586ceMark Shellenbaum		return (1);
2770a586ceMark Shellenbaum
2780a586ceMark Shellenbaum	for (i = 0; i != count; i++) {
2790a586ceMark Shellenbaum		if (attrs[i] != tbf->lot_attrs[i])
2800a586ceMark Shellenbaum			return (1);
2810a586ceMark Shellenbaum	}
2820a586ceMark Shellenbaum	return (0);
2830a586ceMark Shellenbaum}
2840a586ceMark Shellenbaum
/*
 * Per-attribute hash contribution: the zfs_crc64_table entry selected by
 * the low byte of the bitwise complement of the attribute number.  The
 * argument is parenthesized so that an expression argument (e.g. "a | b")
 * is complemented as a whole.
 */
#define	SA_ATTR_HASH(attr) (zfs_crc64_table[(-1ULL ^ (attr)) & 0xFF])
2860a586ceMark Shellenbaum
2870a586ceMark Shellenbaumstatic uint64_t
2880a586ceMark Shellenbaumsa_layout_info_hash(sa_attr_type_t *attrs, int attr_count)
2890a586ceMark Shellenbaum{
2900a586ceMark Shellenbaum	int i;
2910a586ceMark Shellenbaum	uint64_t crc = -1ULL;
2920a586ceMark Shellenbaum
2930a586ceMark Shellenbaum	for (i = 0; i != attr_count; i++)
2940a586ceMark Shellenbaum		crc ^= SA_ATTR_HASH(attrs[i]);
2950a586ceMark Shellenbaum
2960a586ceMark Shellenbaum	return (crc);
2970a586ceMark Shellenbaum}
2980a586ceMark Shellenbaum
2991d8ccc7Mark Shellenbaumstatic int
3001d8ccc7Mark Shellenbaumsa_get_spill(sa_handle_t *hdl)
3010a586ceMark Shellenbaum{
3020a586ceMark Shellenbaum	int rc;
3030a586ceMark Shellenbaum	if (hdl->sa_spill == NULL) {
3040a586ceMark Shellenbaum		if ((rc = dmu_spill_hold_existing(hdl->sa_bonus, NULL,
3050a586ceMark Shellenbaum		    &hdl->sa_spill)) == 0)
3060a586ceMark Shellenbaum			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
3070a586ceMark Shellenbaum	} else {
3080a586ceMark Shellenbaum		rc = 0;
3090a586ceMark Shellenbaum	}
3100a586ceMark Shellenbaum
3111d8ccc7Mark Shellenbaum	return (rc);
3120a586ceMark Shellenbaum}
3130a586ceMark Shellenbaum
3140a586ceMark Shellenbaum/*
3150a586ceMark Shellenbaum * Main attribute lookup/update function
3160a586ceMark Shellenbaum * returns 0 for success or non zero for failures
3170a586ceMark Shellenbaum *
3180a586ceMark Shellenbaum * Operates on bulk array, first failure will abort further processing
3190a586ceMark Shellenbaum */
3200a586ceMark Shellenbaumint
3210a586ceMark Shellenbaumsa_attr_op(sa_handle_t *hdl, sa_bulk_attr_t *bulk, int count,
3220a586ceMark Shellenbaum    sa_data_op_t data_op, dmu_tx_t *tx)
3230a586ceMark Shellenbaum{
3240a586ceMark Shellenbaum	sa_os_t *sa = hdl->sa_os->os_sa;
3250a586ceMark Shellenbaum	int i;
3260a586ceMark Shellenbaum	int error = 0;
3270a586ceMark Shellenbaum	sa_buf_type_t buftypes;
3280a586ceMark Shellenbaum
3290a586ceMark Shellenbaum	buftypes = 0;
3300a586ceMark Shellenbaum
3310a586ceMark Shellenbaum	ASSERT(count > 0);
3320a586ceMark Shellenbaum	for (i = 0; i != count; i++) {
3330a586ceMark Shellenbaum		ASSERT(bulk[i].sa_attr <= hdl->sa_os->os_sa->sa_num_attrs);
3340a586ceMark Shellenbaum
3350a586ceMark Shellenbaum		bulk[i].sa_addr = NULL;
3360a586ceMark Shellenbaum		/* First check the bonus buffer */
3370a586ceMark Shellenbaum
3380a586ceMark Shellenbaum		if (hdl->sa_bonus_tab && TOC_ATTR_PRESENT(
3390a586ceMark Shellenbaum		    hdl->sa_bonus_tab->sa_idx_tab[bulk[i].sa_attr])) {
3400a586ceMark Shellenbaum			SA_ATTR_INFO(sa, hdl->sa_bonus_tab,
3410a586ceMark Shellenbaum			    SA_GET_HDR(hdl, SA_BONUS),
3420a586ceMark Shellenbaum			    bulk[i].sa_attr, bulk[i], SA_BONUS, hdl);
3430a586ceMark Shellenbaum			if (tx && !(buftypes & SA_BONUS)) {
3440a586ceMark Shellenbaum				dmu_buf_will_dirty(hdl->sa_bonus, tx);
3450a586ceMark Shellenbaum				buftypes |= SA_BONUS;
3460a586ceMark Shellenbaum			}
3470a586ceMark Shellenbaum		}
3481d8ccc7Mark Shellenbaum		if (bulk[i].sa_addr == NULL &&
3491d8ccc7Mark Shellenbaum		    ((error = sa_get_spill(hdl)) == 0)) {
3500a586ceMark Shellenbaum			if (TOC_ATTR_PRESENT(
3510a586ceMark Shellenbaum			    hdl->sa_spill_tab->sa_idx_tab[bulk[i].sa_attr])) {
3520a586ceMark Shellenbaum				SA_ATTR_INFO(sa, hdl->sa_spill_tab,
3530a586ceMark Shellenbaum				    SA_GET_HDR(hdl, SA_SPILL),
3540a586ceMark Shellenbaum				    bulk[i].sa_attr, bulk[i], SA_SPILL, hdl);
3550a586ceMark Shellenbaum				if (tx && !(buftypes & SA_SPILL) &&
3560a586ceMark Shellenbaum				    bulk[i].sa_size == bulk[i].sa_length) {
3570a586ceMark Shellenbaum					dmu_buf_will_dirty(hdl->sa_spill, tx);
3580a586ceMark Shellenbaum					buftypes |= SA_SPILL;
3590a586ceMark Shellenbaum				}
3600a586ceMark Shellenbaum			}
3610a586ceMark Shellenbaum		}
3621d8ccc7Mark Shellenbaum		if (error && error != ENOENT) {
3631d8ccc7Mark Shellenbaum			return ((error == ECKSUM) ? EIO : error);
3641d8ccc7Mark Shellenbaum		}
3651d8ccc7Mark Shellenbaum
3660a586ceMark Shellenbaum		switch (data_op) {
3670a586ceMark Shellenbaum		case SA_LOOKUP:
3680a586ceMark Shellenbaum			if (bulk[i].sa_addr == NULL)
369be6fd75Matthew Ahrens				return (SET_ERROR(ENOENT));
3700a586ceMark Shellenbaum			if (bulk[i].sa_data) {
3710a586ceMark Shellenbaum				SA_COPY_DATA(bulk[i].sa_data_func,
3720a586ceMark Shellenbaum				    bulk[i].sa_addr, bulk[i].sa_data,
3730a586ceMark Shellenbaum				    bulk[i].sa_size);
3740a586ceMark Shellenbaum			}
3750a586ceMark Shellenbaum			continue;
3760a586ceMark Shellenbaum
3770a586ceMark Shellenbaum		case SA_UPDATE:
3780a586ceMark Shellenbaum			/* existing rewrite of attr */
3790a586ceMark Shellenbaum			if (bulk[i].sa_addr &&
3800a586ceMark Shellenbaum			    bulk[i].sa_size == bulk[i].sa_length) {
3810a586ceMark Shellenbaum				SA_COPY_DATA(bulk[i].sa_data_func,
3820a586ceMark Shellenbaum				    bulk[i].sa_data, bulk[i].sa_addr,
3830a586ceMark Shellenbaum				    bulk[i].sa_length);
3840a586ceMark Shellenbaum				continue;
3850a586ceMark Shellenbaum			} else if (bulk[i].sa_addr) { /* attr size change */
3860a586ceMark Shellenbaum				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
3870a586ceMark Shellenbaum				    SA_REPLACE, bulk[i].sa_data_func,
3880a586ceMark Shellenbaum				    bulk[i].sa_data, bulk[i].sa_length, tx);
3890a586ceMark Shellenbaum			} else { /* adding new attribute */
3900a586ceMark Shellenbaum				error = sa_modify_attrs(hdl, bulk[i].sa_attr,
3910a586ceMark Shellenbaum				    SA_ADD, bulk[i].sa_data_func,
3920a586ceMark Shellenbaum				    bulk[i].sa_data, bulk[i].sa_length, tx);
3930a586ceMark Shellenbaum			}
3940a586ceMark Shellenbaum			if (error)
3950a586ceMark Shellenbaum				return (error);
3960a586ceMark Shellenbaum			break;
3970a586ceMark Shellenbaum		}
3980a586ceMark Shellenbaum	}
3990a586ceMark Shellenbaum	return (error);
4000a586ceMark Shellenbaum}
4010a586ceMark Shellenbaum
4020a586ceMark Shellenbaumstatic sa_lot_t *
4030a586ceMark Shellenbaumsa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count,
4040a586ceMark Shellenbaum    uint64_t lot_num, uint64_t hash, boolean_t zapadd, dmu_tx_t *tx)
4050a586ceMark Shellenbaum{
4060a586ceMark Shellenbaum	sa_os_t *sa = os->os_sa;
4070a586ceMark Shellenbaum	sa_lot_t *tb, *findtb;
408d158018Bryan Cantrill	int i, size;
4090a586ceMark Shellenbaum	avl_index_t loc;
4100a586ceMark Shellenbaum
4110a586ceMark Shellenbaum	ASSERT(MUTEX_HELD(&sa->sa_lock));
4120a586ceMark Shellenbaum	tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP);
4130a586ceMark Shellenbaum	tb->lot_attr_count = attr_count;
414d158018Bryan Cantrill
415d158018Bryan Cantrill	if ((size = sizeof (sa_attr_type_t) * attr_count) != 0) {
416d158018Bryan Cantrill		tb->lot_attrs = kmem_alloc(size, KM_SLEEP);
417d158018Bryan Cantrill		bcopy(attrs, tb->lot_attrs, size);
418d158018Bryan Cantrill	}
419d158018Bryan Cantrill
4200a586ceMark Shellenbaum	tb->lot_num = lot_num;
4210a586ceMark Shellenbaum	tb->lot_hash = hash;
4220a586ceMark Shellenbaum	tb->lot_instance = 0;
4230a586ceMark Shellenbaum
4240a586ceMark Shellenbaum	if (zapadd) {
4250a586ceMark Shellenbaum		char attr_name[8];
4260a586ceMark Shellenbaum
4270a586ceMark Shellenbaum		if (sa->sa_layout_attr_obj == 0) {
428ad135b5Christopher Siden			sa->sa_layout_attr_obj = zap_create_link(os,
429ad135b5Christopher Siden			    DMU_OT_SA_ATTR_LAYOUTS,
430ad135b5Christopher Siden			    sa->sa_master_obj, SA_LAYOUTS, tx);
4310a586ceMark Shellenbaum		}
4320a586ceMark Shellenbaum
4330a586ceMark Shellenbaum		(void) snprintf(attr_name, sizeof (attr_name),
4340a586ceMark Shellenbaum		    "%d", (int)lot_num);
4350a586ceMark Shellenbaum		VERIFY(0 == zap_update(os, os->os_sa->sa_layout_attr_obj,
4360a586ceMark Shellenbaum		    attr_name, 2, attr_count, attrs, tx));
4370a586ceMark Shellenbaum	}
4380a586ceMark Shellenbaum
4390a586ceMark Shellenbaum	list_create(&tb->lot_idx_tab, sizeof (sa_idx_tab_t),
4400a586ceMark Shellenbaum	    offsetof(sa_idx_tab_t, sa_next));
4410a586ceMark Shellenbaum
4420a586ceMark Shellenbaum	for (i = 0; i != attr_count; i++) {
4430a586ceMark Shellenbaum		if (sa->sa_attr_table[tb->lot_attrs[i]].sa_length == 0)
4440a586ceMark Shellenbaum			tb->lot_var_sizes++;
4450a586ceMark Shellenbaum	}
4460a586ceMark Shellenbaum
4470a586ceMark Shellenbaum	avl_add(&sa->sa_layout_num_tree, tb);
4480a586ceMark Shellenbaum
4490a586ceMark Shellenbaum	/* verify we don't have a hash collision */
4500a586ceMark Shellenbaum	if ((findtb = avl_find(&sa->sa_layout_hash_tree, tb, &loc)) != NULL) {
4510a586ceMark Shellenbaum		for (; findtb && findtb->lot_hash == hash;
4520a586ceMark Shellenbaum		    findtb = AVL_NEXT(&sa->sa_layout_hash_tree, findtb)) {
4530a586ceMark Shellenbaum			if (findtb->lot_instance != tb->lot_instance)
4540a586ceMark Shellenbaum				break;
4550a586ceMark Shellenbaum			tb->lot_instance++;
4560a586ceMark Shellenbaum		}
4570a586ceMark Shellenbaum	}
4580a586ceMark Shellenbaum	avl_add(&sa->sa_layout_hash_tree, tb);
4590a586ceMark Shellenbaum	return (tb);
4600a586ceMark Shellenbaum}
4610a586ceMark Shellenbaum
4620a586ceMark Shellenbaumstatic void
4630a586ceMark Shellenbaumsa_find_layout(objset_t *os, uint64_t hash, sa_attr_type_t *attrs,
4640a586ceMark Shellenbaum    int count, dmu_tx_t *tx, sa_lot_t **lot)
4650a586ceMark Shellenbaum{
4660a586ceMark Shellenbaum	sa_lot_t *tb, tbsearch;
4670a586ceMark Shellenbaum	avl_index_t loc;
4680a586ceMark Shellenbaum	sa_os_t *sa = os->os_sa;
4690a586ceMark Shellenbaum	boolean_t found = B_FALSE;
4700a586ceMark Shellenbaum
4710a586ceMark Shellenbaum	mutex_enter(&sa->sa_lock);
4720a586ceMark Shellenbaum	tbsearch.lot_hash = hash;
4730a586ceMark Shellenbaum	tbsearch.lot_instance = 0;
4740a586ceMark Shellenbaum	tb = avl_find(&sa->sa_layout_hash_tree, &tbsearch, &loc);
4750a586ceMark Shellenbaum	if (tb) {
4760a586ceMark Shellenbaum		for (; tb && tb->lot_hash == hash;
4770a586ceMark Shellenbaum		    tb = AVL_NEXT(&sa->sa_layout_hash_tree, tb)) {
4780a586ceMark Shellenbaum			if (sa_layout_equal(tb, attrs, count) == 0) {
4790a586ceMark Shellenbaum				found = B_TRUE;
4800a586ceMark Shellenbaum				break;
4810a586ceMark Shellenbaum			}
4820a586ceMark Shellenbaum		}
4830a586ceMark Shellenbaum	}
4840a586ceMark Shellenbaum	if (!found) {
4850a586ceMark Shellenbaum		tb = sa_add_layout_entry(os, attrs, count,
4860a586ceMark Shellenbaum		    avl_numnodes(&sa->sa_layout_num_tree), hash, B_TRUE, tx);
4870a586ceMark Shellenbaum	}
4880a586ceMark Shellenbaum	mutex_exit(&sa->sa_lock);
4890a586ceMark Shellenbaum	*lot = tb;
4900a586ceMark Shellenbaum}
4910a586ceMark Shellenbaum
4920a586ceMark Shellenbaumstatic int
4930a586ceMark Shellenbaumsa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx)
4940a586ceMark Shellenbaum{
4950a586ceMark Shellenbaum	int error;
4960a586ceMark Shellenbaum	uint32_t blocksize;
4970a586ceMark Shellenbaum
4980a586ceMark Shellenbaum	if (size == 0) {
4990a586ceMark Shellenbaum		blocksize = SPA_MINBLOCKSIZE;
500b515258Matthew Ahrens	} else if (size > SPA_OLD_MAXBLOCKSIZE) {
5010a586ceMark Shellenbaum		ASSERT(0);
502be6fd75Matthew Ahrens		return (SET_ERROR(EFBIG));
5030a586ceMark Shellenbaum	} else {
5040a586ceMark Shellenbaum		blocksize = P2ROUNDUP_TYPED(size, SPA_MINBLOCKSIZE, uint32_t);
5050a586ceMark Shellenbaum	}
5060a586ceMark Shellenbaum
5070a586ceMark Shellenbaum	error = dbuf_spill_set_blksz(hdl->sa_spill, blocksize, tx);
5080a586ceMark Shellenbaum	ASSERT(error == 0);
5090a586ceMark Shellenbaum	return (error);
5100a586ceMark Shellenbaum}
5110a586ceMark Shellenbaum
5120a586ceMark Shellenbaumstatic void
5130a586ceMark Shellenbaumsa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen)
5140a586ceMark Shellenbaum{
5150a586ceMark Shellenbaum	if (func == NULL) {
5160a586ceMark Shellenbaum		bcopy(datastart, target, buflen);
5170a586ceMark Shellenbaum	} else {
5180a586ceMark Shellenbaum		boolean_t start;
5190a586ceMark Shellenbaum		int bytes;
5200a586ceMark Shellenbaum		void *dataptr;
5210a586ceMark Shellenbaum		void *saptr = target;
5220a586ceMark Shellenbaum		uint32_t length;
5230a586ceMark Shellenbaum
5240a586ceMark Shellenbaum		start = B_TRUE;
5250a586ceMark Shellenbaum		bytes = 0;
5260a586ceMark Shellenbaum		while (bytes < buflen) {
5270a586ceMark Shellenbaum			func(&dataptr, &length, buflen, start, datastart);
5280a586ceMark Shellenbaum			bcopy(dataptr, saptr, length);
5290a586ceMark Shellenbaum			saptr = (void *)((caddr_t)saptr + length);
5300a586ceMark Shellenbaum			bytes += length;
5310a586ceMark Shellenbaum			start = B_FALSE;
5320a586ceMark Shellenbaum		}
5330a586ceMark Shellenbaum	}
5340a586ceMark Shellenbaum}
5350a586ceMark Shellenbaum
5360a586ceMark Shellenbaum/*
5370a586ceMark Shellenbaum * Determine several different sizes
5380a586ceMark Shellenbaum * first the sa header size
5390a586ceMark Shellenbaum * the number of bytes to be stored
5400a586ceMark Shellenbaum * if spill would occur the index in the attribute array is returned
5410a586ceMark Shellenbaum *
5420a586ceMark Shellenbaum * the boolean will_spill will be set when spilling is necessary.  It
5430a586ceMark Shellenbaum * is only set when the buftype is SA_BONUS
5440a586ceMark Shellenbaum */
5450a586ceMark Shellenbaumstatic int
5460a586ceMark Shellenbaumsa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
54754811daToomas Soome    dmu_buf_t *db, sa_buf_type_t buftype, int full_space, int *index,
54854811daToomas Soome    int *total, boolean_t *will_spill)
5490a586ceMark Shellenbaum{
5500a586ceMark Shellenbaum	int var_size = 0;
5510a586ceMark Shellenbaum	int i;
5520a586ceMark Shellenbaum	int hdrsize;
5533502ed6James Pan	int extra_hdrsize;
5540a586ceMark Shellenbaum
5550a586ceMark Shellenbaum	if (buftype == SA_BONUS && sa->sa_force_spill) {
5560a586ceMark Shellenbaum		*total = 0;
5570a586ceMark Shellenbaum		*index = 0;
5580a586ceMark Shellenbaum		*will_spill = B_TRUE;
5590a586ceMark Shellenbaum		return (0);
5600a586ceMark Shellenbaum	}
5610a586ceMark Shellenbaum
5620a586ceMark Shellenbaum	*index = -1;
5630a586ceMark Shellenbaum	*total = 0;
5643502ed6James Pan	*will_spill = B_FALSE;
5650a586ceMark Shellenbaum
5663502ed6James Pan	extra_hdrsize = 0;
5670a586ceMark Shellenbaum	hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 :
5680a586ceMark Shellenbaum	    sizeof (sa_hdr_phys_t);
5690a586ceMark Shellenbaum
570644b952Ned Bass	ASSERT(IS_P2ALIGNED(full_space, 8));
5710a586ceMark Shellenbaum
5720a586ceMark Shellenbaum	for (i = 0; i != attr_count; i++) {
5730a586ceMark Shellenbaum		boolean_t is_var_sz;
5740a586ceMark Shellenbaum
575644b952Ned Bass		*total = P2ROUNDUP(*total, 8);
5760a586ceMark Shellenbaum		*total += attr_desc[i].sa_length;
5773502ed6James Pan		if (*will_spill)
5783502ed6James Pan			continue;
5790a586ceMark Shellenbaum
5800a586ceMark Shellenbaum		is_var_sz = (SA_REGISTERED_LEN(sa, attr_desc[i].sa_attr) == 0);
5810a586ceMark Shellenbaum		if (is_var_sz) {
5820a586ceMark Shellenbaum			var_size++;
5830a586ceMark Shellenbaum		}
5840a586ceMark Shellenbaum
5850a586ceMark Shellenbaum		if (is_var_sz && var_size > 1) {
5863502ed6James Pan			/*
5873502ed6James Pan			 * Don't worry that the spill block might overflow.
5883502ed6James Pan			 * It will be resized if needed in sa_build_layouts().
5893502ed6James Pan			 */
5903502ed6James Pan			if (buftype == SA_SPILL ||
5913502ed6James Pan			    P2ROUNDUP(hdrsize + sizeof (uint16_t), 8) +
5920a586ceMark Shellenbaum			    *total < full_space) {
593644b952Ned Bass				/*
594644b952Ned Bass				 * Account for header space used by array of
595644b952Ned Bass				 * optional sizes of variable-length attributes.
5963502ed6James Pan				 * Record the extra header size in case this
5973502ed6James Pan				 * increase needs to be reversed due to
5983502ed6James Pan				 * spill-over.
599644b952Ned Bass				 */
6000a586ceMark Shellenbaum				hdrsize += sizeof (uint16_t);
6013502ed6James Pan				if (*index != -1)
6023502ed6James Pan					extra_hdrsize += sizeof (uint16_t);
6030a586ceMark Shellenbaum			} else {
6043502ed6James Pan				ASSERT(buftype == SA_BONUS);
6053502ed6James Pan				if (*index == -1)
6063502ed6James Pan					*index = i;
6073502ed6James Pan				*will_spill = B_TRUE;
6080a586ceMark Shellenbaum				continue;
6090a586ceMark Shellenbaum			}
6100a586ceMark Shellenbaum		}
6110a586ceMark Shellenbaum
6120a586ceMark Shellenbaum		/*
6130a586ceMark Shellenbaum		 * find index of where spill *could* occur.
6140a586ceMark Shellenbaum		 * Then continue to count of remainder attribute
6150a586ceMark Shellenbaum		 * space.  The sum is used later for sizing bonus
6160a586ceMark Shellenbaum		 * and spill buffer.
6170a586ceMark Shellenbaum		 */
6180a586ceMark Shellenbaum		if (buftype == SA_BONUS && *index == -1 &&
619383e7c7Xin Li		    *total + P2ROUNDUP(hdrsize, 8) >
6200a586ceMark Shellenbaum		    (full_space - sizeof (blkptr_t))) {
6210a586ceMark Shellenbaum			*index = i;
6220a586ceMark Shellenbaum		}
6230a586ceMark Shellenbaum
624383e7c7Xin Li		if (*total + P2ROUNDUP(hdrsize, 8) > full_space &&
6250a586ceMark Shellenbaum		    buftype == SA_BONUS)
6260a586ceMark Shellenbaum			*will_spill = B_TRUE;
6270a586ceMark Shellenbaum	}
6280a586ceMark Shellenbaum
6293502ed6James Pan	if (*will_spill)
6303502ed6James Pan		hdrsize -= extra_hdrsize;
631644b952Ned Bass
6320a586ceMark Shellenbaum	hdrsize = P2ROUNDUP(hdrsize, 8);
6330a586ceMark Shellenbaum	return (hdrsize);
6340a586ceMark Shellenbaum}
6350a586ceMark Shellenbaum
/*
 * Sum of the attribute bytes and the header bytes.  Both arguments are
 * parenthesized so that operator expressions can be passed safely.
 */
#define	BUF_SPACE_NEEDED(total, header) ((total) + (header))
6370a586ceMark Shellenbaum
6380a586ceMark Shellenbaum/*
6390a586ceMark Shellenbaum * Find layout that corresponds to ordering of attributes
6400a586ceMark Shellenbaum * If not found a new layout number is created and added to
6410a586ceMark Shellenbaum * persistent layout tables.
6420a586ceMark Shellenbaum */
6430a586ceMark Shellenbaumstatic int
6440a586ceMark Shellenbaumsa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
6450a586ceMark Shellenbaum    dmu_tx_t *tx)
6460a586ceMark Shellenbaum{
6470a586ceMark Shellenbaum	sa_os_t *sa = hdl->sa_os->os_sa;
6480a586ceMark Shellenbaum	uint64_t hash;
6490a586ceMark Shellenbaum	sa_buf_type_t buftype;
6500a586ceMark Shellenbaum	sa_hdr_phys_t *sahdr;
6510a586ceMark Shellenbaum	void *data_start;
6520a586ceMark Shellenbaum	int buf_space;
6530a586ceMark Shellenbaum	sa_attr_type_t *attrs, *attrs_start;
6540a586ceMark Shellenbaum	int i, lot_count;
65554811daToomas Soome	int dnodesize;
656d5285caGeorge Wilson	int hdrsize;
657d5285caGeorge Wilson	int spillhdrsize = 0;
6580a586ceMark Shellenbaum	int used;
6590a586ceMark Shellenbaum	dmu_object_type_t bonustype;
6600a586ceMark Shellenbaum	sa_lot_t *lot;
6610a586ceMark Shellenbaum	int len_idx;
6620a586ceMark Shellenbaum	int spill_used;
66354811daToomas Soome	int bonuslen;
6640a586ceMark Shellenbaum	boolean_t spilling;
6650a586ceMark Shellenbaum
6660a586ceMark Shellenbaum	dmu_buf_will_dirty(hdl->sa_bonus, tx);
6670a586ceMark Shellenbaum	bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus);
6680a586ceMark Shellenbaum
66954811daToomas Soome	dmu_object_dnsize_from_db(hdl->sa_bonus, &dnodesize);
67054811daToomas Soome	bonuslen = DN_BONUS_SIZE(dnodesize);
67154811daToomas Soome
6720a586ceMark Shellenbaum	/* first determine bonus header size and sum of all attributes */
6730a586ceMark Shellenbaum	hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus,
67454811daToomas Soome	    SA_BONUS, bonuslen, &i, &used, &spilling);
6750a586ceMark Shellenbaum
676b515258Matthew Ahrens	if (used > SPA_OLD_MAXBLOCKSIZE)
677be6fd75Matthew Ahrens		return (SET_ERROR(EFBIG));
6780a586ceMark Shellenbaum
6790a586ceMark Shellenbaum	VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
68054811daToomas Soome	    MIN(bonuslen - sizeof (blkptr_t), used + hdrsize) :
6810a586ceMark Shellenbaum	    used + hdrsize, tx));
6820a586ceMark Shellenbaum
6830a586ceMark Shellenbaum	ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) ||
6840a586ceMark Shellenbaum	    bonustype == DMU_OT_SA);
6850a586ceMark Shellenbaum
6860a586ceMark Shellenbaum	/* setup and size spill buffer when needed */
6870a586ceMark Shellenbaum	if (spilling) {
6880a586ceMark Shellenbaum		boolean_t dummy;
6890a586ceMark Shellenbaum
6900a586ceMark Shellenbaum		if (hdl->sa_spill == NULL) {
691eb63303Tom Caputi			VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, 0, NULL,
6921d8ccc7Mark Shellenbaum			    &hdl->sa_spill) == 0);
6930a586ceMark Shellenbaum		}
6940a586ceMark Shellenbaum		dmu_buf_will_dirty(hdl->sa_spill, tx);
6950a586ceMark Shellenbaum
6960a586ceMark Shellenbaum		spillhdrsize = sa_find_sizes(sa, &attr_desc[i],
69754811daToomas Soome		    attr_count - i, hdl->sa_spill, SA_SPILL,
69854811daToomas Soome		    hdl->sa_spill->db_size, &i, &spill_used, &dummy);
6990a586ceMark Shellenbaum
700b515258Matthew Ahrens		if (spill_used > SPA_OLD_MAXBLOCKSIZE)
701be6fd75Matthew Ahrens			return (SET_ERROR(EFBIG));
7020a586ceMark Shellenbaum
7030a586ceMark Shellenbaum		buf_space = hdl->sa_spill->db_size - spillhdrsize;
7040a586ceMark Shellenbaum		if (BUF_SPACE_NEEDED(spill_used, spillhdrsize) >
7050a586ceMark Shellenbaum		    hdl->sa_spill->db_size)
7060a586ceMark Shellenbaum			VERIFY(0 == sa_resize_spill(hdl,
7070a586ceMark Shellenbaum			    BUF_SPACE_NEEDED(spill_used, spillhdrsize), tx));
7080a586ceMark Shellenbaum	}
7090a586ceMark Shellenbaum
7100a586ceMark Shellenbaum	/* setup starting pointers to lay down data */
7110a586ceMark Shellenbaum	data_start = (void *)((uintptr_t)hdl->sa_bonus->db_data + hdrsize);
7120a586ceMark Shellenbaum	sahdr = (sa_hdr_phys_t *)hdl->sa_bonus->db_data;
7130a586ceMark Shellenbaum	buftype = SA_BONUS;
7140a586ceMark Shellenbaum
7150a586ceMark Shellenbaum	if (spilling)
7160a586ceMark Shellenbaum		buf_space = (sa->sa_force_spill) ?
7170a586ceMark Shellenbaum		    0 : SA_BLKPTR_SPACE - hdrsize;
7180a586ceMark Shellenbaum	else
7190a586ceMark Shellenbaum		buf_space = hdl->sa_bonus->db_size - hdrsize;
7200a586ceMark Shellenbaum
7210a586ceMark Shellenbaum	attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
7220a586ceMark Shellenbaum	    KM_SLEEP);
7230a586ceMark Shellenbaum	lot_count = 0;
7240a586ceMark Shellenbaum
7250a586ceMark Shellenbaum	for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) {
7260a586ceMark Shellenbaum		uint16_t length;
7270a586ceMark Shellenbaum
728644b952Ned Bass		ASSERT(IS_P2ALIGNED(data_start, 8));
729644b952Ned Bass		ASSERT(IS_P2ALIGNED(buf_space, 8));
7300a586ceMark Shellenbaum		attrs[i] = attr_desc[i].sa_attr;
7310a586ceMark Shellenbaum		length = SA_REGISTERED_LEN(sa, attrs[i]);
7320a586ceMark Shellenbaum		if (length == 0)
7330a586ceMark Shellenbaum			length = attr_desc[i].sa_length;
7340a586ceMark Shellenbaum
7350a586ceMark Shellenbaum		if (buf_space < length) {  /* switch to spill buffer */
736644b952Ned Bass			VERIFY(spilling);
7371412a1aMark Shellenbaum			VERIFY(bonustype == DMU_OT_SA);
7380a586ceMark Shellenbaum			if (buftype == SA_BONUS && !sa->sa_force_spill) {
7390a586ceMark Shellenbaum				sa_find_layout(hdl->sa_os, hash, attrs_start,
7400a586ceMark Shellenbaum				    lot_count, tx, &lot);
7410a586ceMark Shellenbaum				SA_SET_HDR(sahdr, lot->lot_num, hdrsize);
7420a586ceMark Shellenbaum			}
7430a586ceMark Shellenbaum
7440a586ceMark Shellenbaum			buftype = SA_SPILL;
7450a586ceMark Shellenbaum			hash = -1ULL;
7460a586ceMark Shellenbaum			len_idx = 0;
7470a586ceMark Shellenbaum
7480a586ceMark Shellenbaum			sahdr = (sa_hdr_phys_t *)hdl->sa_spill->db_data;
7490a586ceMark Shellenbaum			sahdr->sa_magic = SA_MAGIC;
7500a586ceMark Shellenbaum			data_start = (void *)((uintptr_t)sahdr +
7510a586ceMark Shellenbaum			    spillhdrsize);
7520a586ceMark Shellenbaum			attrs_start = &attrs[i];
7530a586ceMark Shellenbaum			buf_space = hdl->sa_spill->db_size - spillhdrsize;
7540a586ceMark Shellenbaum			lot_count = 0;
7550a586ceMark Shellenbaum		}
7560a586ceMark Shellenbaum		hash ^= SA_ATTR_HASH(attrs[i]);
7570a586ceMark Shellenbaum		attr_desc[i].sa_addr = data_start;
7580a586ceMark Shellenbaum		attr_desc[i].sa_size = length;
7590a586ceMark Shellenbaum		SA_COPY_DATA(attr_desc[i].sa_data_func, attr_desc[i].sa_data,
7600a586ceMark Shellenbaum		    data_start, length);
7610a586ceMark Shellenbaum		if (sa->sa_attr_table[attrs[i]].sa_length == 0) {
7620a586ceMark Shellenbaum			sahdr->sa_lengths[len_idx++] = length;
7630a586ceMark Shellenbaum		}
7640a586ceMark Shellenbaum		data_start = (void *)P2ROUNDUP(((uintptr_t)data_start +
7650a586ceMark Shellenbaum		    length), 8);
7660a586ceMark Shellenbaum		buf_space -= P2ROUNDUP(length, 8);
7670a586ceMark Shellenbaum		lot_count++;
7680a586ceMark Shellenbaum	}
7690a586ceMark Shellenbaum
7700a586ceMark Shellenbaum	sa_find_layout(hdl->sa_os, hash, attrs_start, lot_count, tx, &lot);
7711412a1aMark Shellenbaum
7721412a1aMark Shellenbaum	/*
7731412a1aMark Shellenbaum	 * Verify that old znodes always have layout number 0.
7741412a1aMark Shellenbaum	 * Must be DMU_OT_SA for arbitrary layouts
7751412a1aMark Shellenbaum	 */
7761412a1aMark Shellenbaum	VERIFY((bonustype == DMU_OT_ZNODE && lot->lot_num == 0) ||
7771412a1aMark Shellenbaum	    (bonustype == DMU_OT_SA && lot->lot_num > 1));
7781412a1aMark Shellenbaum
7790a586ceMark Shellenbaum	if (bonustype == DMU_OT_SA) {
7800a586ceMark Shellenbaum		SA_SET_HDR(sahdr, lot->lot_num,
7810a586ceMark Shellenbaum		    buftype == SA_BONUS ? hdrsize : spillhdrsize);
7820a586ceMark Shellenbaum	}
7830a586ceMark Shellenbaum
7840a586ceMark Shellenbaum	kmem_free(attrs, sizeof (sa_attr_type_t) * attr_count);
7850a586ceMark Shellenbaum	if (hdl->sa_bonus_tab) {
7860a586ceMark Shellenbaum		sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
7870a586ceMark Shellenbaum		hdl->sa_bonus_tab = NULL;
7880a586ceMark Shellenbaum	}
7890a586ceMark Shellenbaum	if (!sa->sa_force_spill)
7900a586ceMark Shellenbaum		VERIFY(0 == sa_build_index(hdl, SA_BONUS));
7910a586ceMark Shellenbaum	if (hdl->sa_spill) {
7920a586ceMark Shellenbaum		sa_idx_tab_rele(hdl->sa_os, hdl->sa_spill_tab);
7930a586ceMark Shellenbaum		if (!spilling) {
7940a586ceMark Shellenbaum			/*
7950a586ceMark Shellenbaum			 * remove spill block that is no longer needed.
7960a586ceMark Shellenbaum			 */
7970a586ceMark Shellenbaum			dmu_buf_rele(hdl->sa_spill, NULL);
7980a586ceMark Shellenbaum			hdl->sa_spill = NULL;
7990a586ceMark Shellenbaum			hdl->sa_spill_tab = NULL;
8000a586ceMark Shellenbaum			VERIFY(0 == dmu_rm_spill(hdl->sa_os,
8010a586ceMark Shellenbaum			    sa_handle_object(hdl), tx));
8020a586ceMark Shellenbaum		} else {
8030a586ceMark Shellenbaum			VERIFY(0 == sa_build_index(hdl, SA_SPILL));
8040a586ceMark Shellenbaum		}
8050a586ceMark Shellenbaum	}
8060a586ceMark Shellenbaum
8070a586ceMark Shellenbaum	return (0);
8080a586ceMark Shellenbaum}
8090a586ceMark Shellenbaum
8100a586ceMark Shellenbaumstatic void
8111d8ccc7Mark Shellenbaumsa_free_attr_table(sa_os_t *sa)
8121d8ccc7Mark Shellenbaum{
8131d8ccc7Mark Shellenbaum	int i;
8141d8ccc7Mark Shellenbaum
8151d8ccc7Mark Shellenbaum	if (sa->sa_attr_table == NULL)
8161d8ccc7Mark Shellenbaum		return;
8171d8ccc7Mark Shellenbaum
8181d8ccc7Mark Shellenbaum	for (i = 0; i != sa->sa_num_attrs; i++) {
8191d8ccc7Mark Shellenbaum		if (sa->sa_attr_table[i].sa_name)
8201d8ccc7Mark Shellenbaum			kmem_free(sa->sa_attr_table[i].sa_name,
8211d8ccc7Mark Shellenbaum			    strlen(sa->sa_attr_table[i].sa_name) + 1);
8221d8ccc7Mark Shellenbaum	}
8231d8ccc7Mark Shellenbaum
8241d8ccc7Mark Shellenbaum	kmem_free(sa->sa_attr_table,
8251d8ccc7Mark Shellenbaum	    sizeof (sa_attr_table_t) * sa->sa_num_attrs);
8261d8ccc7Mark Shellenbaum
8271d8ccc7Mark Shellenbaum	sa->sa_attr_table = NULL;
8281d8ccc7Mark Shellenbaum}
8291d8ccc7Mark Shellenbaum
8301d8ccc7Mark Shellenbaumstatic int
8310a586ceMark Shellenbaumsa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count)
8320a586ceMark Shellenbaum{
8330a586ceMark Shellenbaum	sa_os_t *sa = os->os_sa;
8340a586ceMark Shellenbaum	uint64_t sa_attr_count = 0;
835d5285caGeorge Wilson	uint64_t sa_reg_count = 0;
8360a586ceMark Shellenbaum	int error = 0;
8370a586ceMark Shellenbaum	uint64_t attr_value;
8380a586ceMark Shellenbaum	sa_attr_table_t *tb;
8390a586ceMark Shellenbaum	zap_cursor_t zc;
8400a586ceMark Shellenbaum	zap_attribute_t za;
8410a586ceMark Shellenbaum	int registered_count = 0;
8420a586ceMark Shellenbaum	int i;
8430a586ceMark Shellenbaum	dmu_objset_type_t ostype = dmu_objset_type(os);
8440a586ceMark Shellenbaum
8450a586ceMark Shellenbaum	sa->sa_user_table =
8460a586ceMark Shellenbaum	    kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP);
8470a586ceMark Shellenbaum	sa->sa_user_table_sz = count * sizeof (sa_attr_type_t);
8480a586ceMark Shellenbaum
8491d8ccc7Mark Shellenbaum	if (sa->sa_reg_attr_obj != 0) {
8501d8ccc7Mark Shellenbaum		error = zap_count(os, sa->sa_reg_attr_obj,
8511d8ccc7Mark Shellenbaum		    &sa_attr_count);
8521d8ccc7Mark Shellenbaum
8531d8ccc7Mark Shellenbaum		/*
8541d8ccc7Mark Shellenbaum		 * Make sure we retrieved a count and that it isn't zero
8551d8ccc7Mark Shellenbaum		 */
8561d8ccc7Mark Shellenbaum		if (error || (error == 0 && sa_attr_count == 0)) {
8571d8ccc7Mark Shellenbaum			if (error == 0)
858be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
8591d8ccc7Mark Shellenbaum			goto bail;
8601d8ccc7Mark Shellenbaum		}
8611d8ccc7Mark Shellenbaum		sa_reg_count = sa_attr_count;
8621d8ccc7Mark Shellenbaum	}
8630a586ceMark Shellenbaum
8640a586ceMark Shellenbaum	if (ostype == DMU_OST_ZFS && sa_attr_count == 0)
8650a586ceMark Shellenbaum		sa_attr_count += sa_legacy_attr_count;
8660a586ceMark Shellenbaum
8670a586ceMark Shellenbaum	/* Allocate attribute numbers for attributes that aren't registered */
8680a586ceMark Shellenbaum	for (i = 0; i != count; i++) {
8690a586ceMark Shellenbaum		boolean_t found = B_FALSE;
8700a586ceMark Shellenbaum		int j;
8710a586ceMark Shellenbaum
8720a586ceMark Shellenbaum		if (ostype == DMU_OST_ZFS) {
8730a586ceMark Shellenbaum			for (j = 0; j != sa_legacy_attr_count; j++) {
8740a586ceMark Shellenbaum				if (strcmp(reg_attrs[i].sa_name,
8750a586ceMark Shellenbaum				    sa_legacy_attrs[j].sa_name) == 0) {
8760a586ceMark Shellenbaum					sa->sa_user_table[i] =
8770a586ceMark Shellenbaum					    sa_legacy_attrs[j].sa_attr;
8780a586ceMark Shellenbaum					found = B_TRUE;
8790a586ceMark Shellenbaum				}
8800a586ceMark Shellenbaum			}
8810a586ceMark Shellenbaum		}
8820a586ceMark Shellenbaum		if (found)
8830a586ceMark Shellenbaum			continue;
8840a586ceMark Shellenbaum
8850a586ceMark Shellenbaum		if (sa->sa_reg_attr_obj)
8860a586ceMark Shellenbaum			error = zap_lookup(os, sa->sa_reg_attr_obj,
8870a586ceMark Shellenbaum			    reg_attrs[i].sa_name, 8, 1, &attr_value);
8880a586ceMark Shellenbaum		else
889be6fd75Matthew Ahrens			error = SET_ERROR(ENOENT);
8900a586ceMark Shellenbaum		switch (error) {
8910a586ceMark Shellenbaum		case ENOENT:
8920a586ceMark Shellenbaum			sa->sa_user_table[i] = (sa_attr_type_t)sa_attr_count;
8930a586ceMark Shellenbaum			sa_attr_count++;
8940a586ceMark Shellenbaum			break;
8950a586ceMark Shellenbaum		case 0:
8960a586ceMark Shellenbaum			sa->sa_user_table[i] = ATTR_NUM(attr_value);
8970a586ceMark Shellenbaum			break;
8981d8ccc7Mark Shellenbaum		default:
8991d8ccc7Mark Shellenbaum			goto bail;
9000a586ceMark Shellenbaum		}
9010a586ceMark Shellenbaum	}
9020a586ceMark Shellenbaum
9031d8ccc7Mark Shellenbaum	sa->sa_num_attrs = sa_attr_count;
9041d8ccc7Mark Shellenbaum	tb = sa->sa_attr_table =
9050a586ceMark Shellenbaum	    kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP);
9060a586ceMark Shellenbaum
9070a586ceMark Shellenbaum	/*
9080a586ceMark Shellenbaum	 * Attribute table is constructed from requested attribute list,
9090a586ceMark Shellenbaum	 * previously foreign registered attributes, and also the legacy
9100a586ceMark Shellenbaum	 * ZPL set of attributes.
9110a586ceMark Shellenbaum	 */
9120a586ceMark Shellenbaum
9130a586ceMark Shellenbaum	if (sa->sa_reg_attr_obj) {
9140a586ceMark Shellenbaum		for (zap_cursor_init(&zc, os, sa->sa_reg_attr_obj);
9151d8ccc7Mark Shellenbaum		    (error = zap_cursor_retrieve(&zc, &za)) == 0;
9160a586ceMark Shellenbaum		    zap_cursor_advance(&zc)) {
9170a586ceMark Shellenbaum			uint64_t value;
9180a586ceMark Shellenbaum			value  = za.za_first_integer;
9190a586ceMark Shellenbaum
9200a586ceMark Shellenbaum			registered_count++;
9210a586ceMark Shellenbaum			tb[ATTR_NUM(value)].sa_attr = ATTR_NUM(value);
9220a586ceMark Shellenbaum			tb[ATTR_NUM(value)].sa_length = ATTR_LENGTH(value);
9230a586ceMark Shellenbaum			tb[ATTR_NUM(value)].sa_byteswap = ATTR_BSWAP(value);
9240a586ceMark Shellenbaum			tb[ATTR_NUM(value)].sa_registered = B_TRUE;
9250a586ceMark Shellenbaum
9260a586ceMark Shellenbaum			if (tb[ATTR_NUM(value)].sa_name) {
9270a586ceMark Shellenbaum				continue;
9280a586ceMark Shellenbaum			}
9290a586ceMark Shellenbaum			tb[ATTR_NUM(value)].sa_name =
9300a586ceMark Shellenbaum			    kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP);
9310a586ceMark Shellenbaum			(void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name,
9320a586ceMark Shellenbaum			    strlen(za.za_name) +1);
9330a586ceMark Shellenbaum		}
9340a586ceMark Shellenbaum		zap_cursor_fini(&zc);
9351d8ccc7Mark Shellenbaum		/*
9361d8ccc7Mark Shellenbaum		 * Make sure we processed the correct number of registered
9371d8ccc7Mark Shellenbaum		 * attributes
9381d8ccc7Mark Shellenbaum		 */
9391d8ccc7Mark Shellenbaum		if (registered_count != sa_reg_count) {
9401d8ccc7Mark Shellenbaum			ASSERT(error != 0);
9411d8ccc7Mark Shellenbaum			goto bail;
9421d8ccc7Mark Shellenbaum		}
9431d8ccc7Mark Shellenbaum
9440a586ceMark Shellenbaum	}
9450a586ceMark Shellenbaum
9460a586ceMark Shellenbaum	if (ostype == DMU_OST_ZFS) {
9470a586ceMark Shellenbaum		for (i = 0; i != sa_legacy_attr_count; i++) {
9480a586ceMark Shellenbaum			if (tb[i].sa_name)
9490a586ceMark Shellenbaum				continue;
9500a586ceMark Shellenbaum			tb[i].sa_attr = sa_legacy_attrs[i].sa_attr;
9510a586ceMark Shellenbaum			tb[i].sa_length = sa_legacy_attrs[i].sa_length;
9520a586ceMark Shellenbaum			tb[i].sa_byteswap = sa_legacy_attrs[i].sa_byteswap;
9530a586ceMark Shellenbaum			tb[i].sa_registered = B_FALSE;
9540a586ceMark Shellenbaum			tb[i].sa_name =
9550a586ceMark Shellenbaum			    kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1,
9560a586ceMark Shellenbaum			    KM_SLEEP);
9570a586ceMark Shellenbaum			(void) strlcpy(tb[i].sa_name,
9580a586ceMark Shellenbaum			    sa_legacy_attrs[i].sa_name,
9590a586ceMark Shellenbaum			    strlen(sa_legacy_attrs[i].sa_name) + 1);
9600a586ceMark Shellenbaum		}
9610a586ceMark Shellenbaum	}
9620a586ceMark Shellenbaum
9630a586ceMark Shellenbaum	for (i = 0; i != count; i++) {
9640a586ceMark Shellenbaum		sa_attr_type_t attr_id;
9650a586ceMark Shellenbaum
9660a586ceMark Shellenbaum		attr_id = sa->sa_user_table[i];
9670a586ceMark Shellenbaum		if (tb[attr_id].sa_name)
9680a586ceMark Shellenbaum			continue;
9690a586ceMark Shellenbaum
9700a586ceMark Shellenbaum		tb[attr_id].sa_length = reg_attrs[i].sa_length;
9710a586ceMark Shellenbaum		tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap;
9720a586ceMark Shellenbaum		tb[attr_id].sa_attr = attr_id;
9730a586ceMark Shellenbaum		tb[attr_id].sa_name =
9740a586ceMark Shellenbaum		    kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP);
9750a586ceMark Shellenbaum		(void) strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name,
9760a586ceMark Shellenbaum		    strlen(reg_attrs[i].sa_name) + 1);
9770a586ceMark Shellenbaum	}
9780a586ceMark Shellenbaum
9791d8ccc7Mark Shellenbaum	sa->sa_need_attr_registration =
9800a586ceMark Shellenbaum	    (sa_attr_count != registered_count);
9811d8ccc7Mark Shellenbaum
9821d8ccc7Mark Shellenbaum	return (0);
9831d8ccc7Mark Shellenbaumbail:
9841d8ccc7Mark Shellenbaum	kmem_free(sa->sa_user_table, count * sizeof (sa_attr_type_t));
9851d8ccc7Mark Shellenbaum	sa->sa_user_table = NULL;
9861d8ccc7Mark Shellenbaum	sa_free_attr_table(sa);
9871d8ccc7Mark Shellenbaum	return ((error != 0) ? error : EINVAL);
9880a586ceMark Shellenbaum}
9890a586ceMark Shellenbaum
9901d8ccc7Mark Shellenbaumint
9911d8ccc7Mark Shellenbaumsa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
9921d8ccc7Mark Shellenbaum    sa_attr_type_t **user_table)
9930a586ceMark Shellenbaum{
9940a586ceMark Shellenbaum	zap_cursor_t zc;
9950a586ceMark Shellenbaum	zap_attribute_t za;
9960a586ceMark Shellenbaum	sa_os_t *sa;
9970a586ceMark Shellenbaum	dmu_objset_type_t ostype = dmu_objset_type(os);
9980a586ceMark Shellenbaum	sa_attr_type_t *tb;
9991d8ccc7Mark Shellenbaum	int error;
10000a586ceMark Shellenbaum
10013b2aab1Matthew Ahrens	mutex_enter(&os->os_user_ptr_lock);
10020a586ceMark Shellenbaum	if (os->os_sa) {
10030a586ceMark Shellenbaum		mutex_enter(&os->os_sa->sa_lock);
10043b2aab1Matthew Ahrens		mutex_exit(&os->os_user_ptr_lock);
10050a586ceMark Shellenbaum		tb = os->os_sa->sa_user_table;
10060a586ceMark Shellenbaum		mutex_exit(&os->os_sa->sa_lock);
10071d8ccc7Mark Shellenbaum		*user_table = tb;
10081d8ccc7Mark Shellenbaum		return (0);
10090a586ceMark Shellenbaum	}
10100a586ceMark Shellenbaum
10110a586ceMark Shellenbaum	sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP);
10120a586ceMark Shellenbaum	mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL);
10130a586ceMark Shellenbaum	sa->sa_master_obj = sa_obj;
10140a586ceMark Shellenbaum
10151d8ccc7Mark Shellenbaum	os->os_sa = sa;
10160a586ceMark Shellenbaum	mutex_enter(&sa->sa_lock);
10173b2aab1Matthew Ahrens	mutex_exit(&os->os_user_ptr_lock);
10180a586ceMark Shellenbaum	avl_create(&sa->sa_layout_num_tree, layout_num_compare,
10190a586ceMark Shellenbaum	    sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node));
10200a586ceMark Shellenbaum	avl_create(&sa->sa_layout_hash_tree, layout_hash_compare,
10210a586ceMark Shellenbaum	    sizeof (sa_lot_t), offsetof(sa_lot_t, lot_hash_node));
10220a586ceMark Shellenbaum
10230a586ceMark Shellenbaum	if (sa_obj) {
10240a586ceMark Shellenbaum		error = zap_lookup(os, sa_obj, SA_LAYOUTS,
10250a586ceMark Shellenbaum		    8, 1, &sa->sa_layout_attr_obj);
10261d8ccc7Mark Shellenbaum		if (error != 0 && error != ENOENT)
10271d8ccc7Mark Shellenbaum			goto fail;
10280a586ceMark Shellenbaum		error = zap_lookup(os, sa_obj, SA_REGISTRY,
10290a586ceMark Shellenbaum		    8, 1, &sa->sa_reg_attr_obj);
10301d8ccc7Mark Shellenbaum		if (error != 0 && error != ENOENT)
10311d8ccc7Mark Shellenbaum			goto fail;
10320a586ceMark Shellenbaum	}
10330a586ceMark Shellenbaum
10341d8ccc7Mark Shellenbaum	if ((error = sa_attr_table_setup(os, reg_attrs, count)) != 0)
10351d8ccc7Mark Shellenbaum		goto fail;
10360a586ceMark Shellenbaum
10370a586ceMark Shellenbaum	if (sa->sa_layout_attr_obj != 0) {
10381d8ccc7Mark Shellenbaum		uint64_t layout_count;
10391d8ccc7Mark Shellenbaum
10401d8ccc7Mark Shellenbaum		error = zap_count(os, sa->sa_layout_attr_obj,
10411d8ccc7Mark Shellenbaum		    &layout_count);
10421d8ccc7Mark Shellenbaum
10431d8ccc7Mark Shellenbaum		/*
10441d8ccc7Mark Shellenbaum		 * Layout number count should be > 0
10451d8ccc7Mark Shellenbaum		 */
10461d8ccc7Mark Shellenbaum		if (error || (error == 0 && layout_count == 0)) {
10471d8ccc7Mark Shellenbaum			if (error == 0)
1048be6fd75Matthew Ahrens				error = SET_ERROR(EINVAL);
10491d8ccc7Mark Shellenbaum			goto fail;
10501d8ccc7Mark Shellenbaum		}
10511d8ccc7Mark Shellenbaum
10520a586ceMark Shellenbaum		for (zap_cursor_init(&zc, os, sa->sa_layout_attr_obj);
10531d8ccc7Mark Shellenbaum		    (error = zap_cursor_retrieve(&zc, &za)) == 0;
10540a586ceMark Shellenbaum		    zap_cursor_advance(&zc)) {
10550a586ceMark Shellenbaum			sa_attr_type_t *lot_attrs;
10560a586ceMark Shellenbaum			uint64_t lot_num;
10570a586ceMark Shellenbaum
10580a586ceMark Shellenbaum			lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) *
10590a586ceMark Shellenbaum			    za.za_num_integers, KM_SLEEP);
10600a586ceMark Shellenbaum
10611d8ccc7Mark Shellenbaum			if ((error = (zap_lookup(os, sa->sa_layout_attr_obj,
10621d8ccc7Mark Shellenbaum			    za.za_name, 2, za.za_num_integers,
10631d8ccc7Mark Shellenbaum			    lot_attrs))) != 0) {
10641d8ccc7Mark Shellenbaum				kmem_free(lot_attrs, sizeof (sa_attr_type_t) *
10651d8ccc7Mark Shellenbaum				    za.za_num_integers);
10661d8ccc7Mark Shellenbaum				break;
10671d8ccc7Mark Shellenbaum			}
10680a586ceMark Shellenbaum			VERIFY(ddi_strtoull(za.za_name, NULL, 10,
10690a586ceMark Shellenbaum			    (unsigned long long *)&lot_num) == 0);
10700a586ceMark Shellenbaum
10710a586ceMark Shellenbaum			(void) sa_add_layout_entry(os, lot_attrs,
10720a586ceMark Shellenbaum			    za.za_num_integers, lot_num,
10730a586ceMark Shellenbaum			    sa_layout_info_hash(lot_attrs,
10740a586ceMark Shellenbaum			    za.za_num_integers), B_FALSE, NULL);
10750a586ceMark Shellenbaum			kmem_free(lot_attrs, sizeof (sa_attr_type_t) *
10760a586ceMark Shellenbaum			    za.za_num_integers);
10770a586ceMark Shellenbaum		}
10780a586ceMark Shellenbaum		zap_cursor_fini(&zc);
10791d8ccc7Mark Shellenbaum
10801d8ccc7Mark Shellenbaum		/*
10811d8ccc7Mark Shellenbaum		 * Make sure layout count matches number of entries added
10821d8ccc7Mark Shellenbaum		 * to AVL tree
10831d8ccc7Mark Shellenbaum		 */
10841d8ccc7Mark Shellenbaum		if (avl_numnodes(&sa->sa_layout_num_tree) != layout_count) {
10851d8ccc7Mark Shellenbaum			ASSERT(error != 0);
10861d8ccc7Mark Shellenbaum			goto fail;
10871d8ccc7Mark Shellenbaum		}
10880a586ceMark Shellenbaum	}
10890a586ceMark Shellenbaum
10900a586ceMark Shellenbaum	/* Add special layout number for old ZNODES */
10910a586ceMark Shellenbaum	if (ostype == DMU_OST_ZFS) {
10920a586ceMark Shellenbaum		(void) sa_add_layout_entry(os, sa_legacy_zpl_layout,
10930a586ceMark Shellenbaum		    sa_legacy_attr_count, 0,
10940a586ceMark Shellenbaum		    sa_layout_info_hash(sa_legacy_zpl_layout,
10950a586ceMark Shellenbaum		    sa_legacy_attr_count), B_FALSE, NULL);
10960a586ceMark Shellenbaum
10970a586ceMark Shellenbaum		(void) sa_add_layout_entry(os, sa_dummy_zpl_layout, 0, 1,
10980a586ceMark Shellenbaum		    0, B_FALSE, NULL);
10990a586ceMark Shellenbaum	}
11001d8ccc7Mark Shellenbaum	*user_table = os->os_sa->sa_user_table;
11010a586ceMark Shellenbaum	mutex_exit(&sa->sa_lock);
11021d8ccc7Mark Shellenbaum	return (0);
11031d8ccc7Mark Shellenbaumfail:
11041d8ccc7Mark Shellenbaum	os->os_sa = NULL;
11051d8ccc7Mark Shellenbaum	sa_free_attr_table(sa);
11061d8ccc7Mark Shellenbaum	if (sa->sa_user_table)
11071d8ccc7Mark Shellenbaum		kmem_free(sa->sa_user_table, sa->sa_user_table_sz);
11081d8ccc7Mark Shellenbaum	mutex_exit(&sa->sa_lock);
1109d2b3cbbJorgen Lundman	avl_destroy(&sa->sa_layout_hash_tree);
1110d2b3cbbJorgen Lundman	avl_destroy(&sa->sa_layout_num_tree);
1111d2b3cbbJorgen Lundman	mutex_destroy(&sa->sa_lock);
11121d8ccc7Mark Shellenbaum	kmem_free(sa, sizeof (sa_os_t));
11131d8ccc7Mark Shellenbaum	return ((error == ECKSUM) ? EIO : error);
11140a586ceMark Shellenbaum}
11150a586ceMark Shellenbaum
11160a586ceMark Shellenbaumvoid
11170a586ceMark Shellenbaumsa_tear_down(objset_t *os)
11180a586ceMark Shellenbaum{
11190a586ceMark Shellenbaum	sa_os_t *sa = os->os_sa;
11200a586ceMark Shellenbaum	sa_lot_t *layout;
11210a586ceMark Shellenbaum	void *cookie;
11220a586ceMark Shellenbaum
11230a586ceMark Shellenbaum	kmem_free(sa->sa_user_table, sa->sa_user_table_sz);
11240a586ceMark Shellenbaum
11250a586ceMark Shellenbaum	/* Free up attr table */
11260a586ceMark Shellenbaum
11271d8ccc7Mark Shellenbaum	sa_free_attr_table(sa);
11280a586ceMark Shellenbaum
11290a586ceMark Shellenbaum	cookie = NULL;
11300a586ceMark Shellenbaum	while (layout = avl_destroy_nodes(&sa->sa_layout_hash_tree, &cookie)) {
11310a586ceMark Shellenbaum		sa_idx_tab_t *tab;
11320a586ceMark Shellenbaum		while (tab = list_head(&layout->lot_idx_tab)) {
1133e914aceTim Schumacher			ASSERT(zfs_refcount_count(&tab->sa_refcount));
11340a586ceMark Shellenbaum			sa_idx_tab_rele(os, tab);
11350a586ceMark Shellenbaum		}
11360a586ceMark Shellenbaum	}
11370a586ceMark Shellenbaum
11380a586ceMark Shellenbaum	cookie = NULL;
11390a586ceMark Shellenbaum	while (layout = avl_destroy_nodes(&sa->sa_layout_num_tree, &cookie)) {
11400a586ceMark Shellenbaum		kmem_free(layout->lot_attrs,
11410a586ceMark Shellenbaum		    sizeof (sa_attr_type_t) * layout->lot_attr_count);
11420a586ceMark Shellenbaum		kmem_free(layout, sizeof (sa_lot_t));
11430a586ceMark Shellenbaum	}
11440a586ceMark Shellenbaum
11450a586ceMark Shellenbaum	avl_destroy(&sa->sa_layout_hash_tree);
11460a586ceMark Shellenbaum	avl_destroy(&sa->sa_layout_num_tree);
1147d2b3cbbJorgen Lundman	mutex_destroy(&sa->sa_lock);
11480a586ceMark Shellenbaum
11490a586ceMark Shellenbaum	kmem_free(sa, sizeof (sa_os_t));
11500a586ceMark Shellenbaum	os->os_sa = NULL;
11510a586ceMark Shellenbaum}
1152