2efb8094ahrens * CDDL HEADER START
3efb8094ahrens *
4efb8094ahrens * The contents of this file are subject to the terms of the
5efb8094ahrens * Common Development and Distribution License (the "License").
6efb8094ahrens * You may not use this file except in compliance with the License.
7efb8094ahrens *
8efb8094ahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9efb8094ahrens * or http://www.opensolaris.org/os/licensing.
10efb8094ahrens * See the License for the specific language governing permissions
11efb8094ahrens * and limitations under the License.
12efb8094ahrens *
13efb8094ahrens * When distributing Covered Code, include this CDDL HEADER in each
14efb8094ahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15efb8094ahrens * If applicable, add the following below this CDDL HEADER, with the
16efb8094ahrens * fields enclosed by brackets "[]" replaced with your own identifying
17efb8094ahrens * information: Portions Copyright [yyyy] [name of copyright owner]
18efb8094ahrens *
19efb8094ahrens * CDDL HEADER END
20efb8094ahrens */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23ec5cf9dAlexander Stetsenko * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24ca0cc39Matthew Ahrens * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25a2afb61Jerry Jelinek * Copyright (c) 2014, Joyent, Inc. All rights reserved.
26e77d42eMatthew Ahrens * Copyright 2014 HybridCluster. All rights reserved.
27880094bAndrew Stormont * Copyright 2016 RackTop Systems.
28c3d26abMatthew Ahrens * Copyright (c) 2014 Integros [integros.com]
29ec5cf9dAlexander Stetsenko */
31efb8094ahrens#include <sys/dmu.h>
32efb8094ahrens#include <sys/dmu_impl.h>
33efb8094ahrens#include <sys/dmu_tx.h>
34efb8094ahrens#include <sys/dbuf.h>
35efb8094ahrens#include <sys/dnode.h>
36efb8094ahrens#include <sys/zfs_context.h>
37efb8094ahrens#include <sys/dmu_objset.h>
38efb8094ahrens#include <sys/dmu_traverse.h>
39efb8094ahrens#include <sys/dsl_dataset.h>
40efb8094ahrens#include <sys/dsl_dir.h>
4192241e0Tom Erickson#include <sys/dsl_prop.h>
42efb8094ahrens#include <sys/dsl_pool.h>
43efb8094ahrens#include <sys/dsl_synctask.h>
44efb8094ahrens#include <sys/zfs_ioctl.h>
45efb8094ahrens#include <sys/zap.h>
46efb8094ahrens#include <sys/zio_checksum.h>
47dc7cd54Mark Shellenbaum#include <sys/zfs_znode.h>
48cde58dbMatthew Ahrens#include <zfs_fletcher.h>
499e69d7dLori Alt#include <sys/avl.h>
508e71447Lori Alt#include <sys/ddt.h>
51c99e4bdChris Kirby#include <sys/zfs_onexit.h>
523b2aab1Matthew Ahrens#include <sys/dmu_send.h>
533b2aab1Matthew Ahrens#include <sys/dsl_destroy.h>
545d7b4d4Matthew Ahrens#include <sys/blkptr.h>
5578f1710Matthew Ahrens#include <sys/dsl_bookmark.h>
565d7b4d4Matthew Ahrens#include <sys/zfeature.h>
57a2cdcddPaul Dagnelie#include <sys/bqueue.h>
/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;
/*
 * Bytes of traversal data queued ahead of the send thread; presumably the
 * bqueue_t size limit — confirm against the bqueue_init() caller.
 */
int zfs_send_queue_length = SPA_MAXBLOCKSIZE;
/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */
int zfs_send_set_freerecords_bit = B_TRUE;
/* Set this tunable to FALSE to disable sending unmodified spill blocks. */
int zfs_send_unmodified_spill_blocks = B_TRUE;

/*
 * Use this to override the recordsize calculation for fast zfs send estimates.
 */
uint64_t zfs_override_estimate_recordsize = 0;
71df477c0Paul Dagnelie
/*
 * Number of bytes of object data spanned by one block pointer at the given
 * indirection "level" (level 0 == data blocks).  datablkszsec is the data
 * block size in 512-byte sectors, hence the SPA_MINBLOCKSHIFT shift.
 */
#define	BP_SPAN(datablkszsec, indblkshift, level) \
	(((uint64_t)datablkszsec) << (SPA_MINBLOCKSHIFT + \
	(level) * (indblkshift - SPA_BLKPTRSHIFT)))
75a2cdcddPaul Dagnelie
/* Arguments handed to the dataset-traversal (prefetch) thread. */
struct send_thread_arg {
	bqueue_t	q;		/* queue feeding send_block_record's */
	dsl_dataset_t	*ds;		/* Dataset to traverse */
	uint64_t	fromtxg;	/* Traverse from this txg */
	int		flags;		/* flags to pass to traverse_dataset */
	int		error_code;	/* error reported by the traversal */
	boolean_t	cancel;		/* set to request the traversal stop */
	zbookmark_phys_t resume;	/* bookmark to resume a partial send */
};
85a2cdcddPaul Dagnelie
/* One block pointer's worth of work, passed through the bqueue. */
struct send_block_record {
	boolean_t		eos_marker; /* Marks the end of the stream */
	blkptr_t		bp;	    /* the block pointer itself */
	zbookmark_phys_t	zb;	    /* logical location of bp */
	uint8_t			indblkshift; /* owning dnode's indirect shift */
	uint16_t		datablkszsec; /* data block size, 512B sectors */
	bqueue_node_t		ln;	    /* linkage into the bqueue */
};
94a2cdcddPaul Dagnelie
95eb63303Tom Caputistatic int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data);
96eb63303Tom Caputi
97efb8094ahrensstatic int
984e3c9f4Bill Pijewskidump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
1009c3fd12Matthew Ahrens	dsl_dataset_t *ds = dmu_objset_ds(dsp->dsa_os);
101efb8094ahrens	ssize_t resid; /* have to get resid to get detailed errno */
102c20404fEli Rosenthal
103c20404fEli Rosenthal	/*
104eb63303Tom Caputi	 * The code does not rely on len being a multiple of 8.  We keep
105c20404fEli Rosenthal	 * this assertion because of the corresponding assertion in
106c20404fEli Rosenthal	 * receive_read().  Keeping this assertion ensures that we do not
107c20404fEli Rosenthal	 * inadvertently break backwards compatibility (causing the assertion
108eb63303Tom Caputi	 * in receive_read() to trigger on old software). Newer feature flags
109eb63303Tom Caputi	 * (such as raw send) may break this assertion since they were
110eb63303Tom Caputi	 * introduced after the requirement was made obsolete.
111c20404fEli Rosenthal	 */
112c20404fEli Rosenthal
113eb63303Tom Caputi	ASSERT(len % 8 == 0 ||
114eb63303Tom Caputi	    (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0);
1164e3c9f4Bill Pijewski	dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
117efb8094ahrens	    (caddr_t)buf, len,
118efb8094ahrens	    0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
1194e3c9f4Bill Pijewski
1204e3c9f4Bill Pijewski	mutex_enter(&ds->ds_sendstream_lock);
1214e3c9f4Bill Pijewski	*dsp->dsa_off += len;
1224e3c9f4Bill Pijewski	mutex_exit(&ds->ds_sendstream_lock);
1234e3c9f4Bill Pijewski
1244e3c9f4Bill Pijewski	return (dsp->dsa_err);
/*
 * For all record types except BEGIN, fill in the checksum (overlaid in
 * drr_u.drr_checksum.drr_checksum).  The checksum verifies everything
 * up to the start of the checksum itself.
 */
static int
dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
{
	/* The checksum field must be the last thing in the record. */
	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
	/* Fold everything up to (but not including) the checksum field. */
	(void) fletcher_4_incremental_native(dsp->dsa_drr,
	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
	    &dsp->dsa_zc);
	if (dsp->dsa_drr->drr_type == DRR_BEGIN) {
		/* BEGIN records go out with a zero checksum field. */
		dsp->dsa_sent_begin = B_TRUE;
	} else {
		ASSERT(ZIO_CHECKSUM_IS_ZERO(&dsp->dsa_drr->drr_u.
		    drr_checksum.drr_checksum));
		dsp->dsa_drr->drr_u.drr_checksum.drr_checksum = dsp->dsa_zc;
	}
	if (dsp->dsa_drr->drr_type == DRR_END) {
		dsp->dsa_sent_end = B_TRUE;
	}
	/* The (possibly zero) checksum field itself is also checksummed. */
	(void) fletcher_4_incremental_native(&dsp->dsa_drr->
	    drr_u.drr_checksum.drr_checksum,
	    sizeof (zio_cksum_t), &dsp->dsa_zc);
	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
		return (SET_ERROR(EINTR));
	if (payload_len != 0) {
		/* The payload follows the fixed-size record in the stream. */
		(void) fletcher_4_incremental_native(payload, payload_len,
		    &dsp->dsa_zc);
		if (dump_bytes(dsp, payload, payload_len) != 0)
			return (SET_ERROR(EINTR));
	}
	return (0);
}
16398110f0Matthew Ahrens
16468ecb2ePaul Dagnelie/*
16568ecb2ePaul Dagnelie * Fill in the drr_free struct, or perform aggregation if the previous record is
16668ecb2ePaul Dagnelie * also a free record, and the two are adjacent.
16768ecb2ePaul Dagnelie *
16868ecb2ePaul Dagnelie * Note that we send free records even for a full send, because we want to be
16968ecb2ePaul Dagnelie * able to receive a full send as a clone, which requires a list of all the free
17068ecb2ePaul Dagnelie * and freeobject records that were generated on the source.
17168ecb2ePaul Dagnelie */
172efb8094ahrensstatic int
1734e3c9f4Bill Pijewskidump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
174efb8094ahrens    uint64_t length)
1764e3c9f4Bill Pijewski	struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
1779e69d7dLori Alt
1782f3d878Matthew Ahrens	/*
1792f3d878Matthew Ahrens	 * When we receive a free record, dbuf_free_range() assumes
1802f3d878Matthew Ahrens	 * that the receiving system doesn't have any dbufs in the range
1812f3d878Matthew Ahrens	 * being freed.  This is always true because there is a one-record
1822f3d878Matthew Ahrens	 * constraint: we only send one WRITE record for any given
1839c3fd12Matthew Ahrens	 * object,offset.  We know that the one-record constraint is
1842f3d878Matthew Ahrens	 * true because we always send data in increasing order by
1852f3d878Matthew Ahrens	 * object,offset.
1862f3d878Matthew Ahrens	 *
1872f3d878Matthew Ahrens	 * If the increasing-order constraint ever changes, we should find
1882f3d878Matthew Ahrens	 * another way to assert that the one-record constraint is still
1892f3d878Matthew Ahrens	 * satisfied.
1902f3d878Matthew Ahrens	 */
1912f3d878Matthew Ahrens	ASSERT(object > dsp->dsa_last_data_object ||
1922f3d878Matthew Ahrens	    (object == dsp->dsa_last_data_object &&
1932f3d878Matthew Ahrens	    offset > dsp->dsa_last_data_offset));
1942f3d878Matthew Ahrens
1959e69d7dLori Alt	/*
1969e69d7dLori Alt	 * If there is a pending op, but it's not PENDING_FREE, push it out,
1979e69d7dLori Alt	 * since free block aggregation can only be done for blocks of the
1989e69d7dLori Alt	 * same type (i.e., DRR_FREE records can only be aggregated with
1999e69d7dLori Alt	 * other DRR_FREE records.  DRR_FREEOBJECTS records can only be
2009e69d7dLori Alt	 * aggregated with other DRR_FREEOBJECTS records.
2019e69d7dLori Alt	 */
2024e3c9f4Bill Pijewski	if (dsp->dsa_pending_op != PENDING_NONE &&
2034e3c9f4Bill Pijewski	    dsp->dsa_pending_op != PENDING_FREE) {
20498110f0Matthew Ahrens		if (dump_record(dsp, NULL, 0) != 0)
205be6fd75Matthew Ahrens			return (SET_ERROR(EINTR));
2064e3c9f4Bill Pijewski		dsp->dsa_pending_op = PENDING_NONE;
2079e69d7dLori Alt	}
2089e69d7dLori Alt
2094e3c9f4Bill Pijewski	if (dsp->dsa_pending_op == PENDING_FREE) {
2109e69d7dLori Alt		/*
211eb63303Tom Caputi		 * There should never be a PENDING_FREE if length is
212eb63303Tom Caputi		 * DMU_OBJECT_END (because dump_dnode is the only place where
213eb63303Tom Caputi		 * this function is called with a DMU_OBJECT_END, and only after
214eb63303Tom Caputi		 * flushing any pending record).
2159e69d7dLori Alt		 */
216eb63303Tom Caputi		ASSERT(length != DMU_OBJECT_END);
2179e69d7dLori Alt		/*
2189e69d7dLori Alt		 * Check to see whether this free block can be aggregated
2199e69d7dLori Alt		 * with pending one.
2209e69d7dLori Alt		 */
2219e69d7dLori Alt		if (drrf->drr_object == object && drrf->drr_offset +
2229e69d7dLori Alt		    drrf->drr_length == offset) {
223eb63303Tom Caputi			if (offset + length < offset)
224eb63303Tom Caputi				drrf->drr_length = DMU_OBJECT_END;
225eb63303Tom Caputi			else
226eb63303Tom Caputi				drrf->drr_length += length;
2279e69d7dLori Alt			return (0);
2289e69d7dLori Alt		} else {
2299e69d7dLori Alt			/* not a continuation.  Push out pending record */
23098110f0Matthew Ahrens			if (dump_record(dsp, NULL, 0) != 0)
231be6fd75Matthew Ahrens				return (SET_ERROR(EINTR));
2324e3c9f4Bill Pijewski			dsp->dsa_pending_op = PENDING_NONE;
2339e69d7dLori Alt		}
2349e69d7dLori Alt	}
2359e69d7dLori Alt	/* create a FREE record and make it pending */
2364e3c9f4Bill Pijewski	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
2374e3c9f4Bill Pijewski	dsp->dsa_drr->drr_type = DRR_FREE;
2389e69d7dLori Alt	drrf->drr_object = object;
2399e69d7dLori Alt	drrf->drr_offset = offset;
240eb63303Tom Caputi	if (offset + length < offset)
241eb63303Tom Caputi		drrf->drr_length = DMU_OBJECT_END;
242eb63303Tom Caputi	else
243eb63303Tom Caputi		drrf->drr_length = length;
2444e3c9f4Bill Pijewski	drrf->drr_toguid = dsp->dsa_toguid;
245eb63303Tom Caputi	if (length == DMU_OBJECT_END) {
24698110f0Matthew Ahrens		if (dump_record(dsp, NULL, 0) != 0)
247be6fd75Matthew Ahrens			return (SET_ERROR(EINTR));
2489e69d7dLori Alt	} else {
2494e3c9f4Bill Pijewski		dsp->dsa_pending_op = PENDING_FREE;
2509e69d7dLori Alt	}
252efb8094ahrens	return (0);
255efb8094ahrensstatic int
256eb63303Tom Caputidump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, uint64_t object,
257eb63303Tom Caputi    uint64_t offset, int lsize, int psize, const blkptr_t *bp, void *data)
2595602294Dan Kimmel	uint64_t payload_size;
260eb63303Tom Caputi	boolean_t raw = (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW);
2614e3c9f4Bill Pijewski	struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
2629e69d7dLori Alt
2632f3d878Matthew Ahrens	/*
2642f3d878Matthew Ahrens	 * We send data in increasing object, offset order.
2652f3d878Matthew Ahrens	 * See comment in dump_free() for details.
2662f3d878Matthew Ahrens	 */
2672f3d878Matthew Ahrens	ASSERT(object > dsp->dsa_last_data_object ||
2682f3d878Matthew Ahrens	    (object == dsp->dsa_last_data_object &&
2692f3d878Matthew Ahrens	    offset > dsp->dsa_last_data_offset));
2702f3d878Matthew Ahrens	dsp->dsa_last_data_object = object;
2715602294Dan Kimmel	dsp->dsa_last_data_offset = offset + lsize - 1;
2728e71447Lori Alt
2739e69d7dLori Alt	/*
2749e69d7dLori Alt	 * If there is any kind of pending aggregation (currently either
2759e69d7dLori Alt	 * a grouping of free objects or free blocks), push it out to
2769e69d7dLori Alt	 * the stream, since aggregation can't be done across operations
2779e69d7dLori Alt	 * of different types.
2789e69d7dLori Alt	 */
2794e3c9f4Bill Pijewski	if (dsp->dsa_pending_op != PENDING_NONE) {
28098110f0Matthew Ahrens		if (dump_record(dsp, NULL, 0) != 0)
281be6fd75Matthew Ahrens			return (SET_ERROR(EINTR));
2824e3c9f4Bill Pijewski		dsp->dsa_pending_op = PENDING_NONE;
2839e69d7dLori Alt	}
28498110f0Matthew Ahrens	/* write a WRITE record */
2854e3c9f4Bill Pijewski	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
2864e3c9f4Bill Pijewski	dsp->dsa_drr->drr_type = DRR_WRITE;
2879e69d7dLori Alt	drrw->drr_object = object;
2889e69d7dLori Alt	drrw->drr_type = type;
2899e69d7dLori Alt	drrw->drr_offset = offset;
2904e3c9f4Bill Pijewski	drrw->drr_toguid = dsp->dsa_toguid;
2915602294Dan Kimmel	drrw->drr_logical_size = lsize;
2925602294Dan Kimmel
293eb63303Tom Caputi	/* only set the compression fields if the buf is compressed or raw */
294eb63303Tom Caputi	if (raw || lsize != psize) {
2955602294Dan Kimmel		ASSERT(!BP_IS_EMBEDDED(bp));
2965602294Dan Kimmel		ASSERT3S(psize, >, 0);
2975602294Dan Kimmel
298eb63303Tom Caputi		if (raw) {
299eb63303Tom Caputi			ASSERT(BP_IS_PROTECTED(bp));
300eb63303Tom Caputi
301eb63303Tom Caputi			/*
302eb63303Tom Caputi			 * This is a raw protected block so we need to pass
303eb63303Tom Caputi			 * along everything the receiving side will need to
304eb63303Tom Caputi			 * interpret this block, including the byteswap, salt,
305eb63303Tom Caputi			 * IV, and MAC.
306eb63303Tom Caputi			 */
307eb63303Tom Caputi			if (BP_SHOULD_BYTESWAP(bp))
308eb63303Tom Caputi				drrw->drr_flags |= DRR_RAW_BYTESWAP;
309eb63303Tom Caputi			zio_crypt_decode_params_bp(bp, drrw->drr_salt,
310eb63303Tom Caputi			    drrw->drr_iv);
311eb63303Tom Caputi			zio_crypt_decode_mac_bp(bp, drrw->drr_mac);
312eb63303Tom Caputi		} else {
313eb63303Tom Caputi			/* this is a compressed block */
314eb63303Tom Caputi			ASSERT(dsp->dsa_featureflags &
315eb63303Tom Caputi			    DMU_BACKUP_FEATURE_COMPRESSED);
316eb63303Tom Caputi			ASSERT(!BP_SHOULD_BYTESWAP(bp));
317eb63303Tom Caputi			ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)));
318eb63303Tom Caputi			ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF);
319eb63303Tom Caputi			ASSERT3S(lsize, >=, psize);
320eb63303Tom Caputi		}
321eb63303Tom Caputi
322eb63303Tom Caputi		/* set fields common to compressed and raw sends */
3235602294Dan Kimmel		drrw->drr_compressiontype = BP_GET_COMPRESS(bp);
3245602294Dan Kimmel		drrw->drr_compressed_size = psize;
3255602294Dan Kimmel		payload_size = drrw->drr_compressed_size;
3265602294Dan Kimmel	} else {
3275602294Dan Kimmel		payload_size = drrw->drr_logical_size;
3285602294Dan Kimmel	}
3295602294Dan Kimmel
330eb63303Tom Caputi	if (bp == NULL || BP_IS_EMBEDDED(bp) || (BP_IS_PROTECTED(bp) && !raw)) {
3315d7b4d4Matthew Ahrens		/*
332eb63303Tom Caputi		 * There's no pre-computed checksum for partial-block writes,
333eb63303Tom Caputi		 * embedded BP's, or encrypted BP's that are being sent as
334eb63303Tom Caputi		 * plaintext, so (like fletcher4-checkummed blocks) userland
335eb63303Tom Caputi		 * will have to compute a dedup-capable checksum itself.
3365d7b4d4Matthew Ahrens		 */
3375d7b4d4Matthew Ahrens		drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
3385d7b4d4Matthew Ahrens	} else {
3395d7b4d4Matthew Ahrens		drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
34045818eeMatthew Ahrens		if (zio_checksum_table[drrw->drr_checksumtype].ci_flags &
34145818eeMatthew Ahrens		    ZCHECKSUM_FLAG_DEDUP)
342eb63303Tom Caputi			drrw->drr_flags |= DRR_CHECKSUM_DEDUP;
3435d7b4d4Matthew Ahrens		DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
3445d7b4d4Matthew Ahrens		DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
3455d7b4d4Matthew Ahrens		DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
346eb63303Tom Caputi		DDK_SET_CRYPT(&drrw->drr_key, BP_IS_PROTECTED(bp));
3475d7b4d4Matthew Ahrens		drrw->drr_key.ddk_cksum = bp->blk_cksum;
3485d7b4d4Matthew Ahrens	}
3505602294Dan Kimmel	if (dump_record(dsp, data, payload_size) != 0)
351be6fd75Matthew Ahrens		return (SET_ERROR(EINTR));
352efb8094ahrens	return (0);
355efb8094ahrensstatic int
3565d7b4d4Matthew Ahrensdump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
3575d7b4d4Matthew Ahrens    int blksz, const blkptr_t *bp)
3585d7b4d4Matthew Ahrens{
3595d7b4d4Matthew Ahrens	char buf[BPE_PAYLOAD_SIZE];
3605d7b4d4Matthew Ahrens	struct drr_write_embedded *drrw =
3615d7b4d4Matthew Ahrens	    &(dsp->dsa_drr->drr_u.drr_write_embedded);
3625d7b4d4Matthew Ahrens
3635d7b4d4Matthew Ahrens	if (dsp->dsa_pending_op != PENDING_NONE) {
36498110f0Matthew Ahrens		if (dump_record(dsp, NULL, 0) != 0)
3655d7b4d4Matthew Ahrens			return (EINTR);
3665d7b4d4Matthew Ahrens		dsp->dsa_pending_op = PENDING_NONE;
3675d7b4d4Matthew Ahrens	}
3685d7b4d4Matthew Ahrens
3695d7b4d4Matthew Ahrens	ASSERT(BP_IS_EMBEDDED(bp));
3705d7b4d4Matthew Ahrens
3715d7b4d4Matthew Ahrens	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
3725d7b4d4Matthew Ahrens	dsp->dsa_drr->drr_type = DRR_WRITE_EMBEDDED;
3735d7b4d4Matthew Ahrens	drrw->drr_object = object;
3745d7b4d4Matthew Ahrens	drrw->drr_offset = offset;
3755d7b4d4Matthew Ahrens	drrw->drr_length = blksz;
3765d7b4d4Matthew Ahrens	drrw->drr_toguid = dsp->dsa_toguid;
3775d7b4d4Matthew Ahrens	drrw->drr_compression = BP_GET_COMPRESS(bp);
3785d7b4d4Matthew Ahrens	drrw->drr_etype = BPE_GET_ETYPE(bp);
3795d7b4d4Matthew Ahrens	drrw->drr_lsize = BPE_GET_LSIZE(bp);
3805d7b4d4Matthew Ahrens	drrw->drr_psize = BPE_GET_PSIZE(bp);
3815d7b4d4Matthew Ahrens
3825d7b4d4Matthew Ahrens	decode_embedded_bp_compressed(bp, buf);
3835d7b4d4Matthew Ahrens
38498110f0Matthew Ahrens	if (dump_record(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
3855d7b4d4Matthew Ahrens		return (EINTR);
3865d7b4d4Matthew Ahrens	return (0);
3875d7b4d4Matthew Ahrens}
3885d7b4d4Matthew Ahrens
/*
 * Emit a DRR_SPILL record for an object's spill (bonus-overflow) block.
 * For raw sends the encryption parameters (salt, IV, MAC) and compressed
 * size are carried along so the receiver can reconstruct the block.
 */
static int
dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data)
{
	struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
	uint64_t blksz = BP_GET_LSIZE(bp);
	uint64_t payload_size = blksz;

	/* Flush any pending aggregation; SPILL records are not aggregated. */
	if (dsp->dsa_pending_op != PENDING_NONE) {
		if (dump_record(dsp, NULL, 0) != 0)
			return (SET_ERROR(EINTR));
		dsp->dsa_pending_op = PENDING_NONE;
	}

	/* write a SPILL record */
	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
	dsp->dsa_drr->drr_type = DRR_SPILL;
	drrs->drr_object = object;
	drrs->drr_length = blksz;
	drrs->drr_toguid = dsp->dsa_toguid;

	/* See comment in dump_dnode() for full details */
	if (zfs_send_unmodified_spill_blocks &&
	    (bp->blk_birth <= dsp->dsa_fromtxg)) {
		drrs->drr_flags |= DRR_SPILL_UNMODIFIED;
	}

	/* handle raw send fields */
	if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
		ASSERT(BP_IS_PROTECTED(bp));

		if (BP_SHOULD_BYTESWAP(bp))
			drrs->drr_flags |= DRR_RAW_BYTESWAP;
		drrs->drr_compressiontype = BP_GET_COMPRESS(bp);
		drrs->drr_compressed_size = BP_GET_PSIZE(bp);
		zio_crypt_decode_params_bp(bp, drrs->drr_salt, drrs->drr_iv);
		zio_crypt_decode_mac_bp(bp, drrs->drr_mac);
		/* raw payload is the on-disk (compressed) size */
		payload_size = drrs->drr_compressed_size;
	}

	if (dump_record(dsp, data, payload_size) != 0)
		return (SET_ERROR(EINTR));
	return (0);
}
4320a586ceMark Shellenbaum
4330a586ceMark Shellenbaumstatic int
4344e3c9f4Bill Pijewskidump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
4364e3c9f4Bill Pijewski	struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
4379e69d7dLori Alt
4389e69d7dLori Alt	/*
4399e69d7dLori Alt	 * If there is a pending op, but it's not PENDING_FREEOBJECTS,
4409e69d7dLori Alt	 * push it out, since free block aggregation can only be done for
4419e69d7dLori Alt	 * blocks of the same type (i.e., DRR_FREE records can only be
4429e69d7dLori Alt	 * aggregated with other DRR_FREE records.  DRR_FREEOBJECTS records
4439e69d7dLori Alt	 * can only be aggregated with other DRR_FREEOBJECTS records.
4449e69d7dLori Alt	 */
4454e3c9f4Bill Pijewski	if (dsp->dsa_pending_op != PENDING_NONE &&
4464e3c9f4Bill Pijewski	    dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
44798110f0Matthew Ahrens		if (dump_record(dsp, NULL, 0) != 0)
448be6fd75Matthew Ahrens			return (SET_ERROR(EINTR));
4494e3c9f4Bill Pijewski		dsp->dsa_pending_op = PENDING_NONE;
4509e69d7dLori Alt	}
4514e3c9f4Bill Pijewski	if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
4529e69d7dLori Alt		/*
4539e69d7dLori Alt		 * See whether this free object array can be aggregated
4549e69d7dLori Alt		 * with pending one
4559e69d7dLori Alt		 */
4569e69d7dLori Alt		if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) {
4579e69d7dLori Alt			drrfo->drr_numobjs += numobjs;
4589e69d7dLori Alt			return (0);
4599e69d7dLori Alt		} else {
4609e69d7dLori Alt			/* can't be aggregated.  Push out pending record */
46198110f0Matthew Ahrens			if (dump_record(dsp, NULL, 0) != 0)
462be6fd75Matthew Ahrens				return (SET_ERROR(EINTR));
4634e3c9f4Bill Pijewski			dsp->dsa_pending_op = PENDING_NONE;
4649e69d7dLori Alt		}
4659e69d7dLori Alt	}
4669e69d7dLori Alt
467efb8094ahrens	/* write a FREEOBJECTS record */
4684e3c9f4Bill Pijewski	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
4694e3c9f4Bill Pijewski	dsp->dsa_drr->drr_type = DRR_FREEOBJECTS;
4709e69d7dLori Alt	drrfo->drr_firstobj = firstobj;
4719e69d7dLori Alt	drrfo->drr_numobjs = numobjs;
4724e3c9f4Bill Pijewski	drrfo->drr_toguid = dsp->dsa_toguid;
4739e69d7dLori Alt
4744e3c9f4Bill Pijewski	dsp->dsa_pending_op = PENDING_FREEOBJECTS;
476efb8094ahrens	return (0);
479efb8094ahrensstatic int
480eb63303Tom Caputidump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
481eb63303Tom Caputi    dnode_phys_t *dnp)
4834e3c9f4Bill Pijewski	struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
484eb63303Tom Caputi	int bonuslen;
4859e69d7dLori Alt
4869c3fd12Matthew Ahrens	if (object < dsp->dsa_resume_object) {
4879c3fd12Matthew Ahrens		/*
4889c3fd12Matthew Ahrens		 * Note: when resuming, we will visit all the dnodes in
4899c3fd12Matthew Ahrens		 * the block of dnodes that we are resuming from.  In
4909c3fd12Matthew Ahrens		 * this case it's unnecessary to send the dnodes prior to
4919c3fd12Matthew Ahrens		 * the one we are resuming from.  We should be at most one
4929c3fd12Matthew Ahrens		 * block's worth of dnodes behind the resume point.
4939c3fd12Matthew Ahrens		 */
4949c3fd12Matthew Ahrens		ASSERT3U(dsp->dsa_resume_object - object, <,
4959c3fd12Matthew Ahrens		    1 << (DNODE_BLOCK_SHIFT - DNODE_SHIFT));
4969c3fd12Matthew Ahrens		return (0);
4979c3fd12Matthew Ahrens	}
4989c3fd12Matthew Ahrens
499efb8094ahrens	if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
5004e3c9f4Bill Pijewski		return (dump_freeobjects(dsp, object, 1));
5024e3c9f4Bill Pijewski	if (dsp->dsa_pending_op != PENDING_NONE) {
50398110f0Matthew Ahrens		if (dump_record(dsp, NULL, 0) != 0)
504be6fd75Matthew Ahrens			return (SET_ERROR(EINTR));
5054e3c9f4Bill Pijewski		dsp->dsa_pending_op = PENDING_NONE;
5069e69d7dLori Alt	}
5079e69d7dLori Alt
508efb8094ahrens	/* write an OBJECT record */
5094e3c9f4Bill Pijewski	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
5104e3c9f4Bill Pijewski	dsp->dsa_drr->drr_type = DRR_OBJECT;
5119e69d7dLori Alt	drro->drr_object = object;
5129e69d7dLori Alt	drro->drr_type = dnp->dn_type;
5139e69d7dLori Alt	drro->drr_bonustype = dnp->dn_bonustype;
5149e69d7dLori Alt	drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
5159e69d7dLori Alt	drro->drr_bonuslen = dnp->dn_bonuslen;
51654811daToomas Soome	drro->drr_dn_slots = dnp->dn_extra_slots + 1;
5179e69d7dLori Alt	drro->drr_checksumtype = dnp->dn_checksum;
5189e69d7dLori Alt	drro->drr_compress = dnp->dn_compress;
5194e3c9f4Bill Pijewski	drro->drr_toguid = dsp->dsa_toguid;
5209e69d7dLori Alt
521b515258Matthew Ahrens	if (!(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
522b515258Matthew Ahrens	    drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
523b515258Matthew Ahrens		drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
524b515258Matthew Ahrens
525eb63303Tom Caputi	bonuslen = P2ROUNDUP(dnp->dn_bonuslen, 8);
526eb63303Tom Caputi
527eb63303Tom Caputi	if ((dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW)) {
528eb63303Tom Caputi		ASSERT(BP_IS_ENCRYPTED(bp));
529eb63303Tom Caputi
530eb63303Tom Caputi		if (BP_SHOULD_BYTESWAP(bp))
531eb63303Tom Caputi			drro->drr_flags |= DRR_RAW_BYTESWAP;
532eb63303Tom Caputi
533eb63303Tom Caputi		/* needed for reconstructing dnp on recv side */
534eb63303Tom Caputi		drro->drr_maxblkid = dnp->dn_maxblkid;
535eb63303Tom Caputi		drro->drr_indblkshift = dnp->dn_indblkshift;
536eb63303Tom Caputi		drro->drr_nlevels = dnp->dn_nlevels;
537eb63303Tom Caputi		drro->drr_nblkptr = dnp->dn_nblkptr;
538eb63303Tom Caputi
539eb63303Tom Caputi		/*
540eb63303Tom Caputi		 * Since we encrypt the entire bonus area, the (raw) part
541eb63303Tom Caputi		 * beyond the bonuslen is actually nonzero, so we need
542eb63303Tom Caputi		 * to send it.
543eb63303Tom Caputi		 */
544eb63303Tom Caputi		if (bonuslen != 0) {
545eb63303Tom Caputi			drro->drr_raw_bonuslen = DN_MAX_BONUS_LEN(dnp);
546eb63303Tom Caputi			bonuslen = drro->drr_raw_bonuslen;
547eb63303Tom Caputi		}
54898110f0Matthew Ahrens	}
550eb63303Tom Caputi	/*
551eb63303Tom Caputi	 * DRR_OBJECT_SPILL is set for every dnode which references a
552eb63303Tom Caputi	 * spill block.  This allows the receiving pool to definitively
553eb63303Tom Caputi	 * determine when a spill block should be kept or freed.
554eb63303Tom Caputi	 */
555eb63303Tom Caputi	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
556eb63303Tom Caputi		drro->drr_flags |= DRR_OBJECT_SPILL;
557eb63303Tom Caputi
558eb63303Tom Caputi	if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0)
559eb63303Tom Caputi		return (SET_ERROR(EINTR));
560eb63303Tom Caputi
5612f3d878Matthew Ahrens	/* Free anything past the end of the file. */
5624e3c9f4Bill Pijewski	if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
563eb63303Tom Caputi	    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0)
564be6fd75Matthew Ahrens		return (SET_ERROR(EINTR));
565eb63303Tom Caputi
566eb63303Tom Caputi	/*
567eb63303Tom Caputi	 * Send DRR_SPILL records for unmodified spill blocks.  This is useful
568eb63303Tom Caputi	 * because changing certain attributes of the object (e.g. blocksize)
569eb63303Tom Caputi	 * can cause old versions of ZFS to incorrectly remove a spill block.
570eb63303Tom Caputi	 * Including these records in the stream forces an up to date version
571eb63303Tom Caputi	 * to always be written ensuring they're never lost.  Current versions
572eb63303Tom Caputi	 * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can
573eb63303Tom Caputi	 * ignore these unmodified spill blocks.
574eb63303Tom Caputi	 */
575eb63303Tom Caputi	if (zfs_send_unmodified_spill_blocks &&
576eb63303Tom Caputi	    (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
577eb63303Tom Caputi	    (DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) {
578eb63303Tom Caputi		struct