1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25 * Copyright 2019 Joyent, Inc.
26 * Copyright (c) 2012 Pawel Jakub Dawidek. All rights reserved.
27 * Copyright (c) 2013 Steven Hartland. All rights reserved.
28 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
29 * Copyright (c) 2014 Integros [integros.com]
30 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
31 * Copyright (c) 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
32 * Copyright (c) 2018 Datto Inc.
33 */
34
35#include <assert.h>
36#include <ctype.h>
37#include <errno.h>
38#include <libintl.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <strings.h>
42#include <unistd.h>
43#include <stddef.h>
44#include <fcntl.h>
45#include <sys/mount.h>
46#include <pthread.h>
47#include <umem.h>
48#include <time.h>
49
50#include <libzfs.h>
51#include <libzfs_core.h>
52
53#include "zfs_namecheck.h"
54#include "zfs_prop.h"
55#include "zfs_fletcher.h"
56#include "libzfs_impl.h"
57#include <zlib.h>
58#include <sha2.h>
59#include <sys/zio_checksum.h>
60#include <sys/dsl_crypt.h>
61#include <sys/ddt.h>
62
63/* in libzfs_dataset.c */
64extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
65
66static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
67    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
68    uint64_t *, const char *, nvlist_t *);
69static int guid_to_name(libzfs_handle_t *, const char *,
70    uint64_t, boolean_t, char *);
71
72static const zio_cksum_t zero_cksum = { 0 };
73
74typedef struct dedup_arg {
75	int	inputfd;
76	int	outputfd;
77	libzfs_handle_t  *dedup_hdl;
78} dedup_arg_t;
79
80typedef struct progress_arg {
81	zfs_handle_t *pa_zhp;
82	int pa_fd;
83	boolean_t pa_parsable;
84} progress_arg_t;
85
/*
 * Location of a data block already emitted in the stream.  cksummer()
 * fills these from a write record's drr_toguid, drr_object and drr_offset.
 */
typedef struct dataref {
	uint64_t	ref_guid;	/* guid taken from drr_toguid */
	uint64_t	ref_object;	/* object number of the block */
	uint64_t	ref_offset;	/* offset of the block in the object */
} dataref_t;
91
92typedef struct dedup_entry {
93	struct dedup_entry	*dde_next;
94	zio_cksum_t dde_chksum;
95	uint64_t dde_prop;
96	dataref_t dde_ref;
97} dedup_entry_t;
98
99#define	MAX_DDT_PHYSMEM_PERCENT		20
100#define	SMALLEST_POSSIBLE_MAX_DDT_MB		128
101
102typedef struct dedup_table {
103	dedup_entry_t	**dedup_hash_array;
104	umem_cache_t	*ddecache;
105	uint64_t	max_ddt_size;  /* max dedup table size in bytes */
106	uint64_t	cur_ddt_size;  /* current dedup table size in bytes */
107	uint64_t	ddt_count;
108	int		numhashbits;
109	boolean_t	ddt_full;
110} dedup_table_t;
111
/*
 * Return the number of significant bits in n, i.e. the 1-based position
 * of its highest set bit.  Returns 0 when n is 0.
 */
static int
high_order_bit(uint64_t n)
{
	int nbits = 0;

	while (n != 0) {
		nbits++;
		n >>= 1;
	}
	return (nbits);
}
121
/*
 * Read exactly one item of 'len' bytes from 'stream' into 'buf'.
 *
 * Returns the fread() item count: 1 when the whole item was read,
 * 0 otherwise (short read, end of file, error, or len == 0).
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
	return (fread(buf, len, 1, stream));
}
132
133static void
134ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
135    zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
136{
137	dedup_entry_t	*dde;
138
139	if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
140		if (ddt->ddt_full == B_FALSE) {
141			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
142			    "Dedup table full.  Deduplication will continue "
143			    "with existing table entries"));
144			ddt->ddt_full = B_TRUE;
145		}
146		return;
147	}
148
149	if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
150	    != NULL) {
151		assert(*ddepp == NULL);
152		dde->dde_next = NULL;
153		dde->dde_chksum = *cs;
154		dde->dde_prop = prop;
155		dde->dde_ref = *dr;
156		*ddepp = dde;
157		ddt->cur_ddt_size += sizeof (dedup_entry_t);
158		ddt->ddt_count++;
159	}
160}
161
162/*
163 * Using the specified dedup table, do a lookup for an entry with
164 * the checksum cs.  If found, return the block's reference info
165 * in *dr. Otherwise, insert a new entry in the dedup table, using
166 * the reference information specified by *dr.
167 *
168 * return value:  true - entry was found
169 *		  false - entry was not found
170 */
171static boolean_t
172ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
173    uint64_t prop, dataref_t *dr)
174{
175	uint32_t hashcode;
176	dedup_entry_t **ddepp;
177
178	hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
179
180	for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
181	    ddepp = &((*ddepp)->dde_next)) {
182		if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
183		    (*ddepp)->dde_prop == prop) {
184			*dr = (*ddepp)->dde_ref;
185			return (B_TRUE);
186		}
187	}
188	ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
189	return (B_FALSE);
190}
191
192static int
193dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
194    zio_cksum_t *zc, int outfd)
195{
196	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
197	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
198	(void) fletcher_4_incremental_native(drr,
199	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
200	if (drr->drr_type != DRR_BEGIN) {
201		ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
202		    drr_checksum.drr_checksum));
203		drr->drr_u.drr_checksum.drr_checksum = *zc;
204	}
205	(void) fletcher_4_incremental_native(
206	    &drr->drr_u.drr_checksum.drr_checksum, sizeof (zio_cksum_t), zc);
207	if (write(outfd, drr, sizeof (*drr)) == -1)
208		return (errno);
209	if (payload_len != 0) {
210		(void) fletcher_4_incremental_native(payload, payload_len, zc);
211		if (write(outfd, payload, payload_len) == -1)
212			return (errno);
213	}
214	return (0);
215}
216
217/*
218 * This function is started in a separate thread when the dedup option
219 * has been requested.  The main send thread determines the list of
220 * snapshots to be included in the send stream and makes the ioctl calls
221 * for each one.  But instead of having the ioctl send the output to the
222 * the output fd specified by the caller of zfs_send()), the
223 * ioctl is told to direct the output to a pipe, which is read by the
224 * alternate thread running THIS function.  This function does the
225 * dedup'ing by:
226 *  1. building a dedup table (the DDT)
227 *  2. doing checksums on each data block and inserting a record in the DDT
228 *  3. looking for matching checksums, and
229 *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
230 *      a duplicate block is found.
231 * The output of this function then goes to the output fd requested
232 * by the caller of zfs_send().
233 */
234static void *
235cksummer(void *arg)
236{
237	dedup_arg_t *dda = arg;
238	char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
239	dmu_replay_record_t thedrr;
240	dmu_replay_record_t *drr = &thedrr;
241	FILE *ofp;
242	int outfd;
243	dedup_table_t ddt;
244	zio_cksum_t stream_cksum;
245	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
246	uint64_t numbuckets;
247
248	ddt.max_ddt_size =
249	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
250	    SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
251
252	numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
253
254	/*
255	 * numbuckets must be a power of 2.  Increase number to
256	 * a power of 2 if necessary.
257	 */
258	if (!ISP2(numbuckets))
259		numbuckets = 1 << high_order_bit(numbuckets);
260
261	ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
262	ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
263	    NULL, NULL, NULL, NULL, NULL, 0);
264	ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
265	ddt.numhashbits = high_order_bit(numbuckets) - 1;
266	ddt.ddt_full = B_FALSE;
267
268	outfd = dda->outputfd;
269	ofp = fdopen(dda->inputfd, "r");
270	while (ssread(drr, sizeof (*drr), ofp) != 0) {
271
272		/*
273		 * kernel filled in checksum, we are going to write same
274		 * record, but need to regenerate checksum.
275		 */
276		if (drr->drr_type != DRR_BEGIN) {
277			bzero(&drr->drr_u.drr_checksum.drr_checksum,
278			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
279		}
280
281		switch (drr->drr_type) {
282		case DRR_BEGIN:
283		{
284			struct drr_begin *drrb = &drr->drr_u.drr_begin;
285			int fflags;
286			int sz = 0;
287			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
288
289			ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
290
291			/* set the DEDUP feature flag for this stream */
292			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
293			fflags |= (DMU_BACKUP_FEATURE_DEDUP |
294			    DMU_BACKUP_FEATURE_DEDUPPROPS);
295			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
296
297			if (drr->drr_payloadlen != 0) {
298				sz = drr->drr_payloadlen;
299
300				if (sz > SPA_MAXBLOCKSIZE) {
301					buf = zfs_realloc(dda->dedup_hdl, buf,
302					    SPA_MAXBLOCKSIZE, sz);
303				}
304				(void) ssread(buf, sz, ofp);
305				if (ferror(stdin))
306					perror("fread");
307			}
308			if (dump_record(drr, buf, sz, &stream_cksum,
309			    outfd) != 0)
310				goto out;
311			break;
312		}
313
314		case DRR_END:
315		{
316			struct drr_end *drre = &drr->drr_u.drr_end;
317			/* use the recalculated checksum */
318			drre->drr_checksum = stream_cksum;
319			if (dump_record(drr, NULL, 0, &stream_cksum,
320			    outfd) != 0)
321				goto out;
322			break;
323		}
324
325		case DRR_OBJECT:
326		{
327			struct drr_object *drro = &drr->drr_u.drr_object;
328			if (drro->drr_bonuslen > 0) {
329				(void) ssread(buf,
330				    DRR_OBJECT_PAYLOAD_SIZE(drro), ofp);
331			}
332			if (dump_record(drr, buf, DRR_OBJECT_PAYLOAD_SIZE(drro),
333			    &stream_cksum, outfd) != 0)
334				goto out;
335			break;
336		}
337
338		case DRR_SPILL:
339		{
340			struct drr_spill *drrs = &drr->drr_u.drr_spill;
341			(void) ssread(buf, DRR_SPILL_PAYLOAD_SIZE(drrs), ofp);
342			if (dump_record(drr, buf, DRR_SPILL_PAYLOAD_SIZE(drrs),
343			    &stream_cksum, outfd) != 0)
344				goto out;
345			break;
346		}
347
348		case DRR_FREEOBJECTS:
349		{
350			if (dump_record(drr, NULL, 0, &stream_cksum,
351			    outfd) != 0)
352				goto out;
353			break;
354		}
355
356		case DRR_WRITE:
357		{
358			struct drr_write *drrw = &drr->drr_u.drr_write;
359			dataref_t	dataref;
360			uint64_t	payload_size;
361
362			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
363			(void) ssread(buf, payload_size, ofp);
364
365			/*
366			 * Use the existing checksum if it's dedup-capable,
367			 * else calculate a SHA256 checksum for it.
368			 */
369
370			if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
371			    zero_cksum) ||
372			    !DRR_IS_DEDUP_CAPABLE(drrw->drr_flags)) {
373				SHA256_CTX	ctx;
374				zio_cksum_t	tmpsha256;
375
376				SHA256Init(&ctx);
377				SHA256Update(&ctx, buf, payload_size);
378				SHA256Final(&tmpsha256, &ctx);
379				drrw->drr_key.ddk_cksum.zc_word[0] =
380				    BE_64(tmpsha256.zc_word[0]);
381				drrw->drr_key.ddk_cksum.zc_word[1] =
382				    BE_64(tmpsha256.zc_word[1]);
383				drrw->drr_key.ddk_cksum.zc_word[2] =
384				    BE_64(tmpsha256.zc_word[2]);
385				drrw->drr_key.ddk_cksum.zc_word[3] =
386				    BE_64(tmpsha256.zc_word[3]);
387				drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
388				drrw->drr_flags |= DRR_CHECKSUM_DEDUP;
389			}
390
391			dataref.ref_guid = drrw->drr_toguid;
392			dataref.ref_object = drrw->drr_object;
393			dataref.ref_offset = drrw->drr_offset;
394
395			if (ddt_update(dda->dedup_hdl, &ddt,
396			    &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
397			    &dataref)) {
398				dmu_replay_record_t wbr_drr = {0};
399				struct drr_write_byref *wbr_drrr =
400				    &wbr_drr.drr_u.drr_write_byref;
401
402				/* block already present in stream */
403				wbr_drr.drr_type = DRR_WRITE_BYREF;
404
405				wbr_drrr->drr_object = drrw->drr_object;
406				wbr_drrr->drr_offset = drrw->drr_offset;
407				wbr_drrr->drr_length = drrw->drr_logical_size;
408				wbr_drrr->drr_toguid = drrw->drr_toguid;
409				wbr_drrr->drr_refguid = dataref.ref_guid;
410				wbr_drrr->drr_refobject =
411				    dataref.ref_object;
412				wbr_drrr->drr_refoffset =
413				    dataref.ref_offset;
414
415				wbr_drrr->drr_checksumtype =
416				    drrw->drr_checksumtype;
417				wbr_drrr->drr_flags = drrw->drr_flags;
418				wbr_drrr->drr_key.ddk_cksum =
419				    drrw->drr_key.ddk_cksum;
420				wbr_drrr->drr_key.ddk_prop =
421				    drrw->drr_key.ddk_prop;
422
423				if (dump_record(&wbr_drr, NULL, 0,
424				    &stream_cksum, outfd) != 0)
425					goto out;
426			} else {
427				/* block not previously seen */
428				if (dump_record(drr, buf, payload_size,
429				    &stream_cksum, outfd) != 0)
430					goto out;
431			}
432			break;
433		}
434
435		case DRR_WRITE_EMBEDDED:
436		{
437			struct drr_write_embedded *drrwe =
438			    &drr->drr_u.drr_write_embedded;
439			(void) ssread(buf,
440			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
441			if (dump_record(drr, buf,
442			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
443			    &stream_cksum, outfd) != 0)
444				goto out;
445			break;
446		}
447
448		case DRR_FREE:
449		{
450			if (dump_record(drr, NULL, 0, &stream_cksum,
451			    outfd) != 0)
452				goto out;
453			break;
454		}
455
456		case DRR_OBJECT_RANGE:
457		{
458			if (dump_record(drr, NULL, 0, &stream_cksum,
459			    outfd) != 0)
460				goto out;
461			break;
462		}
463
464		default:
465			(void) fprintf(stderr, "INVALID record type 0x%x\n",
466			    drr->drr_type);
467			/* should never happen, so assert */
468			assert(B_FALSE);
469		}
470	}
471out:
472	umem_cache_destroy(ddt.ddecache);
473	free(ddt.dedup_hash_array);
474	free(buf);
475	(void) fclose(ofp);
476
477	return (NULL);
478}
479
480/*
481 * Routines for dealing with the AVL tree of fs-nvlists
482 */
483typedef struct fsavl_node {
484	avl_node_t fn_node;
485	nvlist_t *fn_nvfs;
486	char *fn_snapname;
487	uint64_t fn_guid;
488} fsavl_node_t;
489
490static int
491fsavl_compare(const void *arg1, const void *arg2)
492{
493	const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
494	const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
495
496	if (fn1->fn_guid > fn2->fn_guid)
497		return (+1);
498	if (fn1->fn_guid < fn2->fn_guid)
499		return (-1);
500	return (0);
501}
502
503/*
504 * Given the GUID of a snapshot, find its containing filesystem and
505 * (optionally) name.
506 */
507static nvlist_t *
508fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
509{
510	fsavl_node_t fn_find;
511	fsavl_node_t *fn;
512
513	fn_find.fn_guid = snapguid;
514
515	fn = avl_find(avl, &fn_find, NULL);
516	if (fn) {
517		if (snapname)
518			*snapname = fn->fn_snapname;
519		return (fn->fn_nvfs);
520	}
521	return (NULL);
522}
523
524static void
525fsavl_destroy(avl_tree_t *avl)
526{
527	fsavl_node_t *fn;
528	void *cookie;
529
530	if (avl == NULL)
531		return;
532
533	cookie = NULL;
534	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
535		free(fn);
536	avl_destroy(avl);
537	free(avl);
538}
539
540/*
541 * Given an nvlist, produce an avl tree of snapshots, ordered by guid
542 */
543static avl_tree_t *
544fsavl_create(nvlist_t *fss)
545{
546	avl_tree_t *fsavl;
547	nvpair_t *fselem = NULL;
548
549	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
550		return (NULL);
551
552	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
553	    offsetof(fsavl_node_t, fn_node));
554
555	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
556		nvlist_t *nvfs, *snaps;
557		nvpair_t *snapelem = NULL;
558
559		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
560		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
561
562		while ((snapelem =
563		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
564			fsavl_node_t *fn;
565			uint64_t guid;
566
567			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
568			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
569				fsavl_destroy(fsavl);
570				return (NULL);
571			}
572			fn->fn_nvfs = nvfs;
573			fn->fn_snapname = nvpair_name(snapelem);
574			fn->fn_guid = guid;
575
576			/*
577			 * Note: if there are multiple snaps with the
578			 * same GUID, we ignore all but one.
579			 */
580			if (avl_find(fsavl, fn, NULL) == NULL)
581				avl_add(fsavl, fn);
582			else
583				free(fn);
584		}
585	}
586
587	return (fsavl);
588}
589
590/*
591 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
592 */
593typedef struct send_data {
594	/*
595	 * assigned inside every recursive call,
596	 * restored from *_save on return:
597	 *
598	 * guid of fromsnap snapshot in parent dataset
599	 * txg of fromsnap snapshot in current dataset
600	 * txg of tosnap snapshot in current dataset
601	 */
602
603	uint64_t parent_fromsnap_guid;
604	uint64_t fromsnap_txg;
605	uint64_t tosnap_txg;
606
607	/* the nvlists get accumulated during depth-first traversal */
608	nvlist_t *parent_snaps;
609	nvlist_t *fss;
610	nvlist_t *snapprops;
611	nvlist_t *snapholds;	/* user holds */
612
613	/* send-receive configuration, does not change during traversal */
614	const char *fsname;
615	const char *fromsnap;
616	const char *tosnap;
617	boolean_t recursive;
618	boolean_t raw;
619	boolean_t verbose;
620	boolean_t backup;
621	boolean_t holds;	/* were holds requested with send -h */
622	boolean_t props;
623
624	/*
625	 * The header nvlist is of the following format:
626	 * {
627	 *   "tosnap" -> string
628	 *   "fromsnap" -> string (if incremental)
629	 *   "fss" -> {
630	 *	id -> {
631	 *
632	 *	 "name" -> string (full name; for debugging)
633	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
634	 *
635	 *	 "props" -> { name -> value (only if set here) }
636	 *	 "snaps" -> { name (lastname) -> number (guid) }
637	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
638	 *	 "snapholds" -> { name (lastname) -> { holdname -> crtime } }
639	 *
640	 *	 "origin" -> number (guid) (if clone)
641	 *	 "is_encroot" -> boolean
642	 *	 "sent" -> boolean (not on-disk)
643	 *	}
644	 *   }
645	 * }
646	 *
647	 */
648} send_data_t;
649
650static void
651send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv);
652
653static int
654send_iterate_snap(zfs_handle_t *zhp, void *arg)
655{
656	send_data_t *sd = arg;
657	uint64_t guid = zhp->zfs_dmustats.dds_guid;
658	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
659	char *snapname;
660	nvlist_t *nv;
661
662	snapname = strrchr(zhp->zfs_name, '@')+1;
663
664	if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
665		if (sd->verbose) {
666			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
667			    "skipping snapshot %s because it was created "
668			    "after the destination snapshot (%s)\n"),
669			    zhp->zfs_name, sd->tosnap);
670		}
671		zfs_close(zhp);
672		return (0);
673	}
674
675	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
676	/*
677	 * NB: if there is no fromsnap here (it's a newly created fs in
678	 * an incremental replication), we will substitute the tosnap.
679	 */
680	if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
681	    (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
682	    strcmp(snapname, sd->tosnap) == 0)) {
683		sd->parent_fromsnap_guid = guid;
684	}
685
686	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
687	send_iterate_prop(zhp, sd->backup, nv);
688	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
689	nvlist_free(nv);
690	if (sd->holds) {
691		nvlist_t *holds = fnvlist_alloc();
692		int err = lzc_get_holds(zhp->zfs_name, &holds);
693		if (err == 0) {
694			VERIFY(0 == nvlist_add_nvlist(sd->snapholds,
695			    snapname, holds));
696		}
697		fnvlist_free(holds);
698	}
699
700	zfs_close(zhp);
701	return (0);
702}
703
704static void
705send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv)
706{
707	nvlist_t *props = NULL;
708	nvpair_t *elem = NULL;
709
710	if (received_only)
711		props = zfs_get_recvd_props(zhp);
712	else
713		props = zhp->zfs_props;
714
715	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
716		char *propname = nvpair_name(elem);
717		zfs_prop_t prop = zfs_name_to_prop(propname);
718		nvlist_t *propnv;
719
720		if (!zfs_prop_user(propname)) {
721			/*
722			 * Realistically, this should never happen.  However,
723			 * we want the ability to add DSL properties without
724			 * needing to make incompatible version changes.  We
725			 * need to ignore unknown properties to allow older
726			 * software to still send datasets containing these
727			 * properties, with the unknown properties elided.
728			 */
729			if (prop == ZPROP_INVAL)
730				continue;
731
732			if (zfs_prop_readonly(prop))
733				continue;
734		}
735
736		verify(nvpair_value_nvlist(elem, &propnv) == 0);
737		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
738		    prop == ZFS_PROP_REFQUOTA ||
739		    prop == ZFS_PROP_REFRESERVATION) {
740			char *source;
741			uint64_t value;
742			verify(nvlist_lookup_uint64(propnv,
743			    ZPROP_VALUE, &value) == 0);
744			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
745				continue;
746			/*
747			 * May have no source before SPA_VERSION_RECVD_PROPS,
748			 * but is still modifiable.
749			 */
750			if (nvlist_lookup_string(propnv,
751			    ZPROP_SOURCE, &source) == 0) {
752				if ((strcmp(source, zhp->zfs_name) != 0) &&
753				    (strcmp(source,
754				    ZPROP_SOURCE_VAL_RECVD) != 0))
755					continue;
756			}
757		} else {
758			char *source;
759			if (nvlist_lookup_string(propnv,
760			    ZPROP_SOURCE, &source) != 0)
761				continue;
762			if ((strcmp(source, zhp->zfs_name) != 0) &&
763			    (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
764				continue;
765		}
766
767		if (zfs_prop_user(propname) ||
768		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
769			char *value;
770			verify(nvlist_lookup_string(propnv,
771			    ZPROP_VALUE, &value) == 0);
772			VERIFY(0 == nvlist_add_string(nv, propname, value));
773		} else {
774			uint64_t value;
775			verify(nvlist_lookup_uint64(propnv,
776			    ZPROP_VALUE, &value) == 0);
777			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
778		}
779	}
780}
781
782/*
783 * returns snapshot creation txg
784 * and returns 0 if the snapshot does not exist
785 */
786static uint64_t
787get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap)
788{
789	char name[ZFS_MAX_DATASET_NAME_LEN];
790	uint64_t txg = 0;
791
792	if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
793		return (txg);
794
795	(void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
796	if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) {
797		zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
798		if (zhp != NULL) {
799			txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
800			zfs_close(zhp);
801		}
802	}
803
804	return (txg);
805}
806
807/*
808 * recursively generate nvlists describing datasets.  See comment
809 * for the data structure send_data_t above for description of contents
810 * of the nvlist.
811 */
812static int
813send_iterate_fs(zfs_handle_t *zhp, void *arg)
814{
815	send_data_t *sd = arg;
816	nvlist_t *nvfs = NULL, *nv = NULL;
817	int rv = 0;
818	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
819	uint64_t fromsnap_txg_save = sd->fromsnap_txg;
820	uint64_t tosnap_txg_save = sd->tosnap_txg;
821	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
822	uint64_t guid = zhp->zfs_dmustats.dds_guid;
823	uint64_t fromsnap_txg, tosnap_txg;
824	char guidstring[64];
825
826	fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap);
827	if (fromsnap_txg != 0)
828		sd->fromsnap_txg = fromsnap_txg;
829
830	tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap);
831	if (tosnap_txg != 0)
832		sd->tosnap_txg = tosnap_txg;
833
834	/*
835	 * on the send side, if the current dataset does not have tosnap,
836	 * perform two additional checks:
837	 *
838	 * - skip sending the current dataset if it was created later than
839	 *   the parent tosnap
840	 * - return error if the current dataset was created earlier than
841	 *   the parent tosnap
842	 */
843	if (sd->tosnap != NULL && tosnap_txg == 0) {
844		if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
845			if (sd->verbose) {
846				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
847				    "skipping dataset %s: snapshot %s does "
848				    "not exist\n"), zhp->zfs_name, sd->tosnap);
849			}
850		} else {
851			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
852			    "cannot send %s@%s%s: snapshot %s@%s does not "
853			    "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
854			    dgettext(TEXT_DOMAIN, " recursively") : "",
855			    zhp->zfs_name, sd->tosnap);
856			rv = -1;
857		}
858		goto out;
859	}
860
861	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
862	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
863	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
864	    sd->parent_fromsnap_guid));
865
866	if (zhp->zfs_dmustats.dds_origin[0]) {
867		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
868		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
869		if (origin == NULL) {
870			rv = -1;
871			goto out;
872		}
873		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
874		    origin->zfs_dmustats.dds_guid));
875	}
876
877	/* iterate over props */
878	if (sd->props || sd->backup || sd->recursive) {
879		VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
880		send_iterate_prop(zhp, sd->backup, nv);
881	}
882
883	if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
884		boolean_t encroot;
885
886		/* determine if this dataset is an encryption root */
887		if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0) {
888			rv = -1;
889			goto out;
890		}
891
892		if (encroot)
893			VERIFY(0 == nvlist_add_boolean(nvfs, "is_encroot"));
894
895		/*
896		 * Encrypted datasets can only be sent with properties if
897		 * the raw flag is specified because the receive side doesn't
898		 * currently have a mechanism for recursively asking the user
899		 * for new encryption parameters.
900		 */
901		if (!sd->raw) {
902			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
903			    "cannot send %s@%s: encrypted dataset %s may not "
904			    "be sent with properties without the raw flag\n"),
905			    sd->fsname, sd->tosnap, zhp->zfs_name);
906			rv = -1;
907			goto out;
908		}
909
910	}
911
912	if (nv != NULL)
913		VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
914
915	/* iterate over snaps, and set sd->parent_fromsnap_guid */
916	sd->parent_fromsnap_guid = 0;
917	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
918	VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
919	if (sd->holds)
920		VERIFY(0 == nvlist_alloc(&sd->snapholds, NV_UNIQUE_NAME, 0));
921	(void) zfs_iter_snapshots(zhp, B_FALSE, send_iterate_snap, sd);
922	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
923	VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
924	if (sd->holds)
925		VERIFY(0 == nvlist_add_nvlist(nvfs, "snapholds",
926		    sd->snapholds));
927	nvlist_free(sd->parent_snaps);
928	nvlist_free(sd->snapprops);
929	nvlist_free(sd->snapholds);
930
931	/* add this fs to nvlist */
932	(void) snprintf(guidstring, sizeof (guidstring),
933	    "0x%llx", (longlong_t)guid);
934	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
935
936	/* iterate over children */
937	if (sd->recursive)
938		rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
939
940out:
941	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
942	sd->fromsnap_txg = fromsnap_txg_save;
943	sd->tosnap_txg = tosnap_txg_save;
944	nvlist_free(nv);
945	nvlist_free(nvfs);
946
947	zfs_close(zhp);
948	return (rv);
949}
950
951static int
952gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
953    const char *tosnap, boolean_t recursive, boolean_t raw,
954    boolean_t verbose, boolean_t backup, boolean_t holds,
955    boolean_t props, nvlist_t **nvlp, avl_tree_t **avlp)
956{
957	zfs_handle_t *zhp;
958	send_data_t sd = { 0 };
959	int error;
960
961	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
962	if (zhp == NULL)
963		return (EZFS_BADTYPE);
964
965	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
966	sd.fsname = fsname;
967	sd.fromsnap = fromsnap;
968	sd.tosnap = tosnap;
969	sd.recursive = recursive;
970	sd.raw = raw;
971	sd.verbose = verbose;
972	sd.backup = backup;
973	sd.holds = holds;
974	sd.props = props;
975
976	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
977		nvlist_free(sd.fss);
978		if (avlp != NULL)
979			*avlp = NULL;
980		*nvlp = NULL;
981		return (error);
982	}
983
984	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
985		nvlist_free(sd.fss);
986		*nvlp = NULL;
987		return (EZFS_NOMEM);
988	}
989
990	*nvlp = sd.fss;
991	return (0);
992}
993
994/*
995 * Routines specific to "zfs send"
996 */
997typedef struct send_dump_data {
998	/* these are all just the short snapname (the part after the @) */
999	const char *fromsnap;
1000	const char *tosnap;
1001	char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
1002	uint64_t prevsnap_obj;
1003	boolean_t seenfrom, seento, replicate, doall, fromorigin;
1004	boolean_t verbose, dryrun, parsable, progress, embed_data, std_out;
1005	boolean_t large_block, compress, raw, holds;
1006	int outfd;
1007	boolean_t err;
1008	nvlist_t *fss;
1009	nvlist_t *snapholds;
1010	avl_tree_t *fsavl;
1011	snapfilter_cb_t *filter_cb;
1012	void *filter_cb_arg;
1013	nvlist_t *debugnv;
1014	char holdtag[ZFS_MAX_DATASET_NAME_LEN];
1015	int cleanup_fd;
1016	uint64_t size;
1017} send_dump_data_t;
1018
1019static int
1020estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
1021    boolean_t fromorigin, enum lzc_send_flags flags, uint64_t *sizep)
1022{
1023	zfs_cmd_t zc = { 0 };
1024	libzfs_handle_t *hdl = zhp->zfs_hdl;
1025
1026	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1027	assert(fromsnap_obj == 0 || !fromorigin);
1028
1029	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1030	zc.zc_obj = fromorigin;
1031	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1032	zc.zc_fromobj = fromsnap_obj;
1033	zc.zc_guid = 1;  /* estimate flag */
1034	zc.zc_flags = flags;
1035
1036	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
1037		char errbuf[1024];
1038		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1039		    "warning: cannot estimate space for '%s'"), zhp->zfs_name);
1040
1041		switch (errno) {
1042		case EXDEV:
1043			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1044			    "not an earlier snapshot from the same fs"));
1045			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1046
1047		case EACCES:
1048			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1049			    "source key must be loaded"));
1050			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
1051
1052		case ENOENT:
1053			if (zfs_dataset_exists(hdl, zc.zc_name,
1054			    ZFS_TYPE_SNAPSHOT)) {
1055				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1056				    "incremental source (@%s) does not exist"),
1057				    zc.zc_value);
1058			}
1059			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1060
1061		case EDQUOT:
1062		case EFBIG:
1063		case EIO:
1064		case ENOLINK:
1065		case ENOSPC:
1066		case ENOSTR:
1067		case ENXIO:
1068		case EPIPE:
1069		case ERANGE:
1070		case EFAULT:
1071		case EROFS:
1072			zfs_error_aux(hdl, strerror(errno));
1073			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1074
1075		default:
1076			return (zfs_standard_error(hdl, errno, errbuf));
1077		}
1078	}
1079
1080	*sizep = zc.zc_objset_type;
1081
1082	return (0);
1083}
1084
1085/*
1086 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
1087 * NULL) to the file descriptor specified by outfd.
1088 */
1089static int
1090dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
1091    boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
1092    nvlist_t *debugnv)
1093{
1094	zfs_cmd_t zc = { 0 };
1095	libzfs_handle_t *hdl = zhp->zfs_hdl;
1096	nvlist_t *thisdbg;
1097
1098	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1099	assert(fromsnap_obj == 0 || !fromorigin);
1100
1101	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1102	zc.zc_cookie = outfd;
1103	zc.zc_obj = fromorigin;
1104	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1105	zc.zc_fromobj = fromsnap_obj;
1106	zc.zc_flags = flags;
1107
1108	VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
1109	if (fromsnap && fromsnap[0] != '\0') {
1110		VERIFY(0 == nvlist_add_string(thisdbg,
1111		    "fromsnap", fromsnap));
1112	}
1113
1114	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
1115		char errbuf[1024];
1116		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1117		    "warning: cannot send '%s'"), zhp->zfs_name);
1118
1119		VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
1120		if (debugnv) {
1121			VERIFY(0 == nvlist_add_nvlist(debugnv,
1122			    zhp->zfs_name, thisdbg));
1123		}
1124		nvlist_free(thisdbg);
1125
1126		switch (errno) {
1127		case EXDEV:
1128			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1129			    "not an earlier snapshot from the same fs"));
1130			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1131
1132		case EACCES:
1133			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1134			    "source key must be loaded"));
1135			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
1136
1137		case ENOENT:
1138			if (zfs_dataset_exists(hdl, zc.zc_name,
1139			    ZFS_TYPE_SNAPSHOT)) {
1140				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1141				    "incremental source (@%s) does not exist"),
1142				    zc.zc_value);
1143			}
1144			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1145
1146		case EDQUOT:
1147		case EFBIG:
1148		case EIO:
1149		case ENOLINK:
1150		case ENOSPC:
1151		case ENOSTR:
1152		case ENXIO:
1153		case EPIPE:
1154		case ERANGE:
1155		case EFAULT:
1156		case EROFS:
1157			zfs_error_aux(hdl, strerror(errno));
1158			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1159
1160		default:
1161			return (zfs_standard_error(hdl, errno, errbuf));
1162		}
1163	}
1164
1165	if (debugnv)
1166		VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
1167	nvlist_free(thisdbg);
1168
1169	return (0);
1170}
1171
1172static void
1173gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
1174{
1175	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1176
1177	/*
1178	 * zfs_send() only sets snapholds for sends that need them,
1179	 * e.g. replication and doall.
1180	 */
1181	if (sdd->snapholds == NULL)
1182		return;
1183
1184	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
1185}
1186
1187static void *
1188send_progress_thread(void *arg)
1189{
1190	progress_arg_t *pa = arg;
1191	zfs_cmd_t zc = { 0 };
1192	zfs_handle_t *zhp = pa->pa_zhp;
1193	libzfs_handle_t *hdl = zhp->zfs_hdl;
1194	unsigned long long bytes;
1195	char buf[16];
1196	time_t t;
1197	struct tm *tm;
1198
1199	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1200
1201	if (!pa->pa_parsable)
1202		(void) fprintf(stderr, "TIME        SENT   SNAPSHOT\n");
1203
1204	/*
1205	 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1206	 */
1207	for (;;) {
1208		(void) sleep(1);
1209
1210		zc.zc_cookie = pa->pa_fd;
1211		if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1212			return ((void *)-1);
1213
1214		(void) time(&t);
1215		tm = localtime(&t);
1216		bytes = zc.zc_cookie;
1217
1218		if (pa->pa_parsable) {
1219			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1220			    tm->tm_hour, tm->tm_min, tm->tm_sec,
1221			    bytes, zhp->zfs_name);
1222		} else {
1223			zfs_nicenum(bytes, buf, sizeof (buf));
1224			(void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1225			    tm->tm_hour, tm->tm_min, tm->tm_sec,
1226			    buf, zhp->zfs_name);
1227		}
1228	}
1229}
1230
1231static void
1232send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
1233    uint64_t size, boolean_t parsable)
1234{
1235	if (parsable) {
1236		if (fromsnap != NULL) {
1237			(void) fprintf(fout, "incremental\t%s\t%s",
1238			    fromsnap, tosnap);
1239		} else {
1240			(void) fprintf(fout, "full\t%s",
1241			    tosnap);
1242		}
1243	} else {
1244		if (fromsnap != NULL) {
1245			if (strchr(fromsnap, '@') == NULL &&
1246			    strchr(fromsnap, '#') == NULL) {
1247				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1248				    "send from @%s to %s"),
1249				    fromsnap, tosnap);
1250			} else {
1251				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1252				    "send from %s to %s"),
1253				    fromsnap, tosnap);
1254			}
1255		} else {
1256			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1257			    "full send of %s"),
1258			    tosnap);
1259		}
1260	}
1261
1262	if (size != 0) {
1263		if (parsable) {
1264			(void) fprintf(fout, "\t%llu",
1265			    (longlong_t)size);
1266		} else {
1267			char buf[16];
1268			zfs_nicenum(size, buf, sizeof (buf));
1269			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1270			    " estimated size is %s"), buf);
1271		}
1272	}
1273	(void) fprintf(fout, "\n");
1274}
1275
1276static int
1277dump_snapshot(zfs_handle_t *zhp, void *arg)
1278{
1279	send_dump_data_t *sdd = arg;
1280	progress_arg_t pa = { 0 };
1281	pthread_t tid;
1282	char *thissnap;
1283	enum lzc_send_flags flags = 0;
1284	int err;
1285	boolean_t isfromsnap, istosnap, fromorigin;
1286	boolean_t exclude = B_FALSE;
1287	FILE *fout = sdd->std_out ? stdout : stderr;
1288
1289	err = 0;
1290	thissnap = strchr(zhp->zfs_name, '@') + 1;
1291	isfromsnap = (sdd->fromsnap != NULL &&
1292	    strcmp(sdd->fromsnap, thissnap) == 0);
1293
1294	if (!sdd->seenfrom && isfromsnap) {
1295		gather_holds(zhp, sdd);
1296		sdd->seenfrom = B_TRUE;
1297		(void) strcpy(sdd->prevsnap, thissnap);
1298		sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1299		zfs_close(zhp);
1300		return (0);
1301	}
1302
1303	if (sdd->seento || !sdd->seenfrom) {
1304		zfs_close(zhp);
1305		return (0);
1306	}
1307
1308	istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1309	if (istosnap)
1310		sdd->seento = B_TRUE;
1311
1312	if (sdd->large_block)
1313		flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1314	if (sdd->embed_data)
1315		flags |= LZC_SEND_FLAG_EMBED_DATA;
1316	if (sdd->compress)
1317		flags |= LZC_SEND_FLAG_COMPRESS;
1318	if (sdd->raw)
1319		flags |= LZC_SEND_FLAG_RAW;
1320
1321	if (!sdd->doall && !isfromsnap && !istosnap) {
1322		if (sdd->replicate) {
1323			char *snapname;
1324			nvlist_t *snapprops;
1325			/*
1326			 * Filter out all intermediate snapshots except origin
1327			 * snapshots needed to replicate clones.
1328			 */
1329			nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1330			    zhp->zfs_dmustats.dds_guid, &snapname);
1331
1332			VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1333			    "snapprops", &snapprops));
1334			VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1335			    thissnap, &snapprops));
1336			exclude = !nvlist_exists(snapprops, "is_clone_origin");
1337		} else {
1338			exclude = B_TRUE;
1339		}
1340	}
1341
1342	/*
1343	 * If a filter function exists, call it to determine whether
1344	 * this snapshot will be sent.
1345	 */
1346	if (exclude || (sdd->filter_cb != NULL &&
1347	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1348		/*
1349		 * This snapshot is filtered out.  Don't send it, and don't
1350		 * set prevsnap_obj, so it will be as if this snapshot didn't
1351		 * exist, and the next accepted snapshot will be sent as
1352		 * an incremental from the last accepted one, or as the
1353		 * first (and full) snapshot in the case of a replication,
1354		 * non-incremental send.
1355		 */
1356		zfs_close(zhp);
1357		return (0);
1358	}
1359
1360	gather_holds(zhp, sdd);
1361	fromorigin = sdd->prevsnap[0] == '\0' &&
1362	    (sdd->fromorigin || sdd->replicate);
1363
1364	if (sdd->verbose) {
1365		uint64_t size = 0;
1366		(void) estimate_ioctl(zhp, sdd->prevsnap_obj,
1367		    fromorigin, flags, &size);
1368
1369		send_print_verbose(fout, zhp->zfs_name,
1370		    sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1371		    size, sdd->parsable);
1372		sdd->size += size;
1373	}
1374
1375	if (!sdd->dryrun) {
1376		/*
1377		 * If progress reporting is requested, spawn a new thread to
1378		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1379		 */
1380		if (sdd->progress) {
1381			pa.pa_zhp = zhp;
1382			pa.pa_fd = sdd->outfd;
1383			pa.pa_parsable = sdd->parsable;
1384
1385			if ((err = pthread_create(&tid, NULL,
1386			    send_progress_thread, &pa)) != 0) {
1387				zfs_close(zhp);
1388				return (err);
1389			}
1390		}
1391
1392		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1393		    fromorigin, sdd->outfd, flags, sdd->debugnv);
1394
1395		if (sdd->progress) {
1396			(void) pthread_cancel(tid);
1397			(void) pthread_join(tid, NULL);
1398		}
1399	}
1400
1401	(void) strcpy(sdd->prevsnap, thissnap);
1402	sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1403	zfs_close(zhp);
1404	return (err);
1405}
1406
1407static int
1408dump_filesystem(zfs_handle_t *zhp, void *arg)
1409{
1410	int rv = 0;
1411	send_dump_data_t *sdd = arg;
1412	boolean_t missingfrom = B_FALSE;
1413	zfs_cmd_t zc = { 0 };
1414
1415	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1416	    zhp->zfs_name, sdd->tosnap);
1417	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1418		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1419		    "WARNING: could not send %s@%s: does not exist\n"),
1420		    zhp->zfs_name, sdd->tosnap);
1421		sdd->err = B_TRUE;
1422		return (0);
1423	}
1424
1425	if (sdd->replicate && sdd->fromsnap) {
1426		/*
1427		 * If this fs does not have fromsnap, and we're doing
1428		 * recursive, we need to send a full stream from the
1429		 * beginning (or an incremental from the origin if this
1430		 * is a clone).  If we're doing non-recursive, then let
1431		 * them get the error.
1432		 */
1433		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1434		    zhp->zfs_name, sdd->fromsnap);
1435		if (ioctl(zhp->zfs_hdl->libzfs_fd,
1436		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1437			missingfrom = B_TRUE;
1438		}
1439	}
1440
1441	sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1442	sdd->prevsnap_obj = 0;
1443	if (sdd->fromsnap == NULL || missingfrom)
1444		sdd->seenfrom = B_TRUE;
1445
1446	rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1447	if (!sdd->seenfrom) {
1448		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1449		    "WARNING: could not send %s@%s:\n"
1450		    "incremental source (%s@%s) does not exist\n"),
1451		    zhp->zfs_name, sdd->tosnap,
1452		    zhp->zfs_name, sdd->fromsnap);
1453		sdd->err = B_TRUE;
1454	} else if (!sdd->seento) {
1455		if (sdd->fromsnap) {
1456			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1457			    "WARNING: could not send %s@%s:\n"
1458			    "incremental source (%s@%s) "
1459			    "is not earlier than it\n"),
1460			    zhp->zfs_name, sdd->tosnap,
1461			    zhp->zfs_name, sdd->fromsnap);
1462		} else {
1463			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1464			    "WARNING: "
1465			    "could not send %s@%s: does not exist\n"),
1466			    zhp->zfs_name, sdd->tosnap);
1467		}
1468		sdd->err = B_TRUE;
1469	}
1470
1471	return (rv);
1472}
1473
1474static int
1475dump_filesystems(zfs_handle_t *rzhp, void *arg)
1476{
1477	send_dump_data_t *sdd = arg;
1478	nvpair_t *fspair;
1479	boolean_t needagain, progress;
1480
1481	if (!sdd->replicate)
1482		return (dump_filesystem(rzhp, sdd));
1483
1484	/* Mark the clone origin snapshots. */
1485	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1486	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1487		nvlist_t *nvfs;
1488		uint64_t origin_guid = 0;
1489
1490		VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1491		(void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1492		if (origin_guid != 0) {
1493			char *snapname;
1494			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1495			    origin_guid, &snapname);
1496			if (origin_nv != NULL) {
1497				nvlist_t *snapprops;
1498				VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1499				    "snapprops", &snapprops));
1500				VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1501				    snapname, &snapprops));
1502				VERIFY(0 == nvlist_add_boolean(
1503				    snapprops, "is_clone_origin"));
1504			}
1505		}
1506	}
1507again:
1508	needagain = progress = B_FALSE;
1509	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1510	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1511		nvlist_t *fslist, *parent_nv;
1512		char *fsname;
1513		zfs_handle_t *zhp;
1514		int err;
1515		uint64_t origin_guid = 0;
1516		uint64_t parent_guid = 0;
1517
1518		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1519		if (nvlist_lookup_boolean(fslist, "sent") == 0)
1520			continue;
1521
1522		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1523		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1524		(void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1525		    &parent_guid);
1526
1527		if (parent_guid != 0) {
1528			parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1529			if (!nvlist_exists(parent_nv, "sent")) {
1530				/* parent has not been sent; skip this one */
1531				needagain = B_TRUE;
1532				continue;
1533			}
1534		}
1535
1536		if (origin_guid != 0) {
1537			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1538			    origin_guid, NULL);
1539			if (origin_nv != NULL &&
1540			    !nvlist_exists(origin_nv, "sent")) {
1541				/*
1542				 * origin has not been sent yet;
1543				 * skip this clone.
1544				 */
1545				needagain = B_TRUE;
1546				continue;
1547			}
1548		}
1549
1550		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1551		if (zhp == NULL)
1552			return (-1);
1553		err = dump_filesystem(zhp, sdd);
1554		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1555		progress = B_TRUE;
1556		zfs_close(zhp);
1557		if (err)
1558			return (err);
1559	}
1560	if (needagain) {
1561		assert(progress);
1562		goto again;
1563	}
1564
1565	/* clean out the sent flags in case we reuse this fss */
1566	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1567	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1568		nvlist_t *fslist;
1569
1570		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1571		(void) nvlist_remove_all(fslist, "sent");
1572	}
1573
1574	return (0);
1575}
1576
/*
 * Decode a textual resume token into the nvlist it encodes.
 *
 * The token is "<version>-<checksum>-<packed length>-<payload>", where the
 * payload is the hex encoding of a zlib-compressed packed nvlist, the
 * checksum is the first word of the fletcher-4 checksum of the compressed
 * bytes and the length is the size of the nvlist once uncompressed.
 *
 * Returns the unpacked nvlist, or NULL on failure.  For a malformed token
 * the reason is recorded with zfs_error_aux(); NULL is also returned when
 * one of the working buffers cannot be allocated.
 */
nvlist_t *
zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
{
	unsigned int version;
	int nread;
	unsigned long long checksum, packed_len;

	/*
	 * Decode token header, which is:
	 *   <token version>-<checksum of payload>-<uncompressed payload length>
	 * Note that the only supported token version is 1.
	 */
	nread = sscanf(token, "%u-%llx-%llx-",
	    &version, &checksum, &packed_len);
	if (nread != 3) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "resume token is corrupt (invalid format)"));
		return (NULL);
	}

	if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "resume token is corrupt (invalid version %u)"),
		    version);
		return (NULL);
	}

	/*
	 * convert hexadecimal representation to binary; the header parsed
	 * above guarantees that the token contains at least one '-'
	 */
	token = strrchr(token, '-') + 1;
	size_t len = strlen(token) / 2;
	unsigned char *compressed = zfs_alloc(hdl, len);
	if (compressed == NULL)
		return (NULL);
	for (size_t i = 0; i < len; i++) {
		nread = sscanf(token + i * 2, "%2hhx", compressed + i);
		if (nread != 1) {
			free(compressed);
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "resume token is corrupt "
			    "(payload is not hex-encoded)"));
			return (NULL);
		}
	}

	/* verify checksum */
	zio_cksum_t cksum;
	fletcher_4_native(compressed, len, NULL, &cksum);
	if (cksum.zc_word[0] != checksum) {
		free(compressed);
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "resume token is corrupt (incorrect checksum)"));
		return (NULL);
	}

	/*
	 * uncompress; packed_len comes straight from the token, so the
	 * allocation may well fail for a damaged or hostile token
	 */
	void *packed = zfs_alloc(hdl, packed_len);
	if (packed == NULL) {
		free(compressed);
		return (NULL);
	}
	uLongf packed_len_long = packed_len;
	if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
	    packed_len_long != packed_len) {
		free(packed);
		free(compressed);
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "resume token is corrupt (decompression failed)"));
		return (NULL);
	}

	/* unpack nvlist */
	nvlist_t *nv;
	int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
	free(packed);
	free(compressed);
	if (error != 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "resume token is corrupt (nvlist_unpack failed)"));
		return (NULL);
	}
	return (nv);
}
1653
1654int
1655zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1656    const char *resume_token)
1657{
1658	char errbuf[1024];
1659	char *toname;
1660	char *fromname = NULL;
1661	uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1662	zfs_handle_t *zhp;
1663	int error = 0;
1664	char name[ZFS_MAX_DATASET_NAME_LEN];
1665	enum lzc_send_flags lzc_flags = 0;
1666	FILE *fout = (flags->verbose && flags->dryrun) ? stdout : stderr;
1667
1668	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1669	    "cannot resume send"));
1670
1671	nvlist_t *resume_nvl =
1672	    zfs_send_resume_token_to_nvlist(hdl, resume_token);
1673	if (resume_nvl == NULL) {
1674		/*
1675		 * zfs_error_aux has already been set by
1676		 * zfs_send_resume_token_to_nvlist
1677		 */
1678		return (zfs_error(hdl, EZFS_FAULT, errbuf));
1679	}
1680	if (flags->verbose) {
1681		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1682		    "resume token contents:\n"));
1683		nvlist_print(fout, resume_nvl);
1684	}
1685
1686	if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
1687	    nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
1688	    nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
1689	    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1690	    nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
1691		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1692		    "resume token is corrupt"));
1693		return (zfs_error(hdl, EZFS_FAULT, errbuf));
1694	}
1695	fromguid = 0;
1696	(void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
1697
1698	if (flags->largeblock || nvlist_exists(resume_nvl, "largeblockok"))
1699		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1700	if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
1701		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1702	if (flags->compress || nvlist_exists(resume_nvl, "compressok"))
1703		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1704	if (flags->raw || nvlist_exists(resume_nvl, "rawok"))
1705		lzc_flags |= LZC_SEND_FLAG_RAW;
1706
1707	if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
1708		if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
1709			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1710			    "'%s' is no longer the same snapshot used in "
1711			    "the initial send"), toname);
1712		} else {
1713			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1714			    "'%s' used in the initial send no longer exists"),
1715			    toname);
1716		}
1717		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1718	}
1719	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1720	if (zhp == NULL) {
1721		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1722		    "unable to access '%s'"), name);
1723		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1724	}
1725
1726	if (fromguid != 0) {
1727		if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) {
1728			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1729			    "incremental source %#llx no longer exists"),
1730			    (longlong_t)fromguid);
1731			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1732		}
1733		fromname = name;
1734	}
1735
1736	if (flags->verbose) {
1737		uint64_t size = 0;
1738		error = lzc_send_space(zhp->zfs_name, fromname,
1739		    lzc_flags, &size);
1740		if (error == 0)
1741			size = MAX(0, (int64_t)(size - bytes));
1742		send_print_verbose(fout, zhp->zfs_name, fromname,
1743		    size, flags->parsable);
1744	}
1745
1746	if (!flags->dryrun) {
1747		progress_arg_t pa = { 0 };
1748		pthread_t tid;
1749		/*
1750		 * If progress reporting is requested, spawn a new thread to
1751		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1752		 */
1753		if (flags->progress) {
1754			pa.pa_zhp = zhp;
1755			pa.pa_fd = outfd;
1756			pa.pa_parsable = flags->parsable;
1757
1758			error = pthread_create(&tid, NULL,
1759			    send_progress_thread, &pa);
1760			if (error != 0) {
1761				zfs_close(zhp);
1762				return (error);
1763			}
1764		}
1765
1766		error = lzc_send_resume(zhp->zfs_name, fromname, outfd,
1767		    lzc_flags, resumeobj, resumeoff);
1768
1769		if (flags->progress) {
1770			(void) pthread_cancel(tid);
1771			(void) pthread_join(tid, NULL);
1772		}
1773
1774		char errbuf[1024];
1775		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1776		    "warning: cannot send '%s'"), zhp->zfs_name);
1777
1778		zfs_close(zhp);
1779
1780		switch (error) {
1781		case 0:
1782			return (0);
1783		case EACCES:
1784			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1785			    "source key must be loaded"));
1786			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
1787
1788		case EXDEV:
1789		case ENOENT:
1790		case EDQUOT:
1791		case EFBIG:
1792		case EIO:
1793		case ENOLINK:
1794		case ENOSPC:
1795		case ENOSTR:
1796		case ENXIO:
1797		case EPIPE:
1798		case ERANGE:
1799		case EFAULT:
1800		case EROFS:
1801			zfs_error_aux(hdl, strerror(errno));
1802			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1803
1804		default:
1805			return (zfs_standard_error(hdl, errno, errbuf));
1806		}
1807	}
1808
1809
1810	zfs_close(zhp);
1811
1812	return (error);
1813}
1814
1815/*
1816 * Generate a send stream for the dataset identified by the argument zhp.
1817 *
1818 * The content of the send stream is the snapshot identified by
1819 * 'tosnap'.  Incremental streams are requested in two ways:
1820 *     - from the snapshot identified by "fromsnap" (if non-null) or
1821 *     - from the origin of the dataset identified by zhp, which must
1822 *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
1823 *	 is TRUE.
1824 *
1825 * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1826 * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1827 * if "replicate" is set.  If "doall" is set, dump all the intermediate
1828 * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1829 * case too. If "props" is set, send properties.
1830 */
1831int
1832zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1833    sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1834    void *cb_arg, nvlist_t **debugnvp)
1835{
1836	char errbuf[1024];
1837	send_dump_data_t sdd = { 0 };
1838	int err = 0;
1839	nvlist_t *fss = NULL;
1840	avl_tree_t *fsavl = NULL;
1841	static uint64_t holdseq;
1842	int spa_version;
1843	pthread_t tid = 0;
1844	int pipefd[2];
1845	dedup_arg_t dda = { 0 };
1846	int featureflags = 0;
1847	FILE *fout;
1848
1849	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1850	    "cannot send '%s'"), zhp->zfs_name);
1851
1852	if (fromsnap && fromsnap[0] == '\0') {
1853		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1854		    "zero-length incremental source"));
1855		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1856	}
1857
1858	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1859		uint64_t version;
1860		version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1861		if (version >= ZPL_VERSION_SA) {
1862			featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1863		}
1864	}
1865
1866	if (flags->holds)
1867		featureflags |= DMU_BACKUP_FEATURE_HOLDS;
1868
1869	/*
1870	 * Start the dedup thread if this is a dedup stream. We do not bother
1871	 * doing this if this a raw send of an encrypted dataset with dedup off
1872	 * because normal encrypted blocks won't dedup.
1873	 */
1874	if (flags->dedup && !flags->dryrun && !(flags->raw &&
1875	    zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF &&
1876	    zfs_prop_get_int(zhp, ZFS_PROP_DEDUP) == ZIO_CHECKSUM_OFF)) {
1877		featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1878		    DMU_BACKUP_FEATURE_DEDUPPROPS);
1879		if ((err = pipe(pipefd)) != 0) {
1880			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1881			return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1882			    errbuf));
1883		}
1884		dda.outputfd = outfd;
1885		dda.inputfd = pipefd[1];
1886		dda.dedup_hdl = zhp->zfs_hdl;
1887		if ((err = pthread_create(&tid, NULL, cksummer, &dda)) != 0) {
1888			(void) close(pipefd[0]);
1889			(void) close(pipefd[1]);
1890			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1891			return (zfs_error(zhp->zfs_hdl,
1892			    EZFS_THREADCREATEFAILED, errbuf));
1893		}
1894	}
1895
1896	if (flags->replicate || flags->doall || flags->props ||
1897	    flags->holds || flags->backup) {
1898		dmu_replay_record_t drr = { 0 };
1899		char *packbuf = NULL;
1900		size_t buflen = 0;
1901		zio_cksum_t zc;
1902
1903		ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
1904
1905		if (flags->replicate || flags->props || flags->backup ||
1906		    flags->holds) {
1907			nvlist_t *hdrnv;
1908
1909			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1910			if (fromsnap) {
1911				VERIFY(0 == nvlist_add_string(hdrnv,
1912				    "fromsnap", fromsnap));
1913			}
1914			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1915			if (!flags->replicate) {
1916				VERIFY(0 == nvlist_add_boolean(hdrnv,
1917				    "not_recursive"));
1918			}
1919			if (flags->raw) {
1920				VERIFY(0 == nvlist_add_boolean(hdrnv, "raw"));
1921			}
1922
1923			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1924			    fromsnap, tosnap, flags->replicate, flags->raw,
1925			    flags->verbose, flags->backup,
1926			    flags->holds, flags->props, &fss,
1927			    &fsavl);
1928			if (err)
1929				goto err_out;
1930			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1931			err = nvlist_pack(hdrnv, &packbuf, &buflen,
1932			    NV_ENCODE_XDR, 0);
1933			if (debugnvp)
1934				*debugnvp = hdrnv;
1935			else
1936				nvlist_free(hdrnv);
1937			if (err)
1938				goto stderr_out;
1939		}
1940
1941		if (!flags->dryrun) {
1942			/* write first begin record */
1943			drr.drr_type = DRR_BEGIN;
1944			drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1945			DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1946			    drr_versioninfo, DMU_COMPOUNDSTREAM);
1947			DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1948			    drr_versioninfo, featureflags);
1949			(void) snprintf(drr.drr_u.drr_begin.drr_toname,
1950			    sizeof (drr.drr_u.drr_begin.drr_toname),
1951			    "%s@%s", zhp->zfs_name, tosnap);
1952			drr.drr_payloadlen = buflen;
1953
1954			err = dump_record(&drr, packbuf, buflen, &zc, outfd);
1955			free(packbuf);
1956			if (err != 0)
1957				goto stderr_out;
1958
1959			/* write end record */
1960			bzero(&drr, sizeof (drr));
1961			drr.drr_type = DRR_END;
1962			drr.drr_u.drr_end.drr_checksum = zc;
1963			err = write(outfd, &drr, sizeof (drr));
1964			if (err == -1) {
1965				err = errno;
1966				goto stderr_out;
1967			}
1968
1969			err = 0;
1970		}
1971	}
1972
1973	/* dump each stream */
1974	sdd.fromsnap = fromsnap;
1975	sdd.tosnap = tosnap;
1976	if (tid != 0)
1977		sdd.outfd = pipefd[0];
1978	else
1979		sdd.outfd = outfd;
1980	sdd.replicate = flags->replicate;
1981	sdd.doall = flags->doall;
1982	sdd.fromorigin = flags->fromorigin;
1983	sdd.fss = fss;
1984	sdd.fsavl = fsavl;
1985	sdd.verbose = flags->verbose;
1986	sdd.parsable = flags->parsable;
1987	sdd.progress = flags->progress;
1988	sdd.dryrun = flags->dryrun;
1989	sdd.large_block = flags->largeblock;
1990	sdd.embed_data = flags->embed_data;
1991	sdd.compress = flags->compress;
1992	sdd.raw = flags->raw;
1993	sdd.holds = flags->holds;
1994	sdd.filter_cb = filter_func;
1995	sdd.filter_cb_arg = cb_arg;
1996	if (debugnvp)
1997		sdd.debugnv = *debugnvp;
1998	if (sdd.verbose && sdd.dryrun)
1999		sdd.std_out = B_TRUE;
2000	fout = sdd.std_out ? stdout : stderr;
2001
2002	/*
2003	 * Some flags require that we place user holds on the datasets that are
2004	 * being sent so they don't get destroyed during the send. We can skip
2005	 * this step if the pool is imported read-only since the datasets cannot
2006	 * be destroyed.
2007	 */
2008	if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
2009	    ZPOOL_PROP_READONLY, NULL) &&
2010	    zfs_spa_version(zhp, &spa_version) == 0 &&
2011	    spa_version >= SPA_VERSION_USERREFS &&
2012	    (flags->doall || flags->replicate)) {
2013		++holdseq;
2014		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
2015		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
2016		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
2017		if (sdd.cleanup_fd < 0) {
2018			err = errno;
2019			goto stderr_out;
2020		}
2021		sdd.snapholds = fnvlist_alloc();
2022	} else {
2023		sdd.cleanup_fd = -1;
2024		sdd.snapholds = NULL;
2025	}
2026
2027	if (flags->verbose || sdd.snapholds != NULL) {
2028		/*
2029		 * Do a verbose no-op dry run to get all the verbose output
2030		 * or to gather snapshot hold's before generating any data,
2031		 * then do a non-verbose real run to generate the streams.
2032		 */
2033		sdd.dryrun = B_TRUE;
2034		err = dump_filesystems(zhp, &sdd);
2035
2036		if (err != 0)
2037			goto stderr_out;
2038
2039		if (flags->verbose) {
2040			if (flags->parsable) {
2041				(void) fprintf(fout, "size\t%llu\n",
2042				    (longlong_t)sdd.size);
2043			} else {
2044				char buf[16];
2045				zfs_nicenum(sdd.size, buf, sizeof (buf));
2046				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
2047				    "total estimated size is %s\n"), buf);
2048			}
2049		}
2050
2051		/* Ensure no snaps found is treated as an error. */
2052		if (!sdd.seento) {
2053			err = ENOENT;
2054			goto err_out;
2055		}
2056
2057		/* Skip the second run if dryrun was requested. */
2058		if (flags->dryrun)
2059			goto err_out;
2060
2061		if (sdd.snapholds != NULL) {
2062			err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
2063			if (err != 0)
2064				goto stderr_out;
2065
2066			fnvlist_free(sdd.snapholds);
2067			sdd.snapholds = NULL;
2068		}
2069
2070		sdd.dryrun = B_FALSE;
2071		sdd.verbose = B_FALSE;
2072	}
2073
2074	err = dump_filesystems(zhp, &sdd);
2075	fsavl_destroy(fsavl);
2076	nvlist_free(fss);
2077
2078	/* Ensure no snaps found is treated as an error. */
2079	if (err == 0 && !sdd.seento)
2080		err = ENOENT;
2081
2082	if (tid != 0) {
2083		if (err != 0)
2084			(void) pthread_cancel(tid);
2085		(void) close(pipefd[0]);
2086		(void) pthread_join(tid, NULL);
2087	}
2088
2089	if (sdd.cleanup_fd != -1) {
2090		VERIFY(0 == close(sdd.cleanup_fd));
2091		sdd.cleanup_fd = -1;
2092	}
2093
2094	if (!flags->dryrun && (flags->replicate || flags->doall ||
2095	    flags->props || flags->backup || flags->holds)) {
2096		/*
2097		 * write final end record.  NB: want to do this even if
2098		 * there was some error, because it might not be totally
2099		 * failed.
2100		 */
2101		dmu_replay_record_t drr = { 0 };
2102		drr.drr_type = DRR_END;
2103		if (write(outfd, &drr, sizeof (drr)) == -1) {
2104			return (zfs_standard_error(zhp->zfs_hdl,
2105			    errno, errbuf));
2106		}
2107	}
2108
2109	return (err || sdd.err);
2110
2111stderr_out:
2112	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
2113err_out:
2114	fsavl_destroy(fsavl);
2115	nvlist_free(fss);
2116	fnvlist_free(sdd.snapholds);
2117
2118	if (sdd.cleanup_fd != -1)
2119		VERIFY(0 == close(sdd.cleanup_fd));
2120	if (tid != 0) {
2121		(void) pthread_cancel(tid);
2122		(void) close(pipefd[0]);
2123		(void) pthread_join(tid, NULL);
2124	}
2125	return (err);
2126}
2127
2128int
2129zfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
2130    enum lzc_send_flags flags)
2131{
2132	int err;
2133	libzfs_handle_t *hdl = zhp->zfs_hdl;
2134
2135	char errbuf[1024];
2136	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2137	    "warning: cannot send '%s'"), zhp->zfs_name);
2138
2139	err = lzc_send(zhp->zfs_name, from, fd, flags);
2140	if (err != 0) {
2141		switch (errno) {
2142		case EXDEV:
2143			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2144			    "not an earlier snapshot from the same fs"));
2145			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2146
2147		case ENOENT:
2148		case ESRCH:
2149			if (lzc_exists(zhp->zfs_name)) {
2150				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2151				    "incremental source (%s) does not exist"),
2152				    from);
2153			}
2154			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2155
2156		case EACCES:
2157			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2158			    "dataset key must be loaded"));
2159			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
2160
2161		case EBUSY:
2162			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2163			    "target is busy; if a filesystem, "
2164			    "it must not be mounted"));
2165			return (zfs_error(hdl, EZFS_BUSY, errbuf));
2166
2167		case EDQUOT:
2168		case EFBIG:
2169		case EIO:
2170		case ENOLINK:
2171		case ENOSPC:
2172		case ENOSTR:
2173		case ENXIO:
2174		case EPIPE:
2175		case ERANGE:
2176		case EFAULT:
2177		case EROFS:
2178			zfs_error_aux(hdl, strerror(errno));
2179			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2180
2181		default:
2182			return (zfs_standard_error(hdl, errno, errbuf));
2183		}
2184	}
2185	return (err != 0);
2186}
2187
2188/*
2189 * Routines specific to "zfs recv"
2190 */
2191
2192static int
2193recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
2194    boolean_t byteswap, zio_cksum_t *zc)
2195{
2196	char *cp = buf;
2197	int rv;
2198	int len = ilen;
2199
2200	assert(ilen <= SPA_MAXBLOCKSIZE);
2201
2202	do {
2203		rv = read(fd, cp, len);
2204		cp += rv;
2205		len -= rv;
2206	} while (rv > 0);
2207
2208	if (rv < 0 || len != 0) {
2209		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2210		    "failed to read from stream"));
2211		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
2212		    "cannot receive")));
2213	}
2214
2215	if (zc) {
2216		if (byteswap)
2217			(void) fletcher_4_incremental_byteswap(buf, ilen, zc);
2218		else
2219			(void) fletcher_4_incremental_native(buf, ilen, zc);
2220	}
2221	return (0);
2222}
2223
2224static int
2225recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2226    boolean_t byteswap, zio_cksum_t *zc)
2227{
2228	char *buf;
2229	int err;
2230
2231	buf = zfs_alloc(hdl, len);
2232	if (buf == NULL)
2233		return (ENOMEM);
2234
2235	err = recv_read(hdl, fd, buf, len, byteswap, zc);
2236	if (err != 0) {
2237		free(buf);
2238		return (err);
2239	}
2240
2241	err = nvlist_unpack(buf, len, nvp, 0);
2242	free(buf);
2243	if (err != 0) {
2244		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2245		    "stream (malformed nvlist)"));
2246		return (EINVAL);
2247	}
2248	return (0);
2249}
2250
2251/*
2252 * Returns the grand origin (origin of origin of origin...) of a given handle.
2253 * If this dataset is not a clone, it simply returns a copy of the original
2254 * handle.
2255 */
2256static zfs_handle_t *
2257recv_open_grand_origin(zfs_handle_t *zhp)
2258{
2259	char origin[ZFS_MAX_DATASET_NAME_LEN];
2260	zprop_source_t src;
2261	zfs_handle_t *ozhp = zfs_handle_dup(zhp);
2262
2263	while (ozhp != NULL) {
2264		if (zfs_prop_get(ozhp, ZFS_PROP_ORIGIN, origin,
2265		    sizeof (origin), &src, NULL, 0, B_FALSE) != 0)
2266			break;
2267
2268		(void) zfs_close(ozhp);
2269		ozhp = zfs_open(zhp->zfs_hdl, origin, ZFS_TYPE_FILESYSTEM);
2270	}
2271
2272	return (ozhp);
2273}
2274
2275static int
2276recv_rename_impl(zfs_handle_t *zhp, const char *source, const char *target)
2277{
2278	int err;
2279	zfs_handle_t *ozhp = NULL;
2280
2281	/*
2282	 * Attempt to rename the dataset. If it fails with EACCES we have
2283	 * attempted to rename the dataset outside of its encryption root.
2284	 * Force the dataset to become an encryption root and try again.
2285	 */
2286	err = lzc_rename(source, target);
2287	if (err == EACCES) {
2288		ozhp = recv_open_grand_origin(zhp);
2289		if (ozhp == NULL) {
2290			err = ENOENT;
2291			goto out;
2292		}
2293
2294		err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
2295		    NULL, NULL, 0);
2296		if (err != 0)
2297			goto out;
2298
2299		err = lzc_rename(source, target);
2300	}
2301
2302out:
2303	if (ozhp != NULL)
2304		zfs_close(ozhp);
2305	return (err);
2306}
2307
2308static int
2309recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2310    int baselen, char *newname, recvflags_t *flags)
2311{
2312	static int seq;
2313	int err;
2314	prop_changelist_t *clp = NULL;
2315	zfs_handle_t *zhp = NULL;
2316
2317	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2318	if (zhp == NULL) {
2319		err = -1;
2320		goto out;
2321	}
2322	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2323	    flags->force ? MS_FORCE : 0);
2324	if (clp == NULL) {
2325		err = -1;
2326		goto out;
2327	}
2328	err = changelist_prefix(clp);
2329	if (err)
2330		goto out;
2331
2332	if (tryname) {
2333		(void) strcpy(newname, tryname);
2334		if (flags->verbose) {
2335			(void) printf("attempting rename %s to %s\n",
2336			    name, newname);
2337		}
2338		err = recv_rename_impl(zhp, name, newname);
2339		if (err == 0)
2340			changelist_rename(clp, name, tryname);
2341	} else {
2342		err = ENOENT;
2343	}
2344
2345	if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
2346		seq++;
2347
2348		(void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
2349		    "%.*srecv-%u-%u", baselen, name, getpid(), seq);
2350		if (flags->verbose) {
2351			(void) printf("failed - trying rename %s to %s\n",
2352			    name, newname);
2353		}
2354		err = recv_rename_impl(zhp, name, newname);
2355		if (err == 0)
2356			changelist_rename(clp, name, newname);
2357		if (err && flags->verbose) {
2358			(void) printf("failed (%u) - "
2359			    "will try again on next pass\n", errno);
2360		}
2361		err = EAGAIN;
2362	} else if (flags->verbose) {
2363		if (err == 0)
2364			(void) printf("success\n");
2365		else
2366			(void) printf("failed (%u)\n", errno);
2367	}
2368
2369	(void) changelist_postfix(clp);
2370
2371out:
2372	if (clp != NULL)
2373		changelist_free(clp);
2374	if (zhp != NULL)
2375		zfs_close(zhp);
2376
2377	return (err);
2378}
2379
2380static int
2381recv_promote(libzfs_handle_t *hdl, const char *fsname,
2382    const char *origin_fsname, recvflags_t *flags)
2383{
2384	int err;
2385	zfs_cmd_t zc = {"\0"};
2386	zfs_handle_t *zhp = NULL, *ozhp = NULL;
2387
2388	if (flags->verbose)
2389		(void) printf("promoting %s\n", fsname);
2390
2391	(void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value));
2392	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
2393
2394	/*
2395	 * Attempt to promote the dataset. If it fails with EACCES the
2396	 * promotion would cause this dataset to leave its encryption root.
2397	 * Force the origin to become an encryption root and try again.
2398	 */
2399	err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2400	if (err == EACCES) {
2401		zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
2402		if (zhp == NULL) {
2403			err = -1;
2404			goto out;
2405		}
2406
2407		ozhp = recv_open_grand_origin(zhp);
2408		if (ozhp == NULL) {
2409			err = -1;
2410			goto out;
2411		}
2412
2413		err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
2414		    NULL, NULL, 0);
2415		if (err != 0)
2416			goto out;
2417
2418		err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2419	}
2420
2421out:
2422	if (zhp != NULL)
2423		zfs_close(zhp);
2424	if (ozhp != NULL)
2425		zfs_close(ozhp);
2426
2427	return (err);
2428}
2429
2430static int
2431recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
2432    char *newname, recvflags_t *flags)
2433{
2434	int err = 0;
2435	prop_changelist_t *clp;
2436	zfs_handle_t *zhp;
2437	boolean_t defer = B_FALSE;
2438	int spa_version;
2439
2440	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2441	if (zhp == NULL)
2442		return (-1);
2443	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2444	    flags->force ? MS_FORCE : 0);
2445	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
2446	    zfs_spa_version(zhp, &spa_version) == 0 &&
2447	    spa_version >= SPA_VERSION_USERREFS)
2448		defer = B_TRUE;
2449	zfs_close(zhp);
2450	if (clp == NULL)
2451		return (-1);
2452	err = changelist_prefix(clp);
2453	if (err)
2454		return (err);
2455
2456	if (flags->verbose)
2457		(void) printf("attempting destroy %s\n", name);
2458	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
2459		nvlist_t *nv = fnvlist_alloc();
2460		fnvlist_add_boolean(nv, name);
2461		err = lzc_destroy_snaps(nv, defer, NULL);
2462		fnvlist_free(nv);
2463	} else {
2464		err = lzc_destroy(name);
2465	}
2466	if (err == 0) {
2467		if (flags->verbose)
2468			(void) printf("success\n");
2469		changelist_remove(clp, name);
2470	}
2471
2472	(void) changelist_postfix(clp);
2473	changelist_free(clp);
2474
2475	/*
2476	 * Deferred destroy might destroy the snapshot or only mark it to be
2477	 * destroyed later, and it returns success in either case.
2478	 */
2479	if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
2480	    ZFS_TYPE_SNAPSHOT))) {
2481		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
2482	}
2483
2484	return (err);
2485}
2486
/*
 * Search state handed to guid_to_name_cb() by guid_to_name().
 */
typedef struct guid_to_name_data {
	uint64_t guid;		/* guid being searched for */
	boolean_t bookmark_ok;	/* also iterate bookmarks when set */
	char *name;		/* out: receives the matching dataset name */
	char *skip;		/* last path component to skip, or NULL */
} guid_to_name_data_t;
2493
2494static int
2495guid_to_name_cb(zfs_handle_t *zhp, void *arg)
2496{
2497	guid_to_name_data_t *gtnd = arg;
2498	const char *slash;
2499	int err;
2500
2501	if (gtnd->skip != NULL &&
2502	    (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
2503	    strcmp(slash + 1, gtnd->skip) == 0) {
2504		zfs_close(zhp);
2505		return (0);
2506	}
2507
2508	if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) {
2509		(void) strcpy(gtnd->name, zhp->zfs_name);
2510		zfs_close(zhp);
2511		return (EEXIST);
2512	}
2513
2514	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
2515	if (err != EEXIST && gtnd->bookmark_ok)
2516		err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd);
2517	zfs_close(zhp);
2518	return (err);
2519}
2520
2521/*
2522 * Attempt to find the local dataset associated with this guid.  In the case of
2523 * multiple matches, we attempt to find the "best" match by searching
2524 * progressively larger portions of the hierarchy.  This allows one to send a
2525 * tree of datasets individually and guarantee that we will find the source
2526 * guid within that hierarchy, even if there are multiple matches elsewhere.
2527 */
2528static int
2529guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
2530    boolean_t bookmark_ok, char *name)
2531{
2532	char pname[ZFS_MAX_DATASET_NAME_LEN];
2533	guid_to_name_data_t gtnd;
2534
2535	gtnd.guid = guid;
2536	gtnd.bookmark_ok = bookmark_ok;
2537	gtnd.name = name;
2538	gtnd.skip = NULL;
2539
2540	/*
2541	 * Search progressively larger portions of the hierarchy, starting
2542	 * with the filesystem specified by 'parent'.  This will
2543	 * select the "most local" version of the origin snapshot in the case
2544	 * that there are multiple matching snapshots in the system.
2545	 */
2546	(void) strlcpy(pname, parent, sizeof (pname));
2547	char *cp = strrchr(pname, '@');
2548	if (cp == NULL)
2549		cp = strchr(pname, '\0');
2550	for (; cp != NULL; cp = strrchr(pname, '/')) {
2551		/* Chop off the last component and open the parent */
2552		*cp = '\0';
2553		zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
2554
2555		if (zhp == NULL)
2556			continue;
2557		int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
2558		if (err != EEXIST)
2559			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
2560		if (err != EEXIST && bookmark_ok)
2561			err = zfs_iter_bookmarks(zhp, guid_to_name_cb, &gtnd);
2562		zfs_close(zhp);
2563		if (err == EEXIST)
2564			return (0);
2565
2566		/*
2567		 * Remember the last portion of the dataset so we skip it next
2568		 * time through (as we've already searched that portion of the
2569		 * hierarchy).
2570		 */
2571		gtnd.skip = strrchr(pname, '/') + 1;
2572	}
2573
2574	return (ENOENT);
2575}
2576
2577/*
2578 * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
2579 * guid1 is after guid2.
2580 */
2581static int
2582created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
2583    uint64_t guid1, uint64_t guid2)
2584{
2585	nvlist_t *nvfs;
2586	char *fsname, *snapname;
2587	char buf[ZFS_MAX_DATASET_NAME_LEN];
2588	int rv;
2589	zfs_handle_t *guid1hdl, *guid2hdl;
2590	uint64_t create1, create2;
2591
2592	if (guid2 == 0)
2593		return (0);
2594	if (guid1 == 0)
2595		return (1);
2596
2597	nvfs = fsavl_find(avl, guid1, &snapname);
2598	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2599	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2600	guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2601	if (guid1hdl == NULL)
2602		return (-1);
2603
2604	nvfs = fsavl_find(avl, guid2, &snapname);
2605	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2606	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2607	guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2608	if (guid2hdl == NULL) {
2609		zfs_close(guid1hdl);
2610		return (-1);
2611	}
2612
2613	create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
2614	create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
2615
2616	if (create1 < create2)
2617		rv = -1;
2618	else if (create1 > create2)
2619		rv = +1;
2620	else
2621		rv = 0;
2622
2623	zfs_close(guid1hdl);
2624	zfs_close(guid2hdl);
2625
2626	return (rv);
2627}
2628
2629/*
2630 * This function reestablishes the heirarchy of encryption roots after a
2631 * recursive incremental receive has completed. This must be done after the
2632 * second call to recv_incremental_replication() has renamed and promoted all
2633 * sent datasets to their final locations in the dataset heriarchy.
2634 */
2635/* ARGSUSED */
2636static int
2637recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *destname,
2638    nvlist_t *stream_nv, avl_tree_t *stream_avl)
2639{
2640	int err;
2641	nvpair_t *fselem = NULL;
2642	nvlist_t *stream_fss;
2643	char *cp;
2644	char top_zfs[ZFS_MAX_DATASET_NAME_LEN];
2645
2646	(void) strcpy(top_zfs, destname);
2647	cp = strrchr(top_zfs, '@');
2648	if (cp != NULL)
2649		*cp = '\0';
2650
2651	VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss));
2652
2653	while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) {
2654		zfs_handle_t *zhp = NULL;
2655		uint64_t crypt;
2656		nvlist_t *snaps, *props, *stream_nvfs = NULL;
2657		nvpair_t *snapel = NULL;
2658		boolean_t is_encroot, is_clone, stream_encroot;
2659		char *cp;
2660		char *stream_keylocation = NULL;
2661		char keylocation[MAXNAMELEN];
2662		char fsname[ZFS_MAX_DATASET_NAME_LEN];
2663
2664		keylocation[0] = '\0';
2665		VERIFY(0 == nvpair_value_nvlist(fselem, &stream_nvfs));
2666		VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "snaps", &snaps));
2667		VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "props", &props));
2668		stream_encroot = nvlist_exists(stream_nvfs, "is_encroot");
2669
2670		/* find a snapshot from the stream that exists locally */
2671		err = ENOENT;
2672		while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) {
2673			uint64_t guid;
2674
2675			VERIFY(0 == nvpair_value_uint64(snapel, &guid));
2676			err = guid_to_name(hdl, destname, guid, B_FALSE,
2677			    fsname);
2678			if (err == 0)
2679				break;
2680		}
2681
2682		if (err != 0)
2683			continue;
2684
2685		cp = strchr(fsname, '@');
2686		if (cp != NULL)
2687			*cp = '\0';
2688
2689		zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
2690		if (zhp == NULL) {
2691			err = ENOENT;
2692			goto error;
2693		}
2694
2695		crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
2696		is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0';
2697		(void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);
2698
2699		/* we don't need to do anything for unencrypted datasets */
2700		if (crypt == ZIO_CRYPT_OFF) {
2701			zfs_close(zhp);
2702			continue;
2703		}
2704
2705		/*
2706		 * If the dataset is flagged as an encryption root, was not
2707		 * received as a clone and is not currently an encryption root,
2708		 * force it to become one. Fixup the keylocation if necessary.
2709		 */
2710		if (stream_encroot) {
2711			if (!is_clone && !is_encroot) {
2712				err = lzc_change_key(fsname,
2713				    DCP_CMD_FORCE_NEW_KEY, NULL, NULL, 0);
2714				if (err != 0) {
2715					zfs_close(zhp);
2716					goto error;
2717				}
2718			}
2719
2720			VERIFY(0 == nvlist_lookup_string(props,
2721			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
2722			    &stream_keylocation));
2723
2724			/*
2725			 * Refresh the properties in case the call to
2726			 * lzc_change_key() changed the value.
2727			 */
2728			zfs_refresh_properties(zhp);
2729			err = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION,
2730			    keylocation, sizeof (keylocation), NULL, NULL,
2731			    0, B_TRUE);
2732			if (err != 0) {
2733				zfs_close(zhp);
2734				goto error;
2735			}
2736
2737			if (strcmp(keylocation, stream_keylocation) != 0) {
2738				err = zfs_prop_set(zhp,
2739				    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
2740				    stream_keylocation);
2741				if (err != 0) {
2742					zfs_close(zhp);
2743					goto error;
2744				}
2745			}
2746		}
2747
2748		/*
2749		 * If the dataset is not flagged as an encryption root and is
2750		 * currently an encryption root, force it to inherit from its
2751		 * parent. The root of a raw send should never be
2752		 * force-inherited.
2753		 */
2754		if (!stream_encroot && is_encroot &&
2755		    strcmp(top_zfs, fsname) != 0) {
2756			err = lzc_change_key(fsname, DCP_CMD_FORCE_INHERIT,
2757			    NULL, NULL, 0);
2758			if (err != 0) {
2759				zfs_close(zhp);
2760				goto error;
2761			}
2762		}
2763
2764		zfs_close(zhp);
2765	}
2766
2767	return (0);
2768
2769error:
2770	return (err);
2771}
2772
2773static int
2774recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
2775    recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2776    nvlist_t *renamed)
2777{
2778	nvlist_t *local_nv;
2779	avl_tree_t *local_avl;
2780	nvpair_t *fselem, *nextfselem;
2781	char *fromsnap;
2782	char newname[ZFS_MAX_DATASET_NAME_LEN];
2783	int error;
2784	boolean_t needagain, progress, recursive;
2785	char *s1, *s2;
2786
2787	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
2788
2789	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2790	    ENOENT);
2791
2792	if (flags->dryrun)
2793		return (0);
2794
2795again:
2796	needagain = progress = B_FALSE;
2797
2798	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
2799	    recursive, B_TRUE, B_FALSE,
2800	    B_FALSE, B_FALSE, B_TRUE, &local_nv, &local_avl)) != 0)
2801		return (error);
2802
2803	/*
2804	 * Process deletes and renames
2805	 */
2806	for (fselem = nvlist_next_nvpair(local_nv, NULL);
2807	    fselem; fselem = nextfselem) {
2808		nvlist_t *nvfs, *snaps;
2809		nvlist_t *stream_nvfs = NULL;
2810		nvpair_t *snapelem, *nextsnapelem;
2811		uint64_t fromguid = 0;
2812		uint64_t originguid = 0;
2813		uint64_t stream_originguid = 0;
2814		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
2815		char *fsname, *stream_fsname;
2816
2817		nextfselem = nvlist_next_nvpair(local_nv, fselem);
2818
2819		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
2820		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
2821		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2822		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
2823		    &parent_fromsnap_guid));
2824		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
2825
2826		/*
2827		 * First find the stream's fs, so we can check for
2828		 * a different origin (due to "zfs promote")
2829		 */
2830		for (snapelem = nvlist_next_nvpair(snaps, NULL);
2831		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
2832			uint64_t thisguid;
2833
2834			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2835			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
2836
2837			if (stream_nvfs != NULL)
2838				break;
2839		}
2840
2841		/* check for promote */
2842		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
2843		    &stream_originguid);
2844		if (stream_nvfs && originguid != stream_originguid) {
2845			switch (created_before(hdl, local_avl,
2846			    stream_originguid, originguid)) {
2847			case 1: {
2848				/* promote it! */
2849				nvlist_t *origin_nvfs;
2850				char *origin_fsname;
2851
2852				origin_nvfs = fsavl_find(local_avl, originguid,
2853				    NULL);
2854				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2855				    "name", &origin_fsname));
2856				error = recv_promote(hdl, fsname, origin_fsname,
2857				    flags);
2858				if (error == 0)
2859					progress = B_TRUE;
2860				break;
2861			}
2862			default:
2863				break;
2864			case -1:
2865				fsavl_destroy(local_avl);
2866				nvlist_free(local_nv);
2867				return (-1);
2868			}
2869			/*
2870			 * We had/have the wrong origin, therefore our
2871			 * list of snapshots is wrong.  Need to handle
2872			 * them on the next pass.
2873			 */
2874			needagain = B_TRUE;
2875			continue;
2876		}
2877
2878		for (snapelem = nvlist_next_nvpair(snaps, NULL);
2879		    snapelem; snapelem = nextsnapelem) {
2880			uint64_t thisguid;
2881			char *stream_snapname;
2882			nvlist_t *found, *props;
2883
2884			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2885
2886			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2887			found = fsavl_find(stream_avl, thisguid,
2888			    &stream_snapname);
2889
2890			/* check for delete */
2891			if (found == NULL) {
2892				char name[ZFS_MAX_DATASET_NAME_LEN];
2893
2894				if (!flags->force)
2895					continue;
2896
2897				(void) snprintf(name, sizeof (name), "%s@%s",
2898				    fsname, nvpair_name(snapelem));
2899
2900				error = recv_destroy(hdl, name,
2901				    strlen(fsname)+1, newname, flags);
2902				if (error)
2903					needagain = B_TRUE;
2904				else
2905					progress = B_TRUE;
2906				continue;
2907			}
2908
2909			stream_nvfs = found;
2910
2911			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2912			    &props) && 0 == nvlist_lookup_nvlist(props,
2913			    stream_snapname, &props)) {
2914				zfs_cmd_t zc = { 0 };
2915
2916				zc.zc_cookie = B_TRUE; /* received */
2917				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2918				    "%s@%s", fsname, nvpair_name(snapelem));
2919				if (zcmd_write_src_nvlist(hdl, &zc,
2920				    props) == 0) {
2921					(void) zfs_ioctl(hdl,
2922					    ZFS_IOC_SET_PROP, &zc);
2923					zcmd_free_nvlists(&zc);
2924				}
2925			}
2926
2927			/* check for different snapname */
2928			if (strcmp(nvpair_name(snapelem),
2929			    stream_snapname) != 0) {
2930				char name[ZFS_MAX_DATASET_NAME_LEN];
2931				char tryname[ZFS_MAX_DATASET_NAME_LEN];
2932
2933				(void) snprintf(name, sizeof (name), "%s@%s",
2934				    fsname, nvpair_name(snapelem));
2935				(void) snprintf(tryname, sizeof (name), "%s@%s",
2936				    fsname, stream_snapname);
2937
2938				error = recv_rename(hdl, name, tryname,
2939				    strlen(fsname)+1, newname, flags);
2940				if (error)
2941					needagain = B_TRUE;
2942				else
2943					progress = B_TRUE;
2944			}
2945
2946			if (strcmp(stream_snapname, fromsnap) == 0)
2947				fromguid = thisguid;
2948		}
2949
2950		/* check for delete */
2951		if (stream_nvfs == NULL) {
2952			if (!flags->force)
2953				continue;
2954
2955			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2956			    newname, flags);
2957			if (error)
2958				needagain = B_TRUE;
2959			else
2960				progress = B_TRUE;
2961			continue;
2962		}
2963
2964		if (fromguid == 0) {
2965			if (flags->verbose) {
2966				(void) printf("local fs %s does not have "
2967				    "fromsnap (%s in stream); must have "
2968				    "been deleted locally; ignoring\n",
2969				    fsname, fromsnap);
2970			}
2971			continue;
2972		}
2973
2974		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
2975		    "name", &stream_fsname));
2976		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
2977		    "parentfromsnap", &stream_parent_fromsnap_guid));
2978
2979		s1 = strrchr(fsname, '/');
2980		s2 = strrchr(stream_fsname, '/');
2981
2982		/*
2983		 * Check for rename. If the exact receive path is specified, it
2984		 * does not count as a rename, but we still need to check the
2985		 * datasets beneath it.
2986		 */
2987		if ((stream_parent_fromsnap_guid != 0 &&
2988		    parent_fromsnap_guid != 0 &&
2989		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
2990		    ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
2991		    (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
2992			nvlist_t *parent;
2993			char tryname[ZFS_MAX_DATASET_NAME_LEN];
2994
2995			parent = fsavl_find(local_avl,
2996			    stream_parent_fromsnap_guid, NULL);
2997			/*
2998			 * NB: parent might not be found if we used the
2999			 * tosnap for stream_parent_fromsnap_guid,
3000			 * because the parent is a newly-created fs;
3001			 * we'll be able to rename it after we recv the
3002			 * new fs.
3003			 */
3004			if (parent != NULL) {
3005				char *pname;
3006
3007				VERIFY(0 == nvlist_lookup_string(parent, "name",
3008				    &pname));
3009				(void) snprintf(tryname, sizeof (tryname),
3010				    "%s%s", pname, strrchr(stream_fsname, '/'));
3011			} else {
3012				tryname[0] = '\0';
3013				if (flags->verbose) {
3014					(void) printf("local fs %s new parent "
3015					    "not found\n", fsname);
3016				}
3017			}
3018
3019			newname[0] = '\0';
3020
3021			error = recv_rename(hdl, fsname, tryname,
3022			    strlen(tofs)+1, newname, flags);
3023
3024			if (renamed != NULL && newname[0] != '\0') {
3025				VERIFY(0 == nvlist_add_boolean(renamed,
3026				    newname));
3027			}
3028
3029			if (error)
3030				needagain = B_TRUE;
3031			else
3032				progress = B_TRUE;
3033		}
3034	}
3035
3036	fsavl_destroy(local_avl);
3037	nvlist_free(local_nv);
3038
3039	if (needagain && progress) {
3040		/* do another pass to fix up temporary names */
3041		if (flags->verbose)
3042			(void) printf("another pass:\n");
3043		goto again;
3044	}
3045
3046	return (needagain || error != 0);
3047}
3048
3049static int
3050zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
3051    recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
3052    char **top_zfs, int cleanup_fd, uint64_t *action_handlep,
3053    nvlist_t *cmdprops)
3054{
3055	nvlist_t *stream_nv = NULL;
3056	avl_tree_t *stream_avl = NULL;
3057	char *fromsnap = NULL;
3058	char *sendsnap = NULL;
3059	char *cp;
3060	char tofs[ZFS_MAX_DATASET_NAME_LEN];
3061	char sendfs[ZFS_MAX_DATASET_NAME_LEN];
3062	char errbuf[1024];
3063	dmu_replay_record_t drre;
3064	int error;
3065	boolean_t anyerr = B_FALSE;
3066	boolean_t softerr = B_FALSE;
3067	boolean_t recursive, raw;
3068
3069	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3070	    "cannot receive"));
3071
3072	assert(drr->drr_type == DRR_BEGIN);
3073	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
3074	assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
3075	    DMU_COMPOUNDSTREAM);
3076
3077	/*
3078	 * Read in the nvlist from the stream.
3079	 */
3080	if (drr->drr_payloadlen != 0) {
3081		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
3082		    &stream_nv, flags->byteswap, zc);
3083		if (error) {
3084			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3085			goto out;
3086		}
3087	}
3088
3089	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3090	    ENOENT);
3091	raw = (nvlist_lookup_boolean(stream_nv, "raw") == 0);
3092
3093	if (recursive && strchr(destname, '@')) {
3094		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3095		    "cannot specify snapshot name for multi-snapshot stream"));
3096		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3097		goto out;
3098	}
3099
3100	/*
3101	 * Read in the end record and verify checksum.
3102	 */
3103	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
3104	    flags->byteswap, NULL)))
3105		goto out;
3106	if (flags->byteswap) {
3107		drre.drr_type = BSWAP_32(drre.drr_type);
3108		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
3109		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
3110		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
3111		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
3112		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
3113		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
3114		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
3115		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
3116	}
3117	if (drre.drr_type != DRR_END) {
3118		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3119		goto out;
3120	}
3121	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
3122		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3123		    "incorrect header checksum"));
3124		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3125		goto out;
3126	}
3127
3128	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
3129
3130	if (drr->drr_payloadlen != 0) {
3131		nvlist_t *stream_fss;
3132
3133		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
3134		    &stream_fss));
3135		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
3136			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3137			    "couldn't allocate avl tree"));
3138			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
3139			goto out;
3140		}
3141
3142		if (fromsnap != NULL && recursive) {
3143			nvlist_t *renamed = NULL;
3144			nvpair_t *pair = NULL;
3145
3146			(void) strlcpy(tofs, destname, sizeof (tofs));
3147			if (flags->isprefix) {
3148				struct drr_begin *drrb = &drr->drr_u.drr_begin;
3149				int i;
3150
3151				if (flags->istail) {
3152					cp = strrchr(drrb->drr_toname, '/');
3153					if (cp == NULL) {
3154						(void) strlcat(tofs, "/",
3155						    sizeof (tofs));
3156						i = 0;
3157					} else {
3158						i = (cp - drrb->drr_toname);
3159					}
3160				} else {
3161					i = strcspn(drrb->drr_toname, "/@");
3162				}
3163				/* zfs_receive_one() will create_parents() */
3164				(void) strlcat(tofs, &drrb->drr_toname[i],
3165				    sizeof (tofs));
3166				*strchr(tofs, '@') = '\0';
3167			}
3168
3169			if (!flags->dryrun && !flags->nomount) {
3170				VERIFY(0 == nvlist_alloc(&renamed,
3171				    NV_UNIQUE_NAME, 0));
3172			}
3173
3174			softerr = recv_incremental_replication(hdl, tofs, flags,
3175			    stream_nv, stream_avl, renamed);
3176
3177			/* Unmount renamed filesystems before receiving. */
3178			while ((pair = nvlist_next_nvpair(renamed,
3179			    pair)) != NULL) {
3180				zfs_handle_t *zhp;
3181				prop_changelist_t *clp = NULL;
3182
3183				zhp = zfs_open(hdl, nvpair_name(pair),
3184				    ZFS_TYPE_FILESYSTEM);
3185				if (zhp != NULL) {
3186					clp = changelist_gather(zhp,
3187					    ZFS_PROP_MOUNTPOINT, 0, 0);
3188					zfs_close(zhp);
3189					if (clp != NULL) {
3190						softerr |=
3191						    changelist_prefix(clp);
3192						changelist_free(clp);
3193					}
3194				}
3195			}
3196
3197			nvlist_free(renamed);
3198		}
3199	}
3200
3201	/*
3202	 * Get the fs specified by the first path in the stream (the top level
3203	 * specified by 'zfs send') and pass it to each invocation of
3204	 * zfs_receive_one().
3205	 */
3206	(void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
3207	    sizeof (sendfs));
3208	if ((cp = strchr(sendfs, '@')) != NULL) {
3209		*cp = '\0';
3210		/*
3211		 * Find the "sendsnap", the final snapshot in a replication
3212		 * stream.  zfs_receive_one() handles certain errors
3213		 * differently, depending on if the contained stream is the
3214		 * last one or not.
3215		 */
3216		sendsnap = (cp + 1);
3217	}
3218
3219	/* Finally, receive each contained stream */
3220	do {
3221		/*
3222		 * we should figure out if it has a recoverable
3223		 * error, in which case do a recv_skip() and drive on.
3224		 * Note, if we fail due to already having this guid,
3225		 * zfs_receive_one() will take care of it (ie,
3226		 * recv_skip() and return 0).
3227		 */
3228		error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
3229		    sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
3230		    action_handlep, sendsnap, cmdprops);
3231		if (error == ENODATA) {
3232			error = 0;
3233			break;
3234		}
3235		anyerr |= error;
3236	} while (error == 0);
3237
3238	if (drr->drr_payloadlen != 0 && recursive && fromsnap != NULL) {
3239		/*
3240		 * Now that we have the fs's they sent us, try the
3241		 * renames again.
3242		 */
3243		softerr = recv_incremental_replication(hdl, tofs, flags,
3244		    stream_nv, stream_avl, NULL);
3245	}
3246
3247	if (raw && softerr == 0) {
3248		softerr = recv_fix_encryption_hierarchy(hdl, destname,
3249		    stream_nv, stream_avl);
3250	}
3251
3252out:
3253	fsavl_destroy(stream_avl);
3254	nvlist_free(stream_nv);
3255	if (softerr)
3256		error = -2;
3257	if (anyerr)
3258		error = -1;
3259	return (error);
3260}
3261
3262static void
3263trunc_prop_errs(int truncated)
3264{
3265	ASSERT(truncated != 0);
3266
3267	if (truncated == 1)
3268		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
3269		    "1 more property could not be set\n"));
3270	else
3271		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
3272		    "%d more properties could not be set\n"), truncated);
3273}
3274
3275static int
3276recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
3277{
3278	dmu_replay_record_t *drr;
3279	void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
3280	char errbuf[1024];
3281
3282	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3283	    "cannot receive:"));
3284
3285	/* XXX would be great to use lseek if possible... */
3286	drr = buf;
3287
3288	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
3289	    byteswap, NULL) == 0) {
3290		if (byteswap)
3291			drr->drr_type = BSWAP_32(drr->drr_type);
3292
3293		switch (drr->drr_type) {
3294		case DRR_BEGIN:
3295			if (drr->drr_payloadlen != 0) {
3296				(void) recv_read(hdl, fd, buf,
3297				    drr->drr_payloadlen, B_FALSE, NULL);
3298			}
3299			break;
3300
3301		case DRR_END:
3302			free(buf);
3303			return (0);
3304
3305		case DRR_OBJECT:
3306			if (byteswap) {
3307				drr->drr_u.drr_object.drr_bonuslen =
3308				    BSWAP_32(drr->drr_u.drr_object.
3309				    drr_bonuslen);
3310			}
3311			(void) recv_read(hdl, fd, buf,
3312			    P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
3313			    B_FALSE, NULL);
3314			break;
3315
3316		case DRR_WRITE:
3317			if (byteswap) {
3318				drr->drr_u.drr_write.drr_logical_size =
3319				    BSWAP_64(
3320				    drr->drr_u.drr_write.drr_logical_size);
3321				drr->drr_u.drr_write.drr_compressed_size =
3322				    BSWAP_64(
3323				    drr->drr_u.drr_write.drr_compressed_size);
3324			}
3325			uint64_t payload_size =
3326			    DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
3327			(void) recv_read(hdl, fd, buf,
3328			    payload_size, B_FALSE, NULL);
3329			break;
3330		case DRR_SPILL:
3331			if (byteswap) {
3332				drr->drr_u.drr_spill.drr_length =
3333				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
3334			}
3335			(void) recv_read(hdl, fd, buf,
3336			    drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
3337			break;
3338		case DRR_WRITE_EMBEDDED:
3339			if (byteswap) {
3340				drr->drr_u.drr_write_embedded.drr_psize =
3341				    BSWAP_32(drr->drr_u.drr_write_embedded.
3342				    drr_psize);
3343			}
3344			(void) recv_read(hdl, fd, buf,
3345			    P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
3346			    8), B_FALSE, NULL);
3347			break;
3348		case DRR_WRITE_BYREF:
3349		case DRR_FREEOBJECTS:
3350		case DRR_FREE:
3351			break;
3352
3353		default:
3354			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3355			    "invalid record type"));
3356			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3357		}
3358	}
3359
3360	free(buf);
3361	return (-1);
3362}
3363
3364static void
3365recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
3366    boolean_t resumable)
3367{
3368	char target_fs[ZFS_MAX_DATASET_NAME_LEN];
3369
3370	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3371	    "checksum mismatch or incomplete stream"));
3372
3373	if (!resumable)
3374		return;
3375	(void) strlcpy(target_fs, target_snap, sizeof (target_fs));
3376	*strchr(target_fs, '@') = '\0';
3377	zfs_handle_t *zhp = zfs_open(hdl, target_fs,
3378	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3379	if (zhp == NULL)
3380		return;
3381
3382	char token_buf[ZFS_MAXPROPLEN];
3383	int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
3384	    token_buf, sizeof (token_buf),
3385	    NULL, NULL, 0, B_TRUE);
3386	if (error == 0) {
3387		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3388		    "checksum mismatch or incomplete stream.\n"
3389		    "Partially received snapshot is saved.\n"
3390		    "A resuming stream can be generated on the sending "
3391		    "system by running:\n"
3392		    "    zfs send -t %s"),
3393		    token_buf);
3394	}
3395	zfs_close(zhp);
3396}
3397
3398/*
3399 * Prepare a new nvlist of properties that are to override (-o) or be excluded
3400 * (-x) from the received dataset
3401 * recvprops: received properties from the send stream
3402 * cmdprops: raw input properties from command line
3403 * origprops: properties, both locally-set and received, currently set on the
3404 *            target dataset if it exists, NULL otherwise.
3405 * oxprops: valid output override (-o) and excluded (-x) properties
3406 */
3407static int
3408zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type,
3409    char *fsname, boolean_t zoned, boolean_t recursive, boolean_t newfs,
3410    boolean_t raw, boolean_t toplevel, nvlist_t *recvprops, nvlist_t *cmdprops,
3411    nvlist_t *origprops, nvlist_t **oxprops, uint8_t **wkeydata_out,
3412    uint_t *wkeylen_out, const char *errbuf)
3413{
3414	nvpair_t *nvp;
3415	nvlist_t *oprops, *voprops;
3416	zfs_handle_t *zhp = NULL;
3417	zpool_handle_t *zpool_hdl = NULL;
3418	char *cp;
3419	int ret = 0;
3420	char namebuf[ZFS_MAX_DATASET_NAME_LEN];
3421
3422	if (nvlist_empty(cmdprops))
3423		return (0); /* No properties to override or exclude */
3424
3425	*oxprops = fnvlist_alloc();
3426	oprops = fnvlist_alloc();
3427
3428	strlcpy(namebuf, fsname, ZFS_MAX_DATASET_NAME_LEN);
3429
3430	/*
3431	 * Get our dataset handle. The target dataset may not exist yet.
3432	 */
3433	if (zfs_dataset_exists(hdl, namebuf, ZFS_TYPE_DATASET)) {
3434		zhp = zfs_open(hdl, namebuf, ZFS_TYPE_DATASET);
3435		if (zhp == NULL) {
3436			ret = -1;
3437			goto error;
3438		}
3439	}
3440
3441	/* open the zpool handle */
3442	cp = strchr(namebuf, '/');
3443	if (cp != NULL)
3444		*cp = '\0';
3445	zpool_hdl = zpool_open(hdl, namebuf);
3446	if (zpool_hdl == NULL) {
3447		ret = -1;
3448		goto error;
3449	}
3450
3451	/* restore namebuf to match fsname for later use */
3452	if (cp != NULL)
3453		*cp = '/';
3454
3455	/*
3456	 * first iteration: process excluded (-x) properties now and gather
3457	 * added (-o) properties to be later processed by zfs_valid_proplist()
3458	 */
3459	nvp = NULL;
3460	while ((nvp = nvlist_next_nvpair(cmdprops, nvp)) != NULL) {
3461		const char *name = nvpair_name(nvp);
3462		zfs_prop_t prop = zfs_name_to_prop(name);
3463
3464		/* "origin" is processed separately, don't handle it here */
3465		if (prop == ZFS_PROP_ORIGIN)
3466			continue;
3467
3468		/*
3469		 * we're trying to override or exclude a property that does not
3470		 * make sense for this type of dataset, but we don't want to
3471		 * fail if the receive is recursive: this comes in handy when
3472		 * the send stream contains, for instance, a child ZVOL and
3473		 * we're trying to receive it with "-o atime=on"
3474		 */
3475		if (!zfs_prop_valid_for_type(prop, type) &&
3476		    !zfs_prop_user(name)) {
3477			if (recursive)
3478				continue;
3479			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3480			    "property '%s' does not apply to datasets of this "
3481			    "type"), name);
3482			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
3483			goto error;
3484		}
3485
3486		/* raw streams can't override encryption properties */
3487		if ((zfs_prop_encryption_key_param(prop) ||
3488		    prop == ZFS_PROP_ENCRYPTION) && raw) {
3489			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3490			    "encryption property '%s' cannot "
3491			    "be set or excluded for raw streams."), name);
3492			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
3493			goto error;
3494		}
3495
3496		/* incremental streams can only exclude encryption properties */
3497		if ((zfs_prop_encryption_key_param(prop) ||
3498		    prop == ZFS_PROP_ENCRYPTION) && !newfs &&
3499		    nvpair_type(nvp) != DATA_TYPE_BOOLEAN) {
3500			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3501			    "encryption property '%s' cannot "
3502			    "be set for incremental streams."), name);
3503			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
3504			goto error;
3505		}
3506
3507		switch (nvpair_type(nvp)) {
3508		case DATA_TYPE_BOOLEAN: /* -x property */
3509			/*
3510			 * DATA_TYPE_BOOLEAN is the way we're asked to "exclude"
3511			 * a property: this is done by forcing an explicit
3512			 * inherit on the destination so the effective value is
3513			 * not the one we received from the send stream.
3514			 * We do this only if the property is not already
3515			 * locally-set, in which case its value will take
3516			 * priority over the received anyway.
3517			 */
3518			if (nvlist_exists(origprops, name)) {
3519				nvlist_t *attrs;
3520				char  *source = NULL;
3521
3522				attrs = fnvlist_lookup_nvlist(origprops, name);
3523				if (nvlist_lookup_string(attrs,
3524				    ZPROP_SOURCE, &source) == 0 &&
3525				    strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)
3526					continue;
3527			}
3528			/*
3529			 * We can't force an explicit inherit on non-inheritable
3530			 * properties: if we're asked to exclude this kind of
3531			 * values we remove them from "recvprops" input nvlist.
3532			 */
3533			if (!zfs_prop_inheritable(prop) &&
3534			    !zfs_prop_user(name) && /* can be inherited too */
3535			    nvlist_exists(recvprops, name))
3536				fnvlist_remove(recvprops, name);
3537			else
3538				fnvlist_add_nvpair(*oxprops, nvp);
3539			break;
3540		case DATA_TYPE_STRING: /* -o property=value */
3541			fnvlist_add_nvpair(oprops, nvp);
3542			break;
3543		default:
3544			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3545			    "property '%s' must be a string or boolean"), name);
3546			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
3547			goto error;
3548		}
3549	}
3550
3551	if (toplevel) {
3552		/* convert override strings properties to native */
3553		if ((voprops = zfs_valid_proplist(hdl, ZFS_TYPE_DATASET,
3554		    oprops, zoned, zhp, zpool_hdl, B_FALSE, errbuf)) == NULL) {
3555			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
3556			goto error;
3557		}
3558
3559		/*
3560		 * zfs_crypto_create() requires the parent name. Get it
3561		 * by truncating the fsname copy stored in namebuf.
3562		 */
3563		cp = strrchr(namebuf, '/');
3564		if (cp != NULL)
3565			*cp = '\0';
3566
3567		if (!raw && zfs_crypto_create(hdl, namebuf, voprops, NULL,
3568		    B_FALSE, wkeydata_out, wkeylen_out) != 0) {
3569			fnvlist_free(voprops);
3570			ret = zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
3571			goto error;
3572		}
3573
3574		/* second pass: process "-o" properties */
3575		fnvlist_merge(*oxprops, voprops);
3576		fnvlist_free(voprops);
3577	} else {
3578		/* override props on child dataset are inherited */
3579		nvp = NULL;
3580		while ((nvp = nvlist_next_nvpair(oprops, nvp)) != NULL) {
3581			const char *name = nvpair_name(nvp);
3582			fnvlist_add_boolean(*oxprops, name);
3583		}
3584	}
3585
3586error:
3587	if (zhp != NULL)
3588		zfs_close(zhp);
3589	if (zpool_hdl != NULL)
3590		zpool_close(zpool_hdl);
3591	fnvlist_free(oprops);
3592	return (ret);
3593}
3594
3595/*
3596 * Restores a backup of tosnap from the file descriptor specified by infd.
3597 */
3598static int
3599zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
3600    const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
3601    dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
3602    avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
3603    uint64_t *action_handlep, const char *finalsnap, nvlist_t *cmdprops)
3604{
3605	time_t begin_time;
3606	int ioctl_err, ioctl_errno, err;
3607	char *cp;
3608	struct drr_begin *drrb = &drr->drr_u.drr_begin;
3609	char errbuf[1024];
3610	const char *chopprefix;
3611	boolean_t newfs = B_FALSE;
3612	boolean_t stream_wantsnewfs;
3613	boolean_t newprops = B_FALSE;
3614	uint64_t read_bytes = 0;
3615	uint64_t errflags = 0;
3616	uint64_t parent_snapguid = 0;
3617	prop_changelist_t *clp = NULL;
3618	nvlist_t *snapprops_nvlist = NULL;
3619	nvlist_t *snapholds_nvlist = NULL;
3620	zprop_errflags_t prop_errflags;
3621	nvlist_t *prop_errors = NULL;
3622	boolean_t recursive;
3623	char *snapname = NULL;
3624	char destsnap[MAXPATHLEN * 2];
3625	char origin[MAXNAMELEN];
3626	char name[MAXPATHLEN];
3627	char tmp_keylocation[MAXNAMELEN];
3628	nvlist_t *rcvprops = NULL; /* props received from the send stream */
3629	nvlist_t *oxprops = NULL; /* override (-o) and exclude (-x) props */
3630	nvlist_t *origprops = NULL; /* original props (if destination exists) */
3631	zfs_type_t type;
3632	boolean_t toplevel = B_FALSE;
3633	boolean_t zoned = B_FALSE;
3634	boolean_t hastoken = B_FALSE;
3635	uint8_t *wkeydata = NULL;
3636	uint_t wkeylen = 0;
3637
3638	begin_time = time(NULL);
3639	bzero(origin, MAXNAMELEN);
3640	bzero(tmp_keylocation, MAXNAMELEN);
3641
3642	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3643	    "cannot receive"));
3644
3645	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3646	    ENOENT);
3647
3648	/* Did the user request holds be skipped via zfs recv -k? */
3649	boolean_t holds = flags->holds && !flags->skipholds;
3650
3651	if (stream_avl != NULL) {
3652		char *keylocation = NULL;
3653		nvlist_t *lookup = NULL;
3654		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
3655		    &snapname);
3656
3657		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
3658		    &parent_snapguid);
3659		err = nvlist_lookup_nvlist(fs, "props", &rcvprops);
3660		if (err) {
3661			VERIFY(0 == nvlist_alloc(&rcvprops, NV_UNIQUE_NAME, 0));
3662			newprops = B_TRUE;
3663		}
3664		/*
3665		 * The keylocation property may only be set on encryption roots,
3666		 * but this dataset might not become an encryption root until
3667		 * recv_fix_encryption_hierarchy() is called. That function
3668		 * will fixup the keylocation anyway, so we temporarily unset
3669		 * the keylocation for now to avoid any errors from the receive
3670		 * ioctl.
3671		 */
3672		err = nvlist_lookup_string(rcvprops,
3673		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
3674		if (err == 0) {
3675			(void) strcpy(tmp_keylocation, keylocation);
3676			(void) nvlist_remove_all(rcvprops,
3677			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
3678		}
3679
3680		if (flags->canmountoff) {
3681			VERIFY(0 == nvlist_add_uint64(rcvprops,
3682			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
3683		} else if (newprops) {  /* nothing in rcvprops, eliminate it */
3684			nvlist_free(rcvprops);
3685			rcvprops = NULL;
3686			newprops = B_FALSE;
3687		}
3688		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &lookup)) {
3689			VERIFY(0 == nvlist_lookup_nvlist(lookup,
3690			    snapname, &snapprops_nvlist));
3691		}
3692		if (holds) {
3693			if (0 == nvlist_lookup_nvlist(fs, "snapholds",
3694			    &lookup)) {
3695				VERIFY(0 == nvlist_lookup_nvlist(lookup,
3696				    snapname, &snapholds_nvlist));
3697			}
3698		}
3699	}
3700
3701	cp = NULL;
3702
3703	/*
3704	 * Determine how much of the snapshot name stored in the stream
3705	 * we are going to tack on to the name they specified on the
3706	 * command line, and how much we are going to chop off.
3707	 *
3708	 * If they specified a snapshot, chop the entire name stored in
3709	 * the stream.
3710	 */
3711	if (flags->istail) {
3712		/*
3713		 * A filesystem was specified with -e. We want to tack on only
3714		 * the tail of the sent snapshot path.
3715		 */
3716		if (strchr(tosnap, '@')) {
3717			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3718			    "argument - snapshot not allowed with -e"));
3719			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3720			goto out;
3721		}
3722
3723		chopprefix = strrchr(sendfs, '/');
3724
3725		if (chopprefix == NULL) {
3726			/*
3727			 * The tail is the poolname, so we need to
3728			 * prepend a path separator.
3729			 */
3730			int len = strlen(drrb->drr_toname);
3731			cp = malloc(len + 2);
3732			cp[0] = '/';
3733			(void) strcpy(&cp[1], drrb->drr_toname);
3734			chopprefix = cp;
3735		} else {
3736			chopprefix = drrb->drr_toname + (chopprefix - sendfs);
3737		}
3738	} else if (flags->isprefix) {
3739		/*
3740		 * A filesystem was specified with -d. We want to tack on
3741		 * everything but the first element of the sent snapshot path
3742		 * (all but the pool name).
3743		 */
3744		if (strchr(tosnap, '@')) {
3745			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3746			    "argument - snapshot not allowed with -d"));
3747			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3748			goto out;
3749		}
3750
3751		chopprefix = strchr(drrb->drr_toname, '/');
3752		if (chopprefix == NULL)
3753			chopprefix = strchr(drrb->drr_toname, '@');
3754	} else if (strchr(tosnap, '@') == NULL) {
3755		/*
3756		 * If a filesystem was specified without -d or -e, we want to
3757		 * tack on everything after the fs specified by 'zfs send'.
3758		 */
3759		chopprefix = drrb->drr_toname + strlen(sendfs);
3760	} else {
3761		/* A snapshot was specified as an exact path (no -d or -e). */
3762		if (recursive) {
3763			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3764			    "cannot specify snapshot name for multi-snapshot "
3765			    "stream"));
3766			err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3767			goto out;
3768		}
3769		chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
3770	}
3771
3772	ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
3773	ASSERT(chopprefix > drrb->drr_toname);
3774	ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
3775	ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
3776	    chopprefix[0] == '\0');
3777
3778	/*
3779	 * Determine name of destination snapshot, store in zc_value.
3780	 */
3781	(void) strlcpy(destsnap, tosnap, sizeof (destsnap));
3782	(void) strlcat(destsnap, chopprefix, sizeof (destsnap));
3783	free(cp);
3784	if (!zfs_name_valid(destsnap, ZFS_TYPE_SNAPSHOT)) {
3785		err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3786		goto out;
3787	}
3788
3789	/*
3790	 * Determine the name of the origin snapshot, store in zc_string.
3791	 */
3792	if (originsnap) {
3793		(void) strlcpy(origin, originsnap, sizeof (origin));
3794		if (flags->verbose)
3795			(void) printf("using provided clone origin %s\n",
3796			    origin);
3797	} else if (drrb->drr_flags & DRR_FLAG_CLONE) {
3798		if (guid_to_name(hdl, destsnap,
3799		    drrb->drr_fromguid, B_FALSE, origin) != 0) {
3800			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3801			    "local origin for clone %s does not exist"),
3802			    destsnap);
3803			err = zfs_error(hdl, EZFS_NOENT, errbuf);
3804			goto out;
3805		}
3806		if (flags->verbose)
3807			(void) printf("found clone origin %s\n", origin);
3808	}
3809
3810	boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
3811	    DMU_BACKUP_FEATURE_RESUMING;
3812	boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
3813	    DMU_BACKUP_FEATURE_RAW;
3814	boolean_t embedded = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
3815	    DMU_BACKUP_FEATURE_EMBED_DATA;
3816	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
3817	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
3818
3819	if (stream_wantsnewfs) {
3820		/*
3821		 * if the parent fs does not exist, look for it based on
3822		 * the parent snap GUID
3823		 */
3824		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3825		    "cannot receive new filesystem stream"));
3826
3827		(void) strcpy(name, destsnap);
3828		cp = strrchr(name, '/');
3829		if (cp)
3830			*cp = '\0';
3831		if (cp &&
3832		    !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3833			char suffix[ZFS_MAX_DATASET_NAME_LEN];
3834			(void) strcpy(suffix, strrchr(destsnap, '/'));
3835			if (guid_to_name(hdl, name, parent_snapguid,
3836			    B_FALSE, destsnap) == 0) {
3837				*strchr(destsnap, '@') = '\0';
3838				(void) strcat(destsnap, suffix);
3839			}
3840		}
3841	} else {
3842		/*
3843		 * if the fs does not exist, look for it based on the
3844		 * fromsnap GUID
3845		 */
3846		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3847		    "cannot receive incremental stream"));
3848
3849		(void) strcpy(name, destsnap);
3850		*strchr(name, '@') = '\0';
3851
3852		/*
3853		 * If the exact receive path was specified and this is the
3854		 * topmost path in the stream, then if the fs does not exist we
3855		 * should look no further.
3856		 */
3857		if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
3858		    strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
3859		    !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3860			char snap[ZFS_MAX_DATASET_NAME_LEN];
3861			(void) strcpy(snap, strchr(destsnap, '@'));
3862			if (guid_to_name(hdl, name, drrb->drr_fromguid,
3863			    B_FALSE, destsnap) == 0) {
3864				*strchr(destsnap, '@') = '\0';
3865				(void) strcat(destsnap, snap);
3866			}
3867		}
3868	}
3869
3870	(void) strcpy(name, destsnap);
3871	*strchr(name, '@') = '\0';
3872
3873	if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3874		zfs_cmd_t zc = { 0 };
3875		zfs_handle_t *zhp;
3876		boolean_t encrypted;
3877
3878		(void) strcpy(zc.zc_name, name);
3879
3880		/*
3881		 * Destination fs exists.  It must be one of these cases:
3882		 *  - an incremental send stream
3883		 *  - the stream specifies a new fs (full stream or clone)
3884		 *    and they want us to blow away the existing fs (and
3885		 *    have therefore specified -F and removed any snapshots)
3886		 *  - we are resuming a failed receive.
3887		 */
3888		if (stream_wantsnewfs) {
3889			if (!flags->force) {
3890				zcmd_free_nvlists(&zc);
3891				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3892				    "destination '%s' exists\n"
3893				    "must specify -F to overwrite it"), name);
3894				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3895				goto out;
3896			}
3897			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
3898			    &zc) == 0) {
3899				zcmd_free_nvlists(&zc);
3900				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3901				    "destination has snapshots (eg. %s)\n"
3902				    "must destroy them to overwrite it"),
3903				    zc.zc_name);
3904				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3905				goto out;
3906			}
3907		}
3908
3909		if ((zhp = zfs_open(hdl, name,
3910		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
3911			zcmd_free_nvlists(&zc);
3912			err = -1;
3913			goto out;
3914		}
3915
3916		if (stream_wantsnewfs &&
3917		    zhp->zfs_dmustats.dds_origin[0]) {
3918			zcmd_free_nvlists(&zc);
3919			zfs_close(zhp);
3920			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3921			    "destination '%s' is a clone\n"
3922			    "must destroy it to overwrite it"), name);
3923			err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3924			goto out;
3925		}
3926
3927		/*
3928		 * Raw sends can not be performed as an incremental on top
3929		 * of existing unencrypted datasets. zfs recv -F cant be
3930		 * used to blow away an existing encrypted filesystem. This
3931		 * is because it would require the dsl dir to point to the
3932		 * new key (or lack of a key) and the old key at the same
3933		 * time. The -F flag may still be used for deleting
3934		 * intermediate snapshots that would otherwise prevent the
3935		 * receive from working.
3936		 */
3937		encrypted = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) !=
3938		    ZIO_CRYPT_OFF;
3939		if (!stream_wantsnewfs && !encrypted && raw) {
3940			zfs_close(zhp);
3941			zcmd_free_nvlists(&zc);
3942			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3943			    "cannot perform raw receive on top of "
3944			    "existing unencrypted dataset"));
3945			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3946			goto out;
3947		}
3948
3949		if (stream_wantsnewfs && flags->force &&
3950		    ((raw && !encrypted) || encrypted)) {
3951			zfs_close(zhp);
3952			zcmd_free_nvlists(&zc);
3953			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3954			    "zfs receive -F cannot be used to destroy an "
3955			    "encrypted filesystem or overwrite an "
3956			    "unencrypted one with an encrypted one"));
3957			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3958			goto out;
3959		}
3960
3961		if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
3962		    stream_wantsnewfs) {
3963			/* We can't do online recv in this case */
3964			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
3965			if (clp == NULL) {
3966				zfs_close(zhp);
3967				err = -1;
3968				goto out;
3969			}
3970			if (changelist_prefix(clp) != 0) {
3971				changelist_free(clp);
3972				zfs_close(zhp);
3973				err = -1;
3974				goto out;
3975			}
3976		}
3977
3978		/*
3979		 * If we are resuming a newfs, set newfs here so that we will
3980		 * mount it if the recv succeeds this time.  We can tell
3981		 * that it was a newfs on the first recv because the fs
3982		 * itself will be inconsistent (if the fs existed when we
3983		 * did the first recv, we would have received it into
3984		 * .../%recv).
3985		 */
3986		if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
3987			newfs = B_TRUE;
3988
3989		/* we want to know if we're zoned when validating -o|-x props */
3990		zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
3991
3992		/* may need this info later, get it now we have zhp around */
3993		if (zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0,
3994		    NULL, NULL, 0, B_TRUE) == 0)
3995			hastoken = B_TRUE;
3996
3997		/* gather existing properties on destination */
3998		origprops = fnvlist_alloc();
3999		fnvlist_merge(origprops, zhp->zfs_props);
4000		fnvlist_merge(origprops, zhp->zfs_user_props);
4001
4002		zfs_close(zhp);
4003		cp = NULL;
4004	} else {
4005		zfs_handle_t *zhp;
4006
4007		/*
4008		 * Destination filesystem does not exist.  Therefore we better
4009		 * be creating a new filesystem (either from a full backup, or
4010		 * a clone).  It would therefore be invalid if the user
4011		 * specified only the pool name (i.e. if the destination name
4012		 * contained no slash character).
4013		 */
4014		cp = strrchr(name, '/');
4015
4016		if (!stream_wantsnewfs || cp == NULL) {
4017			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4018			    "destination '%s' does not exist"), name);
4019			err = zfs_error(hdl, EZFS_NOENT, errbuf);
4020			goto out;
4021		}
4022
4023		/*
4024		 * Trim off the final dataset component so we perform the
4025		 * recvbackup ioctl to the filesystems's parent.
4026		 */
4027		*cp = '\0';
4028
4029		if (flags->isprefix && !flags->istail && !flags->dryrun &&
4030		    create_parents(hdl, destsnap, strlen(tosnap)) != 0) {
4031			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4032			goto out;
4033		}
4034
4035		/* validate parent */
4036		zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
4037		if (zhp == NULL) {
4038			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4039			goto out;
4040		}
4041		if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
4042			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4043			    "parent '%s' is not a filesystem"), name);
4044			err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4045			zfs_close(zhp);
4046			goto out;
4047		}
4048
4049		/*
4050		 * It is invalid to receive a properties stream that was
4051		 * unencrypted on the send side as a child of an encrypted
4052		 * parent. Technically there is nothing preventing this, but
4053		 * it would mean that the encryption=off property which is
4054		 * locally set on the send side would not be received correctly.
4055		 * We can infer encryption=off if the stream is not raw and
4056		 * properties were included since the send side will only ever
4057		 * send the encryption property in a raw nvlist header. This
4058		 * check will be avoided if the user specifically overrides
4059		 * the encryption property on the command line.
4060		 */
4061		if (!raw && rcvprops != NULL &&
4062		    !nvlist_exists(cmdprops,
4063		    zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) {
4064			uint64_t crypt;
4065
4066			crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
4067
4068			if (crypt != ZIO_CRYPT_OFF) {
4069				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4070				    "parent '%s' must not be encrypted to "
4071				    "receive unenecrypted property"), name);
4072				err = zfs_error(hdl, EZFS_BADPROP, errbuf);
4073				zfs_close(zhp);
4074				goto out;
4075			}
4076		}
4077		zfs_close(zhp);
4078
4079		newfs = B_TRUE;
4080		*cp = '/';
4081	}
4082
4083	if (flags->verbose) {
4084		(void) printf("%s %s stream of %s into %s\n",
4085		    flags->dryrun ? "would receive" : "receiving",
4086		    drrb->drr_fromguid ? "incremental" : "full",
4087		    drrb->drr_toname, destsnap);
4088		(void) fflush(stdout);
4089	}
4090
4091	if (flags->dryrun) {
4092		err = recv_skip(hdl, infd, flags->byteswap);
4093		goto out;
4094	}
4095
4096	if (top_zfs && (*top_zfs == NULL || strcmp(*top_zfs, name) == 0))
4097		toplevel = B_TRUE;
4098	if (drrb->drr_type == DMU_OST_ZVOL) {
4099		type = ZFS_TYPE_VOLUME;
4100	} else if (drrb->drr_type == DMU_OST_ZFS) {
4101		type = ZFS_TYPE_FILESYSTEM;
4102	} else {
4103		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4104		    "invalid record type: 0x%d"), drrb->drr_type);
4105		err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4106		goto out;
4107	}
4108	if ((err = zfs_setup_cmdline_props(hdl, type, name, zoned, recursive,
4109	    stream_wantsnewfs, raw, toplevel, rcvprops, cmdprops, origprops,
4110	    &oxprops, &wkeydata, &wkeylen, errbuf)) != 0)
4111		goto out;
4112
4113	/*
4114	 * The following is a difference between ZoL and illumos.
4115	 *
4116	 * On illumos, we must trim the last component of the dataset name
4117	 * that is passed via the ioctl so that we can properly validate
4118	 * zfs_secpolicy_recv() when receiving to a delegated dataset within
4119	 * zone. This matches the historical behavior of the receive ioctl.
4120	 * However,  we can't do this until after zfs_setup_cmdline_props()
4121	 * has finished with the full name.
4122	 */
4123	if (cp != NULL)
4124		*cp = '\0';
4125
4126	err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
4127	    oxprops, wkeydata, wkeylen, origin, flags->force, flags->resumable,
4128	    raw, infd, drr_noswap, cleanup_fd, &read_bytes, &errflags,
4129	    action_handlep, &prop_errors);
4130	ioctl_errno = errno;
4131	prop_errflags = errflags;
4132
4133	if (err == 0) {
4134		nvpair_t *prop_err = NULL;
4135
4136		while ((prop_err = nvlist_next_nvpair(prop_errors,
4137		    prop_err)) != NULL) {
4138			char tbuf[1024];
4139			zfs_prop_t prop;
4140			int intval;
4141
4142			prop = zfs_name_to_prop(nvpair_name(prop_err));
4143			(void) nvpair_value_int32(prop_err, &intval);
4144			if (strcmp(nvpair_name(prop_err),
4145			    ZPROP_N_MORE_ERRORS) == 0) {
4146				trunc_prop_errs(intval);
4147				break;
4148			} else if (snapname == NULL || finalsnap == NULL ||
4149			    strcmp(finalsnap, snapname) == 0 ||
4150			    strcmp(nvpair_name(prop_err),
4151			    zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
4152				/*
4153				 * Skip the special case of, for example,
4154				 * "refquota", errors on intermediate
4155				 * snapshots leading up to a final one.
4156				 * That's why we have all of the checks above.
4157				 *
4158				 * See zfs_ioctl.c's extract_delay_props() for
4159				 * a list of props which can fail on
4160				 * intermediate snapshots, but shouldn't
4161				 * affect the overall receive.
4162				 */
4163				(void) snprintf(tbuf, sizeof (tbuf),
4164				    dgettext(TEXT_DOMAIN,
4165				    "cannot receive %s property on %s"),
4166				    nvpair_name(prop_err), name);
4167				zfs_setprop_error(hdl, prop, intval, tbuf);
4168			}
4169		}
4170		nvlist_free(prop_errors);
4171	}
4172
4173	if (err == 0 && snapprops_nvlist) {
4174		zfs_cmd_t zc = { 0 };
4175
4176		(void) strcpy(zc.zc_name, destsnap);
4177		zc.zc_cookie = B_TRUE; /* received */
4178		if (zcmd_write_src_nvlist(hdl, &zc, snapprops_nvlist) == 0) {
4179			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
4180			zcmd_free_nvlists(&zc);
4181		}
4182	}
4183	if (err == 0 && snapholds_nvlist) {
4184		nvpair_t *pair;
4185		nvlist_t *holds, *errors = NULL;
4186		int cleanup_fd = -1;
4187
4188		VERIFY(0 == nvlist_alloc(&holds, 0, KM_SLEEP));
4189		for (pair = nvlist_next_nvpair(snapholds_nvlist, NULL);
4190		    pair != NULL;
4191		    pair = nvlist_next_nvpair(snapholds_nvlist, pair)) {
4192			VERIFY(0 == nvlist_add_string(holds, destsnap,
4193			    nvpair_name(pair)));
4194		}
4195		(void) lzc_hold(holds, cleanup_fd, &errors);
4196		nvlist_free(snapholds_nvlist);
4197		nvlist_free(holds);
4198	}
4199
4200	if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
4201		/*
4202		 * It may be that this snapshot already exists,
4203		 * in which case we want to consume & ignore it
4204		 * rather than failing.
4205		 */
4206		avl_tree_t *local_avl;
4207		nvlist_t *local_nv, *fs;
4208		cp = strchr(destsnap, '@');
4209
4210		/*
4211		 * XXX Do this faster by just iterating over snaps in
4212		 * this fs.  Also if zc_value does not exist, we will
4213		 * get a strange "does not exist" error message.
4214		 */
4215		*cp = '\0';
4216		if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, B_TRUE,
4217		    B_FALSE, B_FALSE, B_FALSE, B_TRUE,
4218		    &local_nv, &local_avl) == 0) {
4219			*cp = '@';
4220			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
4221			fsavl_destroy(local_avl);
4222			nvlist_free(local_nv);
4223
4224			if (fs != NULL) {
4225				if (flags->verbose) {
4226					(void) printf("snap %s already exists; "
4227					    "ignoring\n", destsnap);
4228				}
4229				err = ioctl_err = recv_skip(hdl, infd,
4230				    flags->byteswap);
4231			}
4232		}
4233		*cp = '@';
4234	}
4235
4236	if (ioctl_err != 0) {
4237		switch (ioctl_errno) {
4238		case ENODEV:
4239			cp = strchr(destsnap, '@');
4240			*cp = '\0';
4241			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4242			    "most recent snapshot of %s does not\n"
4243			    "match incremental source"), destsnap);
4244			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4245			*cp = '@';
4246			break;
4247		case ETXTBSY:
4248			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4249			    "destination %s has been modified\n"
4250			    "since most recent snapshot"), name);
4251			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4252			break;
4253		case EACCES:
4254			if (raw && stream_wantsnewfs) {
4255				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4256				    "failed to create encryption key"));
4257			} else if (raw && !stream_wantsnewfs) {
4258				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4259				    "encryption key does not match "
4260				    "existing key"));
4261			} else {
4262				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4263				    "inherited key must be loaded"));
4264			}
4265			(void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
4266			break;
4267		case EEXIST:
4268			cp = strchr(destsnap, '@');
4269			if (newfs) {
4270				/* it's the containing fs that exists */
4271				*cp = '\0';
4272			}
4273			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4274			    "destination already exists"));
4275			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
4276			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
4277			    destsnap);
4278			*cp = '@';
4279			break;
4280		case EINVAL:
4281			if (embedded && !raw)
4282				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4283				    "incompatible embedded data stream "
4284				    "feature with encrypted receive."));
4285			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4286			break;
4287		case ECKSUM:
4288			recv_ecksum_set_aux(hdl, destsnap, flags->resumable);
4289			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4290			break;
4291		case ENOTSUP:
4292			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4293			    "pool must be upgraded to receive this stream."));
4294			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
4295			break;
4296		case EDQUOT:
4297			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4298			    "destination %s space quota exceeded."), name);
4299			(void) zfs_error(hdl, EZFS_NOSPC, errbuf);
4300			break;
4301		case ZFS_ERR_FROM_IVSET_GUID_MISSING:
4302			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4303			    "IV set guid missing. See errata %u at"
4304			    "http://zfsonlinux.org/msg/ZFS-8000-ER"),
4305			    ZPOOL_ERRATA_ZOL_8308_ENCRYPTION);
4306			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4307			break;
4308		case ZFS_ERR_FROM_IVSET_GUID_MISMATCH:
4309			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4310			    "IV set guid mismatch. See the 'zfs receive' "
4311			    "man page section\n discussing the limitations "
4312			    "of raw encrypted send streams."));
4313			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4314			break;
4315		case ZFS_ERR_SPILL_BLOCK_FLAG_MISSING:
4316			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4317			    "Spill block flag missing for raw send.\n"
4318			    "The zfs software on the sending system must "
4319			    "be updated."));
4320			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4321			break;
4322		case EBUSY:
4323			if (hastoken) {
4324				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4325				    "destination %s contains "
4326				    "partially-complete state from "
4327				    "\"zfs receive -s\"."), name);
4328				(void) zfs_error(hdl, EZFS_BUSY, errbuf);
4329				break;
4330			}
4331			/* fallthru */
4332		default:
4333			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
4334		}
4335	}
4336
4337	/*
4338	 * Mount the target filesystem (if created).  Also mount any
4339	 * children of the target filesystem if we did a replication
4340	 * receive (indicated by stream_avl being non-NULL).
4341	 */
4342	cp = strchr(destsnap, '@');
4343	if (cp && (ioctl_err == 0 || !newfs)) {
4344		zfs_handle_t *h;
4345
4346		*cp = '\0';
4347		h = zfs_open(hdl, destsnap,
4348		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
4349		if (h != NULL) {
4350			if (h->zfs_type == ZFS_TYPE_VOLUME) {
4351				*cp = '@';
4352			} else if (newfs || stream_avl) {
4353				/*
4354				 * Track the first/top of hierarchy fs,
4355				 * for mounting and sharing later.
4356				 */
4357				if (top_zfs && *top_zfs == NULL)
4358					*top_zfs = zfs_strdup(hdl, destsnap);
4359			}
4360			zfs_close(h);
4361		}
4362		*cp = '@';
4363	}
4364
4365	if (clp) {
4366		if (!flags->nomount)
4367			err |= changelist_postfix(clp);
4368		changelist_free(clp);
4369	}
4370
4371	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
4372		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
4373		    "failed to clear unreceived properties on %s"), name);
4374		(void) fprintf(stderr, "\n");
4375	}
4376	if (prop_errflags & ZPROP_ERR_NORESTORE) {
4377		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
4378		    "failed to restore original properties on %s"), name);
4379		(void) fprintf(stderr, "\n");
4380	}
4381
4382	if (err || ioctl_err) {
4383		err = -1;
4384		goto out;
4385	}
4386
4387	if (flags->verbose) {
4388		char buf1[64];
4389		char buf2[64];
4390		uint64_t bytes = read_bytes;
4391		time_t delta = time(NULL) - begin_time;
4392		if (delta == 0)
4393			delta = 1;
4394		zfs_nicenum(bytes, buf1, sizeof (buf1));
4395		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
4396
4397		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
4398		    buf1, delta, buf2);
4399	}
4400
4401	err = 0;
4402out:
4403
4404	if (tmp_keylocation[0] != '\0') {
4405		VERIFY(0 == nvlist_add_string(rcvprops,
4406		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation));
4407	}
4408
4409	if (newprops)
4410		nvlist_free(rcvprops);
4411
4412	nvlist_free(oxprops);
4413	nvlist_free(origprops);
4414
4415	return (err);
4416}
4417
4418/*
4419 * Check properties we were asked to override (both -o|-x)
4420 */
4421static boolean_t
4422zfs_receive_checkprops(libzfs_handle_t *hdl, nvlist_t *props,
4423    const char *errbuf)
4424{
4425	nvpair_t *nvp;
4426	zfs_prop_t prop;
4427	const char *name;
4428
4429	nvp = NULL;
4430	while ((nvp = nvlist_next_nvpair(props, nvp)) != NULL) {
4431		name = nvpair_name(nvp);
4432		prop = zfs_name_to_prop(name);
4433
4434		if (prop == ZPROP_INVAL) {
4435			if (!zfs_prop_user(name)) {
4436				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4437				    "invalid property '%s'"), name);
4438				return (B_FALSE);
4439			}
4440			continue;
4441		}
4442		/*
4443		 * "origin" is readonly but is used to receive datasets as
4444		 * clones so we don't raise an error here
4445		 */
4446		if (prop == ZFS_PROP_ORIGIN)
4447			continue;
4448
4449		/* encryption params have their own verification later */
4450		if (prop == ZFS_PROP_ENCRYPTION ||
4451		    zfs_prop_encryption_key_param(prop))
4452			continue;
4453
4454		/*
4455		 * cannot override readonly, set-once and other specific
4456		 * settable properties
4457		 */
4458		if (zfs_prop_readonly(prop) || prop == ZFS_PROP_VERSION ||
4459		    prop == ZFS_PROP_VOLSIZE) {
4460			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4461			    "invalid property '%s'"), name);
4462			return (B_FALSE);
4463		}
4464	}
4465
4466	return (B_TRUE);
4467}
4468
4469static int
4470zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
4471    const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
4472    nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
4473    uint64_t *action_handlep, const char *finalsnap, nvlist_t *cmdprops)
4474{
4475	int err;
4476	dmu_replay_record_t drr, drr_noswap;
4477	struct drr_begin *drrb = &drr.drr_u.drr_begin;
4478	char errbuf[1024];
4479	zio_cksum_t zcksum = { 0 };
4480	uint64_t featureflags;
4481	int hdrtype;
4482
4483	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4484	    "cannot receive"));
4485
4486	/* check cmdline props, raise an error if they cannot be received */
4487	if (!zfs_receive_checkprops(hdl, cmdprops, errbuf)) {
4488		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
4489	}
4490
4491	if (flags->isprefix &&
4492	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
4493		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
4494		    "(%s) does not exist"), tosnap);
4495		return (zfs_error(hdl, EZFS_NOENT, errbuf));
4496	}
4497	if (originsnap &&
4498	    !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
4499		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
4500		    "(%s) does not exist"), originsnap);
4501		return (zfs_error(hdl, EZFS_NOENT, errbuf));
4502	}
4503
4504	/* read in the BEGIN record */
4505	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
4506	    &zcksum)))
4507		return (err);
4508
4509	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
4510		/* It's the double end record at the end of a package */
4511		return (ENODATA);
4512	}
4513
4514	/* the kernel needs the non-byteswapped begin record */
4515	drr_noswap = drr;
4516
4517	flags->byteswap = B_FALSE;
4518	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
4519		/*
4520		 * We computed the checksum in the wrong byteorder in
4521		 * recv_read() above; do it again correctly.
4522		 */
4523		bzero(&zcksum, sizeof (zio_cksum_t));
4524		(void) fletcher_4_incremental_byteswap(&drr,
4525		    sizeof (drr), &zcksum);
4526		flags->byteswap = B_TRUE;
4527
4528		drr.drr_type = BSWAP_32(drr.drr_type);
4529		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
4530		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
4531		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
4532		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
4533		drrb->drr_type = BSWAP_32(drrb->drr_type);
4534		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
4535		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
4536		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
4537	}
4538
4539	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
4540		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4541		    "stream (bad magic number)"));
4542		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
4543	}
4544
4545	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
4546	hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
4547
4548	if (!DMU_STREAM_SUPPORTED(featureflags) ||
4549	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
4550		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4551		    "stream has unsupported feature, feature flags = %lx"),
4552		    featureflags);
4553		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
4554	}
4555
4556	/* Holds feature is set once in the compound stream header. */
4557	boolean_t holds = (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4558	    DMU_BACKUP_FEATURE_HOLDS);
4559	if (holds)
4560		flags->holds = B_TRUE;
4561
4562	if (strchr(drrb->drr_toname, '@') == NULL) {
4563		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4564		    "stream (bad snapshot name)"));
4565		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
4566	}
4567
4568	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
4569		char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
4570		if (sendfs == NULL) {
4571			/*
4572			 * We were not called from zfs_receive_package(). Get
4573			 * the fs specified by 'zfs send'.
4574			 */
4575			char *cp;
4576			(void) strlcpy(nonpackage_sendfs,
4577			    drr.drr_u.drr_begin.drr_toname,
4578			    sizeof (nonpackage_sendfs));
4579			if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
4580				*cp = '\0';
4581			sendfs = nonpackage_sendfs;
4582			VERIFY(finalsnap == NULL);
4583		}
4584		return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
4585		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
4586		    cleanup_fd, action_handlep, finalsnap, cmdprops));
4587	} else {
4588		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
4589		    DMU_COMPOUNDSTREAM);
4590		return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
4591		    &zcksum, top_zfs, cleanup_fd, action_handlep, cmdprops));
4592	}
4593}
4594
4595/*
4596 * Restores a backup of tosnap from the file descriptor specified by infd.
4597 * Return 0 on total success, -2 if some things couldn't be
4598 * destroyed/renamed/promoted, -1 if some things couldn't be received.
4599 * (-1 will override -2, if -1 and the resumable flag was specified the
4600 * transfer can be resumed if the sending side supports it).
4601 */
4602int
4603zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
4604    recvflags_t *flags, int infd, avl_tree_t *stream_avl)
4605{
4606	char *top_zfs = NULL;
4607	int err;
4608	int cleanup_fd;
4609	uint64_t action_handle = 0;
4610	char *originsnap = NULL;
4611	if (props) {
4612		err = nvlist_lookup_string(props, "origin", &originsnap);
4613		if (err && err != ENOENT)
4614			return (err);
4615	}
4616
4617	cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
4618	VERIFY(cleanup_fd >= 0);
4619
4620	err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
4621	    stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL, props);
4622
4623	VERIFY(0 == close(cleanup_fd));
4624
4625	if (err == 0 && !flags->nomount && top_zfs) {
4626		zfs_handle_t *zhp;
4627		prop_changelist_t *clp;
4628
4629		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
4630		if (zhp != NULL) {
4631			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
4632			    CL_GATHER_MOUNT_ALWAYS, 0);
4633			zfs_close(zhp);
4634			if (clp != NULL) {
4635				/* mount and share received datasets */
4636				err = changelist_postfix(clp);
4637				changelist_free(clp);
4638			}
4639		}
4640		if (zhp == NULL || clp == NULL || err)
4641			err = -1;
4642	}
4643	if (top_zfs)
4644		free(top_zfs);
4645
4646	return (err);
4647}
4648