1*5cabbc6bSPrashanth Sreenivasa /*
2*5cabbc6bSPrashanth Sreenivasa  * CDDL HEADER START
3*5cabbc6bSPrashanth Sreenivasa  *
4*5cabbc6bSPrashanth Sreenivasa  * This file and its contents are supplied under the terms of the
5*5cabbc6bSPrashanth Sreenivasa  * Common Development and Distribution License ("CDDL"), version 1.0.
6*5cabbc6bSPrashanth Sreenivasa  * You may only use this file in accordance with the terms of version
7*5cabbc6bSPrashanth Sreenivasa  * 1.0 of the CDDL.
8*5cabbc6bSPrashanth Sreenivasa  *
9*5cabbc6bSPrashanth Sreenivasa  * A full copy of the text of the CDDL should have accompanied this
10*5cabbc6bSPrashanth Sreenivasa  * source.  A copy of the CDDL is also available via the Internet at
11*5cabbc6bSPrashanth Sreenivasa  * http://www.illumos.org/license/CDDL.
12*5cabbc6bSPrashanth Sreenivasa  *
13*5cabbc6bSPrashanth Sreenivasa  * CDDL HEADER END
14*5cabbc6bSPrashanth Sreenivasa  */
15*5cabbc6bSPrashanth Sreenivasa 
16*5cabbc6bSPrashanth Sreenivasa /*
17*5cabbc6bSPrashanth Sreenivasa  * Copyright (c) 2015 by Delphix. All rights reserved.
18*5cabbc6bSPrashanth Sreenivasa  */
19*5cabbc6bSPrashanth Sreenivasa 
20*5cabbc6bSPrashanth Sreenivasa #include <sys/dmu_tx.h>
21*5cabbc6bSPrashanth Sreenivasa #include <sys/spa.h>
22*5cabbc6bSPrashanth Sreenivasa #include <sys/dmu.h>
23*5cabbc6bSPrashanth Sreenivasa #include <sys/dsl_pool.h>
24*5cabbc6bSPrashanth Sreenivasa #include <sys/vdev_indirect_births.h>
25*5cabbc6bSPrashanth Sreenivasa 
26*5cabbc6bSPrashanth Sreenivasa static boolean_t
vdev_indirect_births_verify(vdev_indirect_births_t * vib)27*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_verify(vdev_indirect_births_t *vib)
28*5cabbc6bSPrashanth Sreenivasa {
29*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vib != NULL);
30*5cabbc6bSPrashanth Sreenivasa 
31*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vib->vib_object != 0);
32*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vib->vib_objset != NULL);
33*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vib->vib_phys != NULL);
34*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vib->vib_dbuf != NULL);
35*5cabbc6bSPrashanth Sreenivasa 
36*5cabbc6bSPrashanth Sreenivasa 	EQUIV(vib->vib_phys->vib_count > 0, vib->vib_entries != NULL);
37*5cabbc6bSPrashanth Sreenivasa 
38*5cabbc6bSPrashanth Sreenivasa 	return (B_TRUE);
39*5cabbc6bSPrashanth Sreenivasa }
40*5cabbc6bSPrashanth Sreenivasa 
41*5cabbc6bSPrashanth Sreenivasa uint64_t
vdev_indirect_births_count(vdev_indirect_births_t * vib)42*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_count(vdev_indirect_births_t *vib)
43*5cabbc6bSPrashanth Sreenivasa {
44*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vdev_indirect_births_verify(vib));
45*5cabbc6bSPrashanth Sreenivasa 
46*5cabbc6bSPrashanth Sreenivasa 	return (vib->vib_phys->vib_count);
47*5cabbc6bSPrashanth Sreenivasa }
48*5cabbc6bSPrashanth Sreenivasa 
49*5cabbc6bSPrashanth Sreenivasa uint64_t
vdev_indirect_births_object(vdev_indirect_births_t * vib)50*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_object(vdev_indirect_births_t *vib)
51*5cabbc6bSPrashanth Sreenivasa {
52*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vdev_indirect_births_verify(vib));
53*5cabbc6bSPrashanth Sreenivasa 
54*5cabbc6bSPrashanth Sreenivasa 	return (vib->vib_object);
55*5cabbc6bSPrashanth Sreenivasa }
56*5cabbc6bSPrashanth Sreenivasa 
57*5cabbc6bSPrashanth Sreenivasa static uint64_t
vdev_indirect_births_size_impl(vdev_indirect_births_t * vib)58*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_size_impl(vdev_indirect_births_t *vib)
59*5cabbc6bSPrashanth Sreenivasa {
60*5cabbc6bSPrashanth Sreenivasa 	return (vib->vib_phys->vib_count * sizeof (*vib->vib_entries));
61*5cabbc6bSPrashanth Sreenivasa }
62*5cabbc6bSPrashanth Sreenivasa 
63*5cabbc6bSPrashanth Sreenivasa void
vdev_indirect_births_close(vdev_indirect_births_t * vib)64*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_close(vdev_indirect_births_t *vib)
65*5cabbc6bSPrashanth Sreenivasa {
66*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vdev_indirect_births_verify(vib));
67*5cabbc6bSPrashanth Sreenivasa 
68*5cabbc6bSPrashanth Sreenivasa 	if (vib->vib_phys->vib_count > 0) {
69*5cabbc6bSPrashanth Sreenivasa 		uint64_t births_size = vdev_indirect_births_size_impl(vib);
70*5cabbc6bSPrashanth Sreenivasa 
71*5cabbc6bSPrashanth Sreenivasa 		kmem_free(vib->vib_entries, births_size);
72*5cabbc6bSPrashanth Sreenivasa 		vib->vib_entries = NULL;
73*5cabbc6bSPrashanth Sreenivasa 	}
74*5cabbc6bSPrashanth Sreenivasa 
75*5cabbc6bSPrashanth Sreenivasa 	dmu_buf_rele(vib->vib_dbuf, vib);
76*5cabbc6bSPrashanth Sreenivasa 
77*5cabbc6bSPrashanth Sreenivasa 	vib->vib_objset = NULL;
78*5cabbc6bSPrashanth Sreenivasa 	vib->vib_object = 0;
79*5cabbc6bSPrashanth Sreenivasa 	vib->vib_dbuf = NULL;
80*5cabbc6bSPrashanth Sreenivasa 	vib->vib_phys = NULL;
81*5cabbc6bSPrashanth Sreenivasa 
82*5cabbc6bSPrashanth Sreenivasa 	kmem_free(vib, sizeof (*vib));
83*5cabbc6bSPrashanth Sreenivasa }
84*5cabbc6bSPrashanth Sreenivasa 
85*5cabbc6bSPrashanth Sreenivasa uint64_t
vdev_indirect_births_alloc(objset_t * os,dmu_tx_t * tx)86*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_alloc(objset_t *os, dmu_tx_t *tx)
87*5cabbc6bSPrashanth Sreenivasa {
88*5cabbc6bSPrashanth Sreenivasa 	ASSERT(dmu_tx_is_syncing(tx));
89*5cabbc6bSPrashanth Sreenivasa 
90*5cabbc6bSPrashanth Sreenivasa 	return (dmu_object_alloc(os,
91*5cabbc6bSPrashanth Sreenivasa 	    DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
92*5cabbc6bSPrashanth Sreenivasa 	    DMU_OTN_UINT64_METADATA, sizeof (vdev_indirect_birth_phys_t),
93*5cabbc6bSPrashanth Sreenivasa 	    tx));
94*5cabbc6bSPrashanth Sreenivasa }
95*5cabbc6bSPrashanth Sreenivasa 
96*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_t *
vdev_indirect_births_open(objset_t * os,uint64_t births_object)97*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_open(objset_t *os, uint64_t births_object)
98*5cabbc6bSPrashanth Sreenivasa {
99*5cabbc6bSPrashanth Sreenivasa 	vdev_indirect_births_t *vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
100*5cabbc6bSPrashanth Sreenivasa 
101*5cabbc6bSPrashanth Sreenivasa 	vib->vib_objset = os;
102*5cabbc6bSPrashanth Sreenivasa 	vib->vib_object = births_object;
103*5cabbc6bSPrashanth Sreenivasa 
104*5cabbc6bSPrashanth Sreenivasa 	VERIFY0(dmu_bonus_hold(os, vib->vib_object, vib, &vib->vib_dbuf));
105*5cabbc6bSPrashanth Sreenivasa 	vib->vib_phys = vib->vib_dbuf->db_data;
106*5cabbc6bSPrashanth Sreenivasa 
107*5cabbc6bSPrashanth Sreenivasa 	if (vib->vib_phys->vib_count > 0) {
108*5cabbc6bSPrashanth Sreenivasa 		uint64_t births_size = vdev_indirect_births_size_impl(vib);
109*5cabbc6bSPrashanth Sreenivasa 		vib->vib_entries = kmem_alloc(births_size, KM_SLEEP);
110*5cabbc6bSPrashanth Sreenivasa 		VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0,
111*5cabbc6bSPrashanth Sreenivasa 		    births_size, vib->vib_entries, DMU_READ_PREFETCH));
112*5cabbc6bSPrashanth Sreenivasa 	}
113*5cabbc6bSPrashanth Sreenivasa 
114*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vdev_indirect_births_verify(vib));
115*5cabbc6bSPrashanth Sreenivasa 
116*5cabbc6bSPrashanth Sreenivasa 	return (vib);
117*5cabbc6bSPrashanth Sreenivasa }
118*5cabbc6bSPrashanth Sreenivasa 
119*5cabbc6bSPrashanth Sreenivasa void
vdev_indirect_births_free(objset_t * os,uint64_t object,dmu_tx_t * tx)120*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
121*5cabbc6bSPrashanth Sreenivasa {
122*5cabbc6bSPrashanth Sreenivasa 	VERIFY0(dmu_object_free(os, object, tx));
123*5cabbc6bSPrashanth Sreenivasa }
124*5cabbc6bSPrashanth Sreenivasa 
125*5cabbc6bSPrashanth Sreenivasa void
vdev_indirect_births_add_entry(vdev_indirect_births_t * vib,uint64_t max_offset,uint64_t txg,dmu_tx_t * tx)126*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
127*5cabbc6bSPrashanth Sreenivasa     uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
128*5cabbc6bSPrashanth Sreenivasa {
129*5cabbc6bSPrashanth Sreenivasa 	vdev_indirect_birth_entry_phys_t vibe;
130*5cabbc6bSPrashanth Sreenivasa 	uint64_t old_size;
131*5cabbc6bSPrashanth Sreenivasa 	uint64_t new_size;
132*5cabbc6bSPrashanth Sreenivasa 	vdev_indirect_birth_entry_phys_t *new_entries;
133*5cabbc6bSPrashanth Sreenivasa 
134*5cabbc6bSPrashanth Sreenivasa 	ASSERT(dmu_tx_is_syncing(tx));
135*5cabbc6bSPrashanth Sreenivasa 	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
136*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vdev_indirect_births_verify(vib));
137*5cabbc6bSPrashanth Sreenivasa 
138*5cabbc6bSPrashanth Sreenivasa 	dmu_buf_will_dirty(vib->vib_dbuf, tx);
139*5cabbc6bSPrashanth Sreenivasa 
140*5cabbc6bSPrashanth Sreenivasa 	vibe.vibe_offset = max_offset;
141*5cabbc6bSPrashanth Sreenivasa 	vibe.vibe_phys_birth_txg = txg;
142*5cabbc6bSPrashanth Sreenivasa 
143*5cabbc6bSPrashanth Sreenivasa 	old_size = vdev_indirect_births_size_impl(vib);
144*5cabbc6bSPrashanth Sreenivasa 	dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe),
145*5cabbc6bSPrashanth Sreenivasa 	    &vibe, tx);
146*5cabbc6bSPrashanth Sreenivasa 	vib->vib_phys->vib_count++;
147*5cabbc6bSPrashanth Sreenivasa 	new_size = vdev_indirect_births_size_impl(vib);
148*5cabbc6bSPrashanth Sreenivasa 
149*5cabbc6bSPrashanth Sreenivasa 	new_entries = kmem_alloc(new_size, KM_SLEEP);
150*5cabbc6bSPrashanth Sreenivasa 	if (old_size > 0) {
151*5cabbc6bSPrashanth Sreenivasa 		bcopy(vib->vib_entries, new_entries, old_size);
152*5cabbc6bSPrashanth Sreenivasa 		kmem_free(vib->vib_entries, old_size);
153*5cabbc6bSPrashanth Sreenivasa 	}
154*5cabbc6bSPrashanth Sreenivasa 	new_entries[vib->vib_phys->vib_count - 1] = vibe;
155*5cabbc6bSPrashanth Sreenivasa 	vib->vib_entries = new_entries;
156*5cabbc6bSPrashanth Sreenivasa }
157*5cabbc6bSPrashanth Sreenivasa 
158*5cabbc6bSPrashanth Sreenivasa uint64_t
vdev_indirect_births_last_entry_txg(vdev_indirect_births_t * vib)159*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_last_entry_txg(vdev_indirect_births_t *vib)
160*5cabbc6bSPrashanth Sreenivasa {
161*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vdev_indirect_births_verify(vib));
162*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vib->vib_phys->vib_count > 0);
163*5cabbc6bSPrashanth Sreenivasa 
164*5cabbc6bSPrashanth Sreenivasa 	vdev_indirect_birth_entry_phys_t *last =
165*5cabbc6bSPrashanth Sreenivasa 	    &vib->vib_entries[vib->vib_phys->vib_count - 1];
166*5cabbc6bSPrashanth Sreenivasa 	return (last->vibe_phys_birth_txg);
167*5cabbc6bSPrashanth Sreenivasa }
168*5cabbc6bSPrashanth Sreenivasa 
169*5cabbc6bSPrashanth Sreenivasa /*
170*5cabbc6bSPrashanth Sreenivasa  * Return the txg in which the given range was copied (i.e. its physical
171*5cabbc6bSPrashanth Sreenivasa  * birth txg).  The specified offset+asize must be contiguously mapped
172*5cabbc6bSPrashanth Sreenivasa  * (i.e. not a split block).
173*5cabbc6bSPrashanth Sreenivasa  *
174*5cabbc6bSPrashanth Sreenivasa  * The entries are sorted by increasing phys_birth, and also by increasing
175*5cabbc6bSPrashanth Sreenivasa  * offset.  We find the specified offset by binary search.  Note that we
176*5cabbc6bSPrashanth Sreenivasa  * can not use bsearch() because looking at each entry independently is
177*5cabbc6bSPrashanth Sreenivasa  * insufficient to find the correct entry.  Each entry implicitly relies
178*5cabbc6bSPrashanth Sreenivasa  * on the previous entry: an entry indicates that the offsets from the
179*5cabbc6bSPrashanth Sreenivasa  * end of the previous entry to the end of this entry were written in the
180*5cabbc6bSPrashanth Sreenivasa  * specified txg.
181*5cabbc6bSPrashanth Sreenivasa  */
182*5cabbc6bSPrashanth Sreenivasa uint64_t
vdev_indirect_births_physbirth(vdev_indirect_births_t * vib,uint64_t offset,uint64_t asize)183*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_physbirth(vdev_indirect_births_t *vib, uint64_t offset,
184*5cabbc6bSPrashanth Sreenivasa     uint64_t asize)
185*5cabbc6bSPrashanth Sreenivasa {
186*5cabbc6bSPrashanth Sreenivasa 	vdev_indirect_birth_entry_phys_t *base;
187*5cabbc6bSPrashanth Sreenivasa 	vdev_indirect_birth_entry_phys_t *last;
188*5cabbc6bSPrashanth Sreenivasa 
189*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vdev_indirect_births_verify(vib));
190*5cabbc6bSPrashanth Sreenivasa 	ASSERT(vib->vib_phys->vib_count > 0);
191*5cabbc6bSPrashanth Sreenivasa 
192*5cabbc6bSPrashanth Sreenivasa 	base = vib->vib_entries;
193*5cabbc6bSPrashanth Sreenivasa 	last = base + vib->vib_phys->vib_count - 1;
194*5cabbc6bSPrashanth Sreenivasa 
195*5cabbc6bSPrashanth Sreenivasa 	ASSERT3U(offset, <, last->vibe_offset);
196*5cabbc6bSPrashanth Sreenivasa 
197*5cabbc6bSPrashanth Sreenivasa 	while (last >= base) {
198*5cabbc6bSPrashanth Sreenivasa 		vdev_indirect_birth_entry_phys_t *p =
199*5cabbc6bSPrashanth Sreenivasa 		    base + ((last - base) / 2);
200*5cabbc6bSPrashanth Sreenivasa 		if (offset >= p->vibe_offset) {
201*5cabbc6bSPrashanth Sreenivasa 			base = p + 1;
202*5cabbc6bSPrashanth Sreenivasa 		} else if (p == vib->vib_entries ||
203*5cabbc6bSPrashanth Sreenivasa 		    offset >= (p - 1)->vibe_offset) {
204*5cabbc6bSPrashanth Sreenivasa 			ASSERT3U(offset + asize, <=, p->vibe_offset);
205*5cabbc6bSPrashanth Sreenivasa 			return (p->vibe_phys_birth_txg);
206*5cabbc6bSPrashanth Sreenivasa 		} else {
207*5cabbc6bSPrashanth Sreenivasa 			last = p - 1;
208*5cabbc6bSPrashanth Sreenivasa 		}
209*5cabbc6bSPrashanth Sreenivasa 	}
210*5cabbc6bSPrashanth Sreenivasa 	ASSERT(!"offset not found");
211*5cabbc6bSPrashanth Sreenivasa 	return (-1);
212*5cabbc6bSPrashanth Sreenivasa }
213