1*5cabbc6bSPrashanth Sreenivasa /* 2*5cabbc6bSPrashanth Sreenivasa * CDDL HEADER START 3*5cabbc6bSPrashanth Sreenivasa * 4*5cabbc6bSPrashanth Sreenivasa * This file and its contents are supplied under the terms of the 5*5cabbc6bSPrashanth Sreenivasa * Common Development and Distribution License ("CDDL"), version 1.0. 6*5cabbc6bSPrashanth Sreenivasa * You may only use this file in accordance with the terms of version 7*5cabbc6bSPrashanth Sreenivasa * 1.0 of the CDDL. 8*5cabbc6bSPrashanth Sreenivasa * 9*5cabbc6bSPrashanth Sreenivasa * A full copy of the text of the CDDL should have accompanied this 10*5cabbc6bSPrashanth Sreenivasa * source. A copy of the CDDL is also available via the Internet at 11*5cabbc6bSPrashanth Sreenivasa * http://www.illumos.org/license/CDDL. 12*5cabbc6bSPrashanth Sreenivasa * 13*5cabbc6bSPrashanth Sreenivasa * CDDL HEADER END 14*5cabbc6bSPrashanth Sreenivasa */ 15*5cabbc6bSPrashanth Sreenivasa 16*5cabbc6bSPrashanth Sreenivasa /* 17*5cabbc6bSPrashanth Sreenivasa * Copyright (c) 2015 by Delphix. All rights reserved. 18*5cabbc6bSPrashanth Sreenivasa */ 19*5cabbc6bSPrashanth Sreenivasa 20*5cabbc6bSPrashanth Sreenivasa #include <sys/dmu_tx.h> 21*5cabbc6bSPrashanth Sreenivasa #include <sys/spa.h> 22*5cabbc6bSPrashanth Sreenivasa #include <sys/dmu.h> 23*5cabbc6bSPrashanth Sreenivasa #include <sys/dsl_pool.h> 24*5cabbc6bSPrashanth Sreenivasa #include <sys/vdev_indirect_births.h> 25*5cabbc6bSPrashanth Sreenivasa 26*5cabbc6bSPrashanth Sreenivasa static boolean_t 27*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_verify(vdev_indirect_births_t *vib) 28*5cabbc6bSPrashanth Sreenivasa { 29*5cabbc6bSPrashanth Sreenivasa ASSERT(vib != NULL); 30*5cabbc6bSPrashanth Sreenivasa 31*5cabbc6bSPrashanth Sreenivasa ASSERT(vib->vib_object != 0); 32*5cabbc6bSPrashanth Sreenivasa ASSERT(vib->vib_objset != NULL); 33*5cabbc6bSPrashanth Sreenivasa ASSERT(vib->vib_phys != NULL); 34*5cabbc6bSPrashanth Sreenivasa ASSERT(vib->vib_dbuf != NULL); 35*5cabbc6bSPrashanth Sreenivasa 36*5cabbc6bSPrashanth Sreenivasa EQUIV(vib->vib_phys->vib_count > 0, vib->vib_entries != NULL); 37*5cabbc6bSPrashanth Sreenivasa 38*5cabbc6bSPrashanth Sreenivasa return (B_TRUE); 39*5cabbc6bSPrashanth Sreenivasa } 40*5cabbc6bSPrashanth Sreenivasa 41*5cabbc6bSPrashanth Sreenivasa uint64_t 42*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_count(vdev_indirect_births_t *vib) 43*5cabbc6bSPrashanth Sreenivasa { 44*5cabbc6bSPrashanth Sreenivasa ASSERT(vdev_indirect_births_verify(vib)); 45*5cabbc6bSPrashanth Sreenivasa 46*5cabbc6bSPrashanth Sreenivasa return (vib->vib_phys->vib_count); 47*5cabbc6bSPrashanth Sreenivasa } 48*5cabbc6bSPrashanth Sreenivasa 49*5cabbc6bSPrashanth Sreenivasa uint64_t 50*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_object(vdev_indirect_births_t *vib) 51*5cabbc6bSPrashanth Sreenivasa { 52*5cabbc6bSPrashanth Sreenivasa ASSERT(vdev_indirect_births_verify(vib)); 53*5cabbc6bSPrashanth Sreenivasa 54*5cabbc6bSPrashanth Sreenivasa return (vib->vib_object); 55*5cabbc6bSPrashanth Sreenivasa } 56*5cabbc6bSPrashanth Sreenivasa 57*5cabbc6bSPrashanth Sreenivasa static uint64_t 58*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_size_impl(vdev_indirect_births_t *vib) 59*5cabbc6bSPrashanth Sreenivasa { 60*5cabbc6bSPrashanth Sreenivasa return (vib->vib_phys->vib_count * sizeof (*vib->vib_entries)); 61*5cabbc6bSPrashanth Sreenivasa } 62*5cabbc6bSPrashanth Sreenivasa 63*5cabbc6bSPrashanth Sreenivasa void 64*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_close(vdev_indirect_births_t *vib) 65*5cabbc6bSPrashanth Sreenivasa { 66*5cabbc6bSPrashanth Sreenivasa ASSERT(vdev_indirect_births_verify(vib)); 67*5cabbc6bSPrashanth Sreenivasa 68*5cabbc6bSPrashanth Sreenivasa if (vib->vib_phys->vib_count > 0) { 69*5cabbc6bSPrashanth Sreenivasa uint64_t births_size = vdev_indirect_births_size_impl(vib); 70*5cabbc6bSPrashanth Sreenivasa 71*5cabbc6bSPrashanth Sreenivasa kmem_free(vib->vib_entries, births_size); 72*5cabbc6bSPrashanth Sreenivasa vib->vib_entries = NULL; 73*5cabbc6bSPrashanth Sreenivasa } 74*5cabbc6bSPrashanth Sreenivasa 75*5cabbc6bSPrashanth Sreenivasa dmu_buf_rele(vib->vib_dbuf, vib); 76*5cabbc6bSPrashanth Sreenivasa 77*5cabbc6bSPrashanth Sreenivasa vib->vib_objset = NULL; 78*5cabbc6bSPrashanth Sreenivasa vib->vib_object = 0; 79*5cabbc6bSPrashanth Sreenivasa vib->vib_dbuf = NULL; 80*5cabbc6bSPrashanth Sreenivasa vib->vib_phys = NULL; 81*5cabbc6bSPrashanth Sreenivasa 82*5cabbc6bSPrashanth Sreenivasa kmem_free(vib, sizeof (*vib)); 83*5cabbc6bSPrashanth Sreenivasa } 84*5cabbc6bSPrashanth Sreenivasa 85*5cabbc6bSPrashanth Sreenivasa uint64_t 86*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_alloc(objset_t *os, dmu_tx_t *tx) 87*5cabbc6bSPrashanth Sreenivasa { 88*5cabbc6bSPrashanth Sreenivasa ASSERT(dmu_tx_is_syncing(tx)); 89*5cabbc6bSPrashanth Sreenivasa 90*5cabbc6bSPrashanth Sreenivasa return (dmu_object_alloc(os, 91*5cabbc6bSPrashanth Sreenivasa DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE, 92*5cabbc6bSPrashanth Sreenivasa DMU_OTN_UINT64_METADATA, sizeof (vdev_indirect_birth_phys_t), 93*5cabbc6bSPrashanth Sreenivasa tx)); 94*5cabbc6bSPrashanth Sreenivasa } 95*5cabbc6bSPrashanth Sreenivasa 96*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_t * 97*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_open(objset_t *os, uint64_t births_object) 98*5cabbc6bSPrashanth Sreenivasa { 99*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_t *vib = kmem_zalloc(sizeof (*vib), KM_SLEEP); 100*5cabbc6bSPrashanth Sreenivasa 101*5cabbc6bSPrashanth Sreenivasa vib->vib_objset = os; 102*5cabbc6bSPrashanth Sreenivasa vib->vib_object = births_object; 103*5cabbc6bSPrashanth Sreenivasa 104*5cabbc6bSPrashanth Sreenivasa VERIFY0(dmu_bonus_hold(os, vib->vib_object, vib, &vib->vib_dbuf)); 105*5cabbc6bSPrashanth Sreenivasa vib->vib_phys = vib->vib_dbuf->db_data; 106*5cabbc6bSPrashanth Sreenivasa 107*5cabbc6bSPrashanth Sreenivasa if (vib->vib_phys->vib_count > 0) { 108*5cabbc6bSPrashanth Sreenivasa uint64_t births_size = vdev_indirect_births_size_impl(vib); 109*5cabbc6bSPrashanth Sreenivasa vib->vib_entries = kmem_alloc(births_size, KM_SLEEP); 110*5cabbc6bSPrashanth Sreenivasa VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0, 111*5cabbc6bSPrashanth Sreenivasa births_size, vib->vib_entries, DMU_READ_PREFETCH)); 112*5cabbc6bSPrashanth Sreenivasa } 113*5cabbc6bSPrashanth Sreenivasa 114*5cabbc6bSPrashanth Sreenivasa ASSERT(vdev_indirect_births_verify(vib)); 115*5cabbc6bSPrashanth Sreenivasa 116*5cabbc6bSPrashanth Sreenivasa return (vib); 117*5cabbc6bSPrashanth Sreenivasa } 118*5cabbc6bSPrashanth Sreenivasa 119*5cabbc6bSPrashanth Sreenivasa void 120*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_free(objset_t *os, uint64_t object, dmu_tx_t *tx) 121*5cabbc6bSPrashanth Sreenivasa { 122*5cabbc6bSPrashanth Sreenivasa VERIFY0(dmu_object_free(os, object, tx)); 123*5cabbc6bSPrashanth Sreenivasa } 124*5cabbc6bSPrashanth Sreenivasa 125*5cabbc6bSPrashanth Sreenivasa void 126*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_add_entry(vdev_indirect_births_t *vib, 127*5cabbc6bSPrashanth Sreenivasa uint64_t max_offset, uint64_t txg, dmu_tx_t *tx) 128*5cabbc6bSPrashanth Sreenivasa { 129*5cabbc6bSPrashanth Sreenivasa vdev_indirect_birth_entry_phys_t vibe; 130*5cabbc6bSPrashanth Sreenivasa uint64_t old_size; 131*5cabbc6bSPrashanth Sreenivasa uint64_t new_size; 132*5cabbc6bSPrashanth Sreenivasa vdev_indirect_birth_entry_phys_t *new_entries; 133*5cabbc6bSPrashanth Sreenivasa 134*5cabbc6bSPrashanth Sreenivasa ASSERT(dmu_tx_is_syncing(tx)); 135*5cabbc6bSPrashanth Sreenivasa ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx))); 136*5cabbc6bSPrashanth Sreenivasa ASSERT(vdev_indirect_births_verify(vib)); 137*5cabbc6bSPrashanth Sreenivasa 138*5cabbc6bSPrashanth Sreenivasa dmu_buf_will_dirty(vib->vib_dbuf, tx); 139*5cabbc6bSPrashanth Sreenivasa 140*5cabbc6bSPrashanth Sreenivasa vibe.vibe_offset = max_offset; 141*5cabbc6bSPrashanth Sreenivasa vibe.vibe_phys_birth_txg = txg; 142*5cabbc6bSPrashanth Sreenivasa 143*5cabbc6bSPrashanth Sreenivasa old_size = vdev_indirect_births_size_impl(vib); 144*5cabbc6bSPrashanth Sreenivasa dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe), 145*5cabbc6bSPrashanth Sreenivasa &vibe, tx); 146*5cabbc6bSPrashanth Sreenivasa vib->vib_phys->vib_count++; 147*5cabbc6bSPrashanth Sreenivasa new_size = vdev_indirect_births_size_impl(vib); 148*5cabbc6bSPrashanth Sreenivasa 149*5cabbc6bSPrashanth Sreenivasa new_entries = kmem_alloc(new_size, KM_SLEEP); 150*5cabbc6bSPrashanth Sreenivasa if (old_size > 0) { 151*5cabbc6bSPrashanth Sreenivasa bcopy(vib->vib_entries, new_entries, old_size); 152*5cabbc6bSPrashanth Sreenivasa kmem_free(vib->vib_entries, old_size); 153*5cabbc6bSPrashanth Sreenivasa } 154*5cabbc6bSPrashanth Sreenivasa new_entries[vib->vib_phys->vib_count - 1] = vibe; 155*5cabbc6bSPrashanth Sreenivasa vib->vib_entries = new_entries; 156*5cabbc6bSPrashanth Sreenivasa } 157*5cabbc6bSPrashanth Sreenivasa 158*5cabbc6bSPrashanth Sreenivasa uint64_t 159*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_last_entry_txg(vdev_indirect_births_t *vib) 160*5cabbc6bSPrashanth Sreenivasa { 161*5cabbc6bSPrashanth Sreenivasa ASSERT(vdev_indirect_births_verify(vib)); 162*5cabbc6bSPrashanth Sreenivasa ASSERT(vib->vib_phys->vib_count > 0); 163*5cabbc6bSPrashanth Sreenivasa 164*5cabbc6bSPrashanth Sreenivasa vdev_indirect_birth_entry_phys_t *last = 165*5cabbc6bSPrashanth Sreenivasa &vib->vib_entries[vib->vib_phys->vib_count - 1]; 166*5cabbc6bSPrashanth Sreenivasa return (last->vibe_phys_birth_txg); 167*5cabbc6bSPrashanth Sreenivasa } 168*5cabbc6bSPrashanth Sreenivasa 169*5cabbc6bSPrashanth Sreenivasa /* 170*5cabbc6bSPrashanth Sreenivasa * Return the txg in which the given range was copied (i.e. its physical 171*5cabbc6bSPrashanth Sreenivasa * birth txg). The specified offset+asize must be contiguously mapped 172*5cabbc6bSPrashanth Sreenivasa * (i.e. not a split block). 173*5cabbc6bSPrashanth Sreenivasa * 174*5cabbc6bSPrashanth Sreenivasa * The entries are sorted by increasing phys_birth, and also by increasing 175*5cabbc6bSPrashanth Sreenivasa * offset. We find the specified offset by binary search. Note that we 176*5cabbc6bSPrashanth Sreenivasa * can not use bsearch() because looking at each entry independently is 177*5cabbc6bSPrashanth Sreenivasa * insufficient to find the correct entry. Each entry implicitly relies 178*5cabbc6bSPrashanth Sreenivasa * on the previous entry: an entry indicates that the offsets from the 179*5cabbc6bSPrashanth Sreenivasa * end of the previous entry to the end of this entry were written in the 180*5cabbc6bSPrashanth Sreenivasa * specified txg. 181*5cabbc6bSPrashanth Sreenivasa */ 182*5cabbc6bSPrashanth Sreenivasa uint64_t 183*5cabbc6bSPrashanth Sreenivasa vdev_indirect_births_physbirth(vdev_indirect_births_t *vib, uint64_t offset, 184*5cabbc6bSPrashanth Sreenivasa uint64_t asize) 185*5cabbc6bSPrashanth Sreenivasa { 186*5cabbc6bSPrashanth Sreenivasa vdev_indirect_birth_entry_phys_t *base; 187*5cabbc6bSPrashanth Sreenivasa vdev_indirect_birth_entry_phys_t *last; 188*5cabbc6bSPrashanth Sreenivasa 189*5cabbc6bSPrashanth Sreenivasa ASSERT(vdev_indirect_births_verify(vib)); 190*5cabbc6bSPrashanth Sreenivasa ASSERT(vib->vib_phys->vib_count > 0); 191*5cabbc6bSPrashanth Sreenivasa 192*5cabbc6bSPrashanth Sreenivasa base = vib->vib_entries; 193*5cabbc6bSPrashanth Sreenivasa last = base + vib->vib_phys->vib_count - 1; 194*5cabbc6bSPrashanth Sreenivasa 195*5cabbc6bSPrashanth Sreenivasa ASSERT3U(offset, <, last->vibe_offset); 196*5cabbc6bSPrashanth Sreenivasa 197*5cabbc6bSPrashanth Sreenivasa while (last >= base) { 198*5cabbc6bSPrashanth Sreenivasa vdev_indirect_birth_entry_phys_t *p = 199*5cabbc6bSPrashanth Sreenivasa base + ((last - base) / 2); 200*5cabbc6bSPrashanth Sreenivasa if (offset >= p->vibe_offset) { 201*5cabbc6bSPrashanth Sreenivasa base = p + 1; 202*5cabbc6bSPrashanth Sreenivasa } else if (p == vib->vib_entries || 203*5cabbc6bSPrashanth Sreenivasa offset >= (p - 1)->vibe_offset) { 204*5cabbc6bSPrashanth Sreenivasa ASSERT3U(offset + asize, <=, p->vibe_offset); 205*5cabbc6bSPrashanth Sreenivasa return (p->vibe_phys_birth_txg); 206*5cabbc6bSPrashanth Sreenivasa } else { 207*5cabbc6bSPrashanth Sreenivasa last = p - 1; 208*5cabbc6bSPrashanth Sreenivasa } 209*5cabbc6bSPrashanth Sreenivasa } 210*5cabbc6bSPrashanth Sreenivasa ASSERT(!"offset not found"); 211*5cabbc6bSPrashanth Sreenivasa return (-1); 212*5cabbc6bSPrashanth Sreenivasa } 213