1*fa9e4066Sahrens /* 2*fa9e4066Sahrens * CDDL HEADER START 3*fa9e4066Sahrens * 4*fa9e4066Sahrens * The contents of this file are subject to the terms of the 5*fa9e4066Sahrens * Common Development and Distribution License, Version 1.0 only 6*fa9e4066Sahrens * (the "License"). You may not use this file except in compliance 7*fa9e4066Sahrens * with the License. 8*fa9e4066Sahrens * 9*fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 11*fa9e4066Sahrens * See the License for the specific language governing permissions 12*fa9e4066Sahrens * and limitations under the License. 13*fa9e4066Sahrens * 14*fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15*fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 17*fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18*fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19*fa9e4066Sahrens * 20*fa9e4066Sahrens * CDDL HEADER END 21*fa9e4066Sahrens */ 22*fa9e4066Sahrens /* 23*fa9e4066Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*fa9e4066Sahrens * Use is subject to license terms. 25*fa9e4066Sahrens */ 26*fa9e4066Sahrens 27*fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28*fa9e4066Sahrens 29*fa9e4066Sahrens #include <sys/types.h> 30*fa9e4066Sahrens #include <sys/param.h> 31*fa9e4066Sahrens #include <sys/time.h> 32*fa9e4066Sahrens #include <sys/systm.h> 33*fa9e4066Sahrens #include <sys/sysmacros.h> 34*fa9e4066Sahrens #include <sys/resource.h> 35*fa9e4066Sahrens #include <sys/mntent.h> 36*fa9e4066Sahrens #include <sys/vfs.h> 37*fa9e4066Sahrens #include <sys/vnode.h> 38*fa9e4066Sahrens #include <sys/file.h> 39*fa9e4066Sahrens #include <sys/kmem.h> 40*fa9e4066Sahrens #include <sys/cmn_err.h> 41*fa9e4066Sahrens #include <sys/errno.h> 42*fa9e4066Sahrens #include <sys/unistd.h> 43*fa9e4066Sahrens #include <sys/stat.h> 44*fa9e4066Sahrens #include <sys/mode.h> 45*fa9e4066Sahrens #include <sys/atomic.h> 46*fa9e4066Sahrens #include <vm/pvn.h> 47*fa9e4066Sahrens #include "fs/fs_subr.h" 48*fa9e4066Sahrens #include <sys/zfs_dir.h> 49*fa9e4066Sahrens #include <sys/zfs_acl.h> 50*fa9e4066Sahrens #include <sys/zfs_ioctl.h> 51*fa9e4066Sahrens #include <sys/zfs_znode.h> 52*fa9e4066Sahrens #include <sys/zap.h> 53*fa9e4066Sahrens #include <sys/dmu.h> 54*fa9e4066Sahrens #include <sys/fs/zfs.h> 55*fa9e4066Sahrens 56*fa9e4066Sahrens struct kmem_cache *znode_cache = NULL; 57*fa9e4066Sahrens 58*fa9e4066Sahrens /* 59*fa9e4066Sahrens * Note that znodes can be on one of 2 states: 60*fa9e4066Sahrens * ZCACHE_mru - recently used, currently cached 61*fa9e4066Sahrens * ZCACHE_mfu - frequently used, currently cached 62*fa9e4066Sahrens * When there are no active references to the znode, they 63*fa9e4066Sahrens * are linked onto one of the lists in zcache. These are the 64*fa9e4066Sahrens * only znodes that can be evicted. 65*fa9e4066Sahrens */ 66*fa9e4066Sahrens 67*fa9e4066Sahrens typedef struct zcache_state { 68*fa9e4066Sahrens list_t list; /* linked list of evictable znodes in state */ 69*fa9e4066Sahrens uint64_t lcnt; /* total number of znodes in the linked list */ 70*fa9e4066Sahrens uint64_t cnt; /* total number of all znodes in this state */ 71*fa9e4066Sahrens uint64_t hits; 72*fa9e4066Sahrens kmutex_t mtx; 73*fa9e4066Sahrens } zcache_state_t; 74*fa9e4066Sahrens 75*fa9e4066Sahrens /* The 2 states: */ 76*fa9e4066Sahrens static zcache_state_t ZCACHE_mru; 77*fa9e4066Sahrens static zcache_state_t ZCACHE_mfu; 78*fa9e4066Sahrens 79*fa9e4066Sahrens static struct zcache { 80*fa9e4066Sahrens zcache_state_t *mru; 81*fa9e4066Sahrens zcache_state_t *mfu; 82*fa9e4066Sahrens uint64_t p; /* Target size of mru */ 83*fa9e4066Sahrens uint64_t c; /* Target size of cache */ 84*fa9e4066Sahrens uint64_t c_max; /* Maximum target cache size */ 85*fa9e4066Sahrens 86*fa9e4066Sahrens /* performance stats */ 87*fa9e4066Sahrens uint64_t missed; 88*fa9e4066Sahrens uint64_t evicted; 89*fa9e4066Sahrens uint64_t skipped; 90*fa9e4066Sahrens } zcache; 91*fa9e4066Sahrens 92*fa9e4066Sahrens void zcache_kmem_reclaim(void); 93*fa9e4066Sahrens 94*fa9e4066Sahrens #define ZCACHE_MINTIME (hz>>4) /* 62 ms */ 95*fa9e4066Sahrens 96*fa9e4066Sahrens /* 97*fa9e4066Sahrens * Move the supplied znode to the indicated state. The mutex 98*fa9e4066Sahrens * for the znode must be held by the caller. 99*fa9e4066Sahrens */ 100*fa9e4066Sahrens static void 101*fa9e4066Sahrens zcache_change_state(zcache_state_t *new_state, znode_t *zp) 102*fa9e4066Sahrens { 103*fa9e4066Sahrens /* ASSERT(MUTEX_HELD(hash_mtx)); */ 104*fa9e4066Sahrens ASSERT(zp->z_active); 105*fa9e4066Sahrens 106*fa9e4066Sahrens if (zp->z_zcache_state) { 107*fa9e4066Sahrens ASSERT3U(zp->z_zcache_state->cnt, >=, 1); 108*fa9e4066Sahrens atomic_add_64(&zp->z_zcache_state->cnt, -1); 109*fa9e4066Sahrens } 110*fa9e4066Sahrens atomic_add_64(&new_state->cnt, 1); 111*fa9e4066Sahrens zp->z_zcache_state = new_state; 112*fa9e4066Sahrens } 113*fa9e4066Sahrens 114*fa9e4066Sahrens static void 115*fa9e4066Sahrens zfs_zcache_evict(znode_t *zp, kmutex_t *hash_mtx) 116*fa9e4066Sahrens { 117*fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 118*fa9e4066Sahrens 119*fa9e4066Sahrens ASSERT(zp->z_phys); 120*fa9e4066Sahrens ASSERT(zp->z_dbuf_held); 121*fa9e4066Sahrens 122*fa9e4066Sahrens zp->z_dbuf_held = 0; 123*fa9e4066Sahrens mutex_exit(&zp->z_lock); 124*fa9e4066Sahrens dmu_buf_rele(zp->z_dbuf); 125*fa9e4066Sahrens mutex_exit(hash_mtx); 126*fa9e4066Sahrens VFS_RELE(zfsvfs->z_vfs); 127*fa9e4066Sahrens } 128*fa9e4066Sahrens 129*fa9e4066Sahrens /* 130*fa9e4066Sahrens * Evict znodes from list until we've removed the specified number 131*fa9e4066Sahrens */ 132*fa9e4066Sahrens static void 133*fa9e4066Sahrens zcache_evict_state(zcache_state_t *state, int64_t cnt, zfsvfs_t *zfsvfs) 134*fa9e4066Sahrens { 135*fa9e4066Sahrens int znodes_evicted = 0; 136*fa9e4066Sahrens znode_t *zp, *zp_prev; 137*fa9e4066Sahrens kmutex_t *hash_mtx; 138*fa9e4066Sahrens 139*fa9e4066Sahrens ASSERT(state == zcache.mru || state == zcache.mfu); 140*fa9e4066Sahrens 141*fa9e4066Sahrens mutex_enter(&state->mtx); 142*fa9e4066Sahrens 143*fa9e4066Sahrens for (zp = list_tail(&state->list); zp; zp = zp_prev) { 144*fa9e4066Sahrens zp_prev = list_prev(&state->list, zp); 145*fa9e4066Sahrens if (zfsvfs && zp->z_zfsvfs != zfsvfs) 146*fa9e4066Sahrens continue; 147*fa9e4066Sahrens hash_mtx = ZFS_OBJ_MUTEX(zp); 148*fa9e4066Sahrens if (mutex_tryenter(hash_mtx)) { 149*fa9e4066Sahrens mutex_enter(&zp->z_lock); 150*fa9e4066Sahrens list_remove(&zp->z_zcache_state->list, zp); 151*fa9e4066Sahrens zp->z_zcache_state->lcnt -= 1; 152*fa9e4066Sahrens ASSERT3U(zp->z_zcache_state->cnt, >=, 1); 153*fa9e4066Sahrens atomic_add_64(&zp->z_zcache_state->cnt, -1); 154*fa9e4066Sahrens zp->z_zcache_state = NULL; 155*fa9e4066Sahrens zp->z_zcache_access = 0; 156*fa9e4066Sahrens /* drops z_lock and hash_mtx */ 157*fa9e4066Sahrens zfs_zcache_evict(zp, hash_mtx); 158*fa9e4066Sahrens znodes_evicted += 1; 159*fa9e4066Sahrens atomic_add_64(&zcache.evicted, 1); 160*fa9e4066Sahrens if (znodes_evicted >= cnt) 161*fa9e4066Sahrens break; 162*fa9e4066Sahrens } else { 163*fa9e4066Sahrens atomic_add_64(&zcache.skipped, 1); 164*fa9e4066Sahrens } 165*fa9e4066Sahrens } 166*fa9e4066Sahrens mutex_exit(&state->mtx); 167*fa9e4066Sahrens 168*fa9e4066Sahrens if (znodes_evicted < cnt) 169*fa9e4066Sahrens dprintf("only evicted %lld znodes from %x", 170*fa9e4066Sahrens (longlong_t)znodes_evicted, state); 171*fa9e4066Sahrens } 172*fa9e4066Sahrens 173*fa9e4066Sahrens static void 174*fa9e4066Sahrens zcache_adjust(void) 175*fa9e4066Sahrens { 176*fa9e4066Sahrens uint64_t mrucnt = zcache.mru->lcnt; 177*fa9e4066Sahrens uint64_t mfucnt = zcache.mfu->lcnt; 178*fa9e4066Sahrens uint64_t p = zcache.p; 179*fa9e4066Sahrens uint64_t c = zcache.c; 180*fa9e4066Sahrens 181*fa9e4066Sahrens if (mrucnt > p) 182*fa9e4066Sahrens zcache_evict_state(zcache.mru, mrucnt - p, NULL); 183*fa9e4066Sahrens 184*fa9e4066Sahrens if (mfucnt > 0 && mrucnt + mfucnt > c) { 185*fa9e4066Sahrens int64_t toevict = MIN(mfucnt, mrucnt + mfucnt - c); 186*fa9e4066Sahrens zcache_evict_state(zcache.mfu, toevict, NULL); 187*fa9e4066Sahrens } 188*fa9e4066Sahrens } 189*fa9e4066Sahrens 190*fa9e4066Sahrens /* 191*fa9e4066Sahrens * Flush all *evictable* data from the cache. 192*fa9e4066Sahrens * NOTE: this will not touch "active" (i.e. referenced) data. 193*fa9e4066Sahrens */ 194*fa9e4066Sahrens void 195*fa9e4066Sahrens zfs_zcache_flush(zfsvfs_t *zfsvfs) 196*fa9e4066Sahrens { 197*fa9e4066Sahrens zcache_evict_state(zcache.mru, zcache.mru->lcnt, zfsvfs); 198*fa9e4066Sahrens zcache_evict_state(zcache.mfu, zcache.mfu->lcnt, zfsvfs); 199*fa9e4066Sahrens } 200*fa9e4066Sahrens 201*fa9e4066Sahrens static void 202*fa9e4066Sahrens zcache_try_grow(int64_t cnt) 203*fa9e4066Sahrens { 204*fa9e4066Sahrens int64_t size; 205*fa9e4066Sahrens /* 206*fa9e4066Sahrens * If we're almost to the current target cache size, 207*fa9e4066Sahrens * increment the target cache size 208*fa9e4066Sahrens */ 209*fa9e4066Sahrens size = zcache.mru->lcnt + zcache.mfu->lcnt; 210*fa9e4066Sahrens if ((zcache.c - size) <= 1) { 211*fa9e4066Sahrens atomic_add_64(&zcache.c, cnt); 212*fa9e4066Sahrens if (zcache.c > zcache.c_max) 213*fa9e4066Sahrens zcache.c = zcache.c_max; 214*fa9e4066Sahrens else if (zcache.p + cnt < zcache.c) 215*fa9e4066Sahrens atomic_add_64(&zcache.p, cnt); 216*fa9e4066Sahrens } 217*fa9e4066Sahrens } 218*fa9e4066Sahrens 219*fa9e4066Sahrens /* 220*fa9e4066Sahrens * This routine is called whenever a znode is accessed. 221*fa9e4066Sahrens */ 222*fa9e4066Sahrens static void 223*fa9e4066Sahrens zcache_access(znode_t *zp, kmutex_t *hash_mtx) 224*fa9e4066Sahrens { 225*fa9e4066Sahrens ASSERT(MUTEX_HELD(hash_mtx)); 226*fa9e4066Sahrens 227*fa9e4066Sahrens if (zp->z_zcache_state == NULL) { 228*fa9e4066Sahrens /* 229*fa9e4066Sahrens * This znode is not in the cache. 230*fa9e4066Sahrens * Add the new znode to the MRU state. 231*fa9e4066Sahrens */ 232*fa9e4066Sahrens 233*fa9e4066Sahrens zcache_try_grow(1); 234*fa9e4066Sahrens 235*fa9e4066Sahrens ASSERT(zp->z_zcache_access == 0); 236*fa9e4066Sahrens zp->z_zcache_access = lbolt; 237*fa9e4066Sahrens zcache_change_state(zcache.mru, zp); 238*fa9e4066Sahrens mutex_exit(hash_mtx); 239*fa9e4066Sahrens 240*fa9e4066Sahrens /* 241*fa9e4066Sahrens * If we are using less than 2/3 of our total target 242*fa9e4066Sahrens * cache size, bump up the target size for the MRU 243*fa9e4066Sahrens * list. 244*fa9e4066Sahrens */ 245*fa9e4066Sahrens if (zcache.mru->lcnt + zcache.mfu->lcnt < zcache.c*2/3) { 246*fa9e4066Sahrens zcache.p = zcache.mru->lcnt + zcache.c/6; 247*fa9e4066Sahrens } 248*fa9e4066Sahrens 249*fa9e4066Sahrens zcache_adjust(); 250*fa9e4066Sahrens 251*fa9e4066Sahrens atomic_add_64(&zcache.missed, 1); 252*fa9e4066Sahrens } else if (zp->z_zcache_state == zcache.mru) { 253*fa9e4066Sahrens /* 254*fa9e4066Sahrens * This znode has been "accessed" only once so far, 255*fa9e4066Sahrens * Move it to the MFU state. 256*fa9e4066Sahrens */ 257*fa9e4066Sahrens if (lbolt > zp->z_zcache_access + ZCACHE_MINTIME) { 258*fa9e4066Sahrens /* 259*fa9e4066Sahrens * More than 125ms have passed since we 260*fa9e4066Sahrens * instantiated this buffer. Move it to the 261*fa9e4066Sahrens * most frequently used state. 262*fa9e4066Sahrens */ 263*fa9e4066Sahrens zp->z_zcache_access = lbolt; 264*fa9e4066Sahrens zcache_change_state(zcache.mfu, zp); 265*fa9e4066Sahrens } 266*fa9e4066Sahrens atomic_add_64(&zcache.mru->hits, 1); 267*fa9e4066Sahrens mutex_exit(hash_mtx); 268*fa9e4066Sahrens } else { 269*fa9e4066Sahrens ASSERT(zp->z_zcache_state == zcache.mfu); 270*fa9e4066Sahrens /* 271*fa9e4066Sahrens * This buffer has been accessed more than once. 272*fa9e4066Sahrens * Keep it in the MFU state. 273*fa9e4066Sahrens */ 274*fa9e4066Sahrens atomic_add_64(&zcache.mfu->hits, 1); 275*fa9e4066Sahrens mutex_exit(hash_mtx); 276*fa9e4066Sahrens } 277*fa9e4066Sahrens } 278*fa9e4066Sahrens 279*fa9e4066Sahrens static void 280*fa9e4066Sahrens zcache_init(void) 281*fa9e4066Sahrens { 282*fa9e4066Sahrens zcache.c = 20; 283*fa9e4066Sahrens zcache.c_max = 50; 284*fa9e4066Sahrens 285*fa9e4066Sahrens zcache.mru = &ZCACHE_mru; 286*fa9e4066Sahrens zcache.mfu = &ZCACHE_mfu; 287*fa9e4066Sahrens 288*fa9e4066Sahrens list_create(&zcache.mru->list, sizeof (znode_t), 289*fa9e4066Sahrens offsetof(znode_t, z_zcache_node)); 290*fa9e4066Sahrens list_create(&zcache.mfu->list, sizeof (znode_t), 291*fa9e4066Sahrens offsetof(znode_t, z_zcache_node)); 292*fa9e4066Sahrens } 293*fa9e4066Sahrens 294*fa9e4066Sahrens static void 295*fa9e4066Sahrens zcache_fini(void) 296*fa9e4066Sahrens { 297*fa9e4066Sahrens zfs_zcache_flush(NULL); 298*fa9e4066Sahrens 299*fa9e4066Sahrens list_destroy(&zcache.mru->list); 300*fa9e4066Sahrens list_destroy(&zcache.mfu->list); 301*fa9e4066Sahrens } 302*fa9e4066Sahrens 303*fa9e4066Sahrens /*ARGSUSED*/ 304*fa9e4066Sahrens static void 305*fa9e4066Sahrens znode_pageout_func(dmu_buf_t *dbuf, void *user_ptr) 306*fa9e4066Sahrens { 307*fa9e4066Sahrens znode_t *zp = user_ptr; 308*fa9e4066Sahrens vnode_t *vp = ZTOV(zp); 309*fa9e4066Sahrens 310*fa9e4066Sahrens if (vp->v_count == 0) { 311*fa9e4066Sahrens vn_invalid(vp); 312*fa9e4066Sahrens zfs_znode_free(zp); 313*fa9e4066Sahrens } 314*fa9e4066Sahrens } 315*fa9e4066Sahrens 316*fa9e4066Sahrens /*ARGSUSED*/ 317*fa9e4066Sahrens static int 318*fa9e4066Sahrens zfs_znode_cache_constructor(void *buf, void *cdrarg, int kmflags) 319*fa9e4066Sahrens { 320*fa9e4066Sahrens znode_t *zp = buf; 321*fa9e4066Sahrens 322*fa9e4066Sahrens zp->z_vnode = vn_alloc(KM_SLEEP); 323*fa9e4066Sahrens zp->z_vnode->v_data = (caddr_t)zp; 324*fa9e4066Sahrens mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); 325*fa9e4066Sahrens rw_init(&zp->z_map_lock, NULL, RW_DEFAULT, NULL); 326*fa9e4066Sahrens rw_init(&zp->z_grow_lock, NULL, RW_DEFAULT, NULL); 327*fa9e4066Sahrens rw_init(&zp->z_append_lock, NULL, RW_DEFAULT, NULL); 328*fa9e4066Sahrens mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); 329*fa9e4066Sahrens zp->z_dbuf_held = 0; 330*fa9e4066Sahrens zp->z_dirlocks = 0; 331*fa9e4066Sahrens return (0); 332*fa9e4066Sahrens } 333*fa9e4066Sahrens 334*fa9e4066Sahrens /*ARGSUSED*/ 335*fa9e4066Sahrens static void 336*fa9e4066Sahrens zfs_znode_cache_destructor(void *buf, void *cdarg) 337*fa9e4066Sahrens { 338*fa9e4066Sahrens znode_t *zp = buf; 339*fa9e4066Sahrens 340*fa9e4066Sahrens ASSERT(zp->z_dirlocks == 0); 341*fa9e4066Sahrens mutex_destroy(&zp->z_lock); 342*fa9e4066Sahrens rw_destroy(&zp->z_map_lock); 343*fa9e4066Sahrens rw_destroy(&zp->z_grow_lock); 344*fa9e4066Sahrens rw_destroy(&zp->z_append_lock); 345*fa9e4066Sahrens mutex_destroy(&zp->z_acl_lock); 346*fa9e4066Sahrens 347*fa9e4066Sahrens ASSERT(zp->z_dbuf_held == 0); 348*fa9e4066Sahrens ASSERT(ZTOV(zp)->v_count == 0); 349*fa9e4066Sahrens vn_free(ZTOV(zp)); 350*fa9e4066Sahrens } 351*fa9e4066Sahrens 352*fa9e4066Sahrens void 353*fa9e4066Sahrens zfs_znode_init(void) 354*fa9e4066Sahrens { 355*fa9e4066Sahrens /* 356*fa9e4066Sahrens * Initialize zcache 357*fa9e4066Sahrens */ 358*fa9e4066Sahrens ASSERT(znode_cache == NULL); 359*fa9e4066Sahrens znode_cache = kmem_cache_create("zfs_znode_cache", 360*fa9e4066Sahrens sizeof (znode_t), 0, zfs_znode_cache_constructor, 361*fa9e4066Sahrens zfs_znode_cache_destructor, NULL, NULL, NULL, 0); 362*fa9e4066Sahrens 363*fa9e4066Sahrens zcache_init(); 364*fa9e4066Sahrens } 365*fa9e4066Sahrens 366*fa9e4066Sahrens void 367*fa9e4066Sahrens zfs_znode_fini(void) 368*fa9e4066Sahrens { 369*fa9e4066Sahrens zcache_fini(); 370*fa9e4066Sahrens 371*fa9e4066Sahrens /* 372*fa9e4066Sahrens * Cleanup vfs & vnode ops 373*fa9e4066Sahrens */ 374*fa9e4066Sahrens zfs_remove_op_tables(); 375*fa9e4066Sahrens 376*fa9e4066Sahrens /* 377*fa9e4066Sahrens * Cleanup zcache 378*fa9e4066Sahrens */ 379*fa9e4066Sahrens if (znode_cache) 380*fa9e4066Sahrens kmem_cache_destroy(znode_cache); 381*fa9e4066Sahrens znode_cache = NULL; 382*fa9e4066Sahrens } 383*fa9e4066Sahrens 384*fa9e4066Sahrens struct vnodeops *zfs_dvnodeops; 385*fa9e4066Sahrens struct vnodeops *zfs_fvnodeops; 386*fa9e4066Sahrens struct vnodeops *zfs_symvnodeops; 387*fa9e4066Sahrens struct vnodeops *zfs_xdvnodeops; 388*fa9e4066Sahrens struct vnodeops *zfs_evnodeops; 389*fa9e4066Sahrens 390*fa9e4066Sahrens void 391*fa9e4066Sahrens zfs_remove_op_tables() 392*fa9e4066Sahrens { 393*fa9e4066Sahrens /* 394*fa9e4066Sahrens * Remove vfs ops 395*fa9e4066Sahrens */ 396*fa9e4066Sahrens ASSERT(zfsfstype); 397*fa9e4066Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 398*fa9e4066Sahrens zfsfstype = 0; 399*fa9e4066Sahrens 400*fa9e4066Sahrens /* 401*fa9e4066Sahrens * Remove vnode ops 402*fa9e4066Sahrens */ 403*fa9e4066Sahrens if (zfs_dvnodeops) 404*fa9e4066Sahrens vn_freevnodeops(zfs_dvnodeops); 405*fa9e4066Sahrens if (zfs_fvnodeops) 406*fa9e4066Sahrens vn_freevnodeops(zfs_fvnodeops); 407*fa9e4066Sahrens if (zfs_symvnodeops) 408*fa9e4066Sahrens vn_freevnodeops(zfs_symvnodeops); 409*fa9e4066Sahrens if (zfs_xdvnodeops) 410*fa9e4066Sahrens vn_freevnodeops(zfs_xdvnodeops); 411*fa9e4066Sahrens if (zfs_evnodeops) 412*fa9e4066Sahrens vn_freevnodeops(zfs_evnodeops); 413*fa9e4066Sahrens 414*fa9e4066Sahrens zfs_dvnodeops = NULL; 415*fa9e4066Sahrens zfs_fvnodeops = NULL; 416*fa9e4066Sahrens zfs_symvnodeops = NULL; 417*fa9e4066Sahrens zfs_xdvnodeops = NULL; 418*fa9e4066Sahrens zfs_evnodeops = NULL; 419*fa9e4066Sahrens } 420*fa9e4066Sahrens 421*fa9e4066Sahrens extern const fs_operation_def_t zfs_dvnodeops_template[]; 422*fa9e4066Sahrens extern const fs_operation_def_t zfs_fvnodeops_template[]; 423*fa9e4066Sahrens extern const fs_operation_def_t zfs_xdvnodeops_template[]; 424*fa9e4066Sahrens extern const fs_operation_def_t zfs_symvnodeops_template[]; 425*fa9e4066Sahrens extern const fs_operation_def_t zfs_evnodeops_template[]; 426*fa9e4066Sahrens 427*fa9e4066Sahrens int 428*fa9e4066Sahrens zfs_create_op_tables() 429*fa9e4066Sahrens { 430*fa9e4066Sahrens int error; 431*fa9e4066Sahrens 432*fa9e4066Sahrens /* 433*fa9e4066Sahrens * zfs_dvnodeops can be set if mod_remove() calls mod_installfs() 434*fa9e4066Sahrens * due to a failure to remove the the 2nd modlinkage (zfs_modldrv). 435*fa9e4066Sahrens * In this case we just return as the ops vectors are already set up. 436*fa9e4066Sahrens */ 437*fa9e4066Sahrens if (zfs_dvnodeops) 438*fa9e4066Sahrens return (0); 439*fa9e4066Sahrens 440*fa9e4066Sahrens error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template, 441*fa9e4066Sahrens &zfs_dvnodeops); 442*fa9e4066Sahrens if (error) 443*fa9e4066Sahrens return (error); 444*fa9e4066Sahrens 445*fa9e4066Sahrens error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template, 446*fa9e4066Sahrens &zfs_fvnodeops); 447*fa9e4066Sahrens if (error) 448*fa9e4066Sahrens return (error); 449*fa9e4066Sahrens 450*fa9e4066Sahrens error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template, 451*fa9e4066Sahrens &zfs_symvnodeops); 452*fa9e4066Sahrens if (error) 453*fa9e4066Sahrens return (error); 454*fa9e4066Sahrens 455*fa9e4066Sahrens error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template, 456*fa9e4066Sahrens &zfs_xdvnodeops); 457*fa9e4066Sahrens if (error) 458*fa9e4066Sahrens return (error); 459*fa9e4066Sahrens 460*fa9e4066Sahrens error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template, 461*fa9e4066Sahrens &zfs_evnodeops); 462*fa9e4066Sahrens 463*fa9e4066Sahrens return (error); 464*fa9e4066Sahrens } 465*fa9e4066Sahrens 466*fa9e4066Sahrens /* 467*fa9e4066Sahrens * zfs_init_fs - Initialize the zfsvfs struct and the file system 468*fa9e4066Sahrens * incore "master" object. Verify version compatibility. 469*fa9e4066Sahrens */ 470*fa9e4066Sahrens int 471*fa9e4066Sahrens zfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp, cred_t *cr) 472*fa9e4066Sahrens { 473*fa9e4066Sahrens extern int zfsfstype; 474*fa9e4066Sahrens 475*fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 476*fa9e4066Sahrens uint64_t zoid; 477*fa9e4066Sahrens uint64_t version = ZFS_VERSION; 478*fa9e4066Sahrens int i, error; 479*fa9e4066Sahrens dmu_object_info_t doi; 480*fa9e4066Sahrens dmu_objset_stats_t *stats; 481*fa9e4066Sahrens 482*fa9e4066Sahrens *zpp = NULL; 483*fa9e4066Sahrens 484*fa9e4066Sahrens /* 485*fa9e4066Sahrens * XXX - hack to auto-create the pool root filesystem at 486*fa9e4066Sahrens * the first attempted mount. 487*fa9e4066Sahrens */ 488*fa9e4066Sahrens if (dmu_object_info(os, MASTER_NODE_OBJ, &doi) == ENOENT) { 489*fa9e4066Sahrens dmu_tx_t *tx = dmu_tx_create(os); 490*fa9e4066Sahrens 491*fa9e4066Sahrens dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, 3); /* master node */ 492*fa9e4066Sahrens dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, 1); /* delete queue */ 493*fa9e4066Sahrens dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); /* root node */ 494*fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 495*fa9e4066Sahrens ASSERT3U(error, ==, 0); 496*fa9e4066Sahrens zfs_create_fs(os, cr, tx); 497*fa9e4066Sahrens dmu_tx_commit(tx); 498*fa9e4066Sahrens } 499*fa9e4066Sahrens 500*fa9e4066Sahrens if (zap_lookup(os, MASTER_NODE_OBJ, ZFS_VERSION_OBJ, 8, 1, &version)) { 501*fa9e4066Sahrens return (EINVAL); 502*fa9e4066Sahrens } else if (version != ZFS_VERSION) { 503*fa9e4066Sahrens (void) printf("Mismatched versions: File system " 504*fa9e4066Sahrens "is version %lld on-disk format, which is " 505*fa9e4066Sahrens "incompatible with this software version %lld!", 506*fa9e4066Sahrens (u_longlong_t)version, ZFS_VERSION); 507*fa9e4066Sahrens return (ENOTSUP); 508*fa9e4066Sahrens } 509*fa9e4066Sahrens 510*fa9e4066Sahrens /* 511*fa9e4066Sahrens * The fsid is 64 bits, composed of an 8-bit fs type, which 512*fa9e4066Sahrens * separates our fsid from any other filesystem types, and a 513*fa9e4066Sahrens * 56-bit objset unique ID. The objset unique ID is unique to 514*fa9e4066Sahrens * all objsets open on this system, provided by unique_create(). 515*fa9e4066Sahrens * The 8-bit fs type must be put in the low bits of fsid[1] 516*fa9e4066Sahrens * because that's where other Solaris filesystems put it. 517*fa9e4066Sahrens */ 518*fa9e4066Sahrens stats = kmem_alloc(sizeof (dmu_objset_stats_t), KM_SLEEP); 519*fa9e4066Sahrens dmu_objset_stats(os, stats); 520*fa9e4066Sahrens ASSERT((stats->dds_fsid_guid & ~((1ULL<<56)-1)) == 0); 521*fa9e4066Sahrens zfsvfs->z_vfs->vfs_fsid.val[0] = stats->dds_fsid_guid; 522*fa9e4066Sahrens zfsvfs->z_vfs->vfs_fsid.val[1] = ((stats->dds_fsid_guid>>32) << 8) | 523*fa9e4066Sahrens zfsfstype & 0xFF; 524*fa9e4066Sahrens kmem_free(stats, sizeof (dmu_objset_stats_t)); 525*fa9e4066Sahrens stats = NULL; 526*fa9e4066Sahrens 527*fa9e4066Sahrens if (zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &zoid)) { 528*fa9e4066Sahrens return (EINVAL); 529*fa9e4066Sahrens } 530*fa9e4066Sahrens ASSERT(zoid != 0); 531*fa9e4066Sahrens zfsvfs->z_root = zoid; 532*fa9e4066Sahrens 533*fa9e4066Sahrens /* 534*fa9e4066Sahrens * Create the per mount vop tables. 535*fa9e4066Sahrens */ 536*fa9e4066Sahrens 537*fa9e4066Sahrens /* 538*fa9e4066Sahrens * Initialize zget mutex's 539*fa9e4066Sahrens */ 540*fa9e4066Sahrens for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 541*fa9e4066Sahrens mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 542*fa9e4066Sahrens 543*fa9e4066Sahrens error = zfs_zget(zfsvfs, zoid, zpp); 544*fa9e4066Sahrens if (error) 545*fa9e4066Sahrens return (error); 546*fa9e4066Sahrens ASSERT3U((*zpp)->z_id, ==, zoid); 547*fa9e4066Sahrens 548*fa9e4066Sahrens if (zap_lookup(os, MASTER_NODE_OBJ, ZFS_DELETE_QUEUE, 8, 1, &zoid)) { 549*fa9e4066Sahrens return (EINVAL); 550*fa9e4066Sahrens } 551*fa9e4066Sahrens 552*fa9e4066Sahrens zfsvfs->z_dqueue = zoid; 553*fa9e4066Sahrens 554*fa9e4066Sahrens /* 555*fa9e4066Sahrens * Initialize delete head structure 556*fa9e4066Sahrens * Thread(s) will be started/stopped via 557*fa9e4066Sahrens * readonly_changed_cb() depending 558*fa9e4066Sahrens * on whether this is rw/ro mount. 559*fa9e4066Sahrens */ 560*fa9e4066Sahrens list_create(&zfsvfs->z_delete_head.z_znodes, 561*fa9e4066Sahrens sizeof (znode_t), offsetof(znode_t, z_list_node)); 562*fa9e4066Sahrens 563*fa9e4066Sahrens return (0); 564*fa9e4066Sahrens } 565*fa9e4066Sahrens 566*fa9e4066Sahrens /* 567*fa9e4066Sahrens * Construct a new znode/vnode and intialize. 568*fa9e4066Sahrens * 569*fa9e4066Sahrens * This does not do a call to dmu_set_user() that is 570*fa9e4066Sahrens * up to the caller to do, in case you don't want to 571*fa9e4066Sahrens * return the znode 572*fa9e4066Sahrens */ 573*fa9e4066Sahrens znode_t * 574*fa9e4066Sahrens zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, uint64_t obj_num, int blksz) 575*fa9e4066Sahrens { 576*fa9e4066Sahrens znode_t *zp; 577*fa9e4066Sahrens vnode_t *vp; 578*fa9e4066Sahrens 579*fa9e4066Sahrens zp = kmem_cache_alloc(znode_cache, KM_SLEEP); 580*fa9e4066Sahrens 581*fa9e4066Sahrens ASSERT(zp->z_dirlocks == NULL); 582*fa9e4066Sahrens 583*fa9e4066Sahrens zp->z_phys = db->db_data; 584*fa9e4066Sahrens zp->z_zfsvfs = zfsvfs; 585*fa9e4066Sahrens zp->z_active = 1; 586*fa9e4066Sahrens zp->z_reap = 0; 587*fa9e4066Sahrens zp->z_atime_dirty = 0; 588*fa9e4066Sahrens zp->z_dbuf_held = 0; 589*fa9e4066Sahrens zp->z_mapcnt = 0; 590*fa9e4066Sahrens zp->z_last_itx = 0; 591*fa9e4066Sahrens zp->z_dbuf = db; 592*fa9e4066Sahrens zp->z_id = obj_num; 593*fa9e4066Sahrens zp->z_blksz = blksz; 594*fa9e4066Sahrens zp->z_seq = 0x7A4653; 595*fa9e4066Sahrens 596*fa9e4066Sahrens bzero(&zp->z_zcache_node, sizeof (list_node_t)); 597*fa9e4066Sahrens 598*fa9e4066Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 599*fa9e4066Sahrens list_insert_tail(&zfsvfs->z_all_znodes, zp); 600*fa9e4066Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 601*fa9e4066Sahrens 602*fa9e4066Sahrens vp = ZTOV(zp); 603*fa9e4066Sahrens vn_reinit(vp); 604*fa9e4066Sahrens 605*fa9e4066Sahrens vp->v_vfsp = zfsvfs->z_parent->z_vfs; 606*fa9e4066Sahrens vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode); 607*fa9e4066Sahrens 608*fa9e4066Sahrens switch (vp->v_type) { 609*fa9e4066Sahrens case VDIR: 610*fa9e4066Sahrens if (zp->z_phys->zp_flags & ZFS_XATTR) { 611*fa9e4066Sahrens vn_setops(vp, zfs_xdvnodeops); 612*fa9e4066Sahrens vp->v_flag |= V_XATTRDIR; 613*fa9e4066Sahrens } else 614*fa9e4066Sahrens vn_setops(vp, zfs_dvnodeops); 615*fa9e4066Sahrens break; 616*fa9e4066Sahrens case VBLK: 617*fa9e4066Sahrens case VCHR: 618*fa9e4066Sahrens vp->v_rdev = (dev_t)zp->z_phys->zp_rdev; 619*fa9e4066Sahrens /*FALLTHROUGH*/ 620*fa9e4066Sahrens case VFIFO: 621*fa9e4066Sahrens case VSOCK: 622*fa9e4066Sahrens case VDOOR: 623*fa9e4066Sahrens vn_setops(vp, zfs_fvnodeops); 624*fa9e4066Sahrens break; 625*fa9e4066Sahrens case VREG: 626*fa9e4066Sahrens vp->v_flag |= VMODSORT; 627*fa9e4066Sahrens vn_setops(vp, zfs_fvnodeops); 628*fa9e4066Sahrens break; 629*fa9e4066Sahrens case VLNK: 630*fa9e4066Sahrens vn_setops(vp, zfs_symvnodeops); 631*fa9e4066Sahrens break; 632*fa9e4066Sahrens default: 633*fa9e4066Sahrens vn_setops(vp, zfs_evnodeops); 634*fa9e4066Sahrens break; 635*fa9e4066Sahrens } 636*fa9e4066Sahrens 637*fa9e4066Sahrens return (zp); 638*fa9e4066Sahrens } 639*fa9e4066Sahrens 640*fa9e4066Sahrens static void 641*fa9e4066Sahrens zfs_znode_dmu_init(znode_t *zp) 642*fa9e4066Sahrens { 643*fa9e4066Sahrens znode_t *nzp; 644*fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 645*fa9e4066Sahrens dmu_buf_t *db = zp->z_dbuf; 646*fa9e4066Sahrens 647*fa9e4066Sahrens mutex_enter(&zp->z_lock); 648*fa9e4066Sahrens 649*fa9e4066Sahrens nzp = dmu_buf_set_user(db, zp, &zp->z_phys, znode_pageout_func); 650*fa9e4066Sahrens 651*fa9e4066Sahrens /* 652*fa9e4066Sahrens * there should be no 653*fa9e4066Sahrens * concurrent zgets on this object. 654*fa9e4066Sahrens */ 655*fa9e4066Sahrens ASSERT3P(nzp, ==, NULL); 656*fa9e4066Sahrens 657*fa9e4066Sahrens /* 658*fa9e4066Sahrens * Slap on VROOT if we are the root znode 659*fa9e4066Sahrens */ 660*fa9e4066Sahrens if (zp->z_id == zfsvfs->z_root) { 661*fa9e4066Sahrens ZTOV(zp)->v_flag |= VROOT; 662*fa9e4066Sahrens } 663*fa9e4066Sahrens 664*fa9e4066Sahrens zp->z_zcache_state = NULL; 665*fa9e4066Sahrens zp->z_zcache_access = 0; 666*fa9e4066Sahrens 667*fa9e4066Sahrens ASSERT(zp->z_dbuf_held == 0); 668*fa9e4066Sahrens zp->z_dbuf_held = 1; 669*fa9e4066Sahrens VFS_HOLD(zfsvfs->z_vfs); 670*fa9e4066Sahrens mutex_exit(&zp->z_lock); 671*fa9e4066Sahrens vn_exists(ZTOV(zp)); 672*fa9e4066Sahrens } 673*fa9e4066Sahrens 674*fa9e4066Sahrens /* 675*fa9e4066Sahrens * Create a new DMU object to hold a zfs znode. 676*fa9e4066Sahrens * 677*fa9e4066Sahrens * IN: dzp - parent directory for new znode 678*fa9e4066Sahrens * vap - file attributes for new znode 679*fa9e4066Sahrens * tx - dmu transaction id for zap operations 680*fa9e4066Sahrens * cr - credentials of caller 681*fa9e4066Sahrens * flag - flags: 682*fa9e4066Sahrens * IS_ROOT_NODE - new object will be root 683*fa9e4066Sahrens * IS_XATTR - new object is an attribute 684*fa9e4066Sahrens * IS_REPLAY - intent log replay 685*fa9e4066Sahrens * 686*fa9e4066Sahrens * OUT: oid - ID of created object 687*fa9e4066Sahrens * 688*fa9e4066Sahrens */ 689*fa9e4066Sahrens void 690*fa9e4066Sahrens zfs_mknode(znode_t *dzp, vattr_t *vap, uint64_t *oid, dmu_tx_t *tx, cred_t *cr, 691*fa9e4066Sahrens uint_t flag, znode_t **zpp, int bonuslen) 692*fa9e4066Sahrens { 693*fa9e4066Sahrens dmu_buf_t *dbp; 694*fa9e4066Sahrens znode_phys_t *pzp; 695*fa9e4066Sahrens znode_t *zp; 696*fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 697*fa9e4066Sahrens timestruc_t now; 698*fa9e4066Sahrens uint64_t gen; 699*fa9e4066Sahrens int err; 700*fa9e4066Sahrens 701*fa9e4066Sahrens ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); 702*fa9e4066Sahrens 703*fa9e4066Sahrens if (zfsvfs->z_assign >= TXG_INITIAL) { /* ZIL replay */ 704*fa9e4066Sahrens *oid = vap->va_nodeid; 705*fa9e4066Sahrens flag |= IS_REPLAY; 706*fa9e4066Sahrens now = vap->va_ctime; /* see zfs_replay_create() */ 707*fa9e4066Sahrens gen = vap->va_nblocks; /* ditto */ 708*fa9e4066Sahrens } else { 709*fa9e4066Sahrens *oid = 0; 710*fa9e4066Sahrens gethrestime(&now); 711*fa9e4066Sahrens gen = dmu_tx_get_txg(tx); 712*fa9e4066Sahrens } 713*fa9e4066Sahrens 714*fa9e4066Sahrens /* 715*fa9e4066Sahrens * Create a new DMU object. 716*fa9e4066Sahrens */ 717*fa9e4066Sahrens if (vap->va_type == VDIR) { 718*fa9e4066Sahrens if (flag & IS_REPLAY) { 719*fa9e4066Sahrens err = zap_create_claim(zfsvfs->z_os, *oid, 720*fa9e4066Sahrens DMU_OT_DIRECTORY_CONTENTS, 721*fa9e4066Sahrens DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 722*fa9e4066Sahrens ASSERT3U(err, ==, 0); 723*fa9e4066Sahrens } else { 724*fa9e4066Sahrens *oid = zap_create(zfsvfs->z_os, 725*fa9e4066Sahrens DMU_OT_DIRECTORY_CONTENTS, 726*fa9e4066Sahrens DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 727*fa9e4066Sahrens } 728*fa9e4066Sahrens } else { 729*fa9e4066Sahrens if (flag & IS_REPLAY) { 730*fa9e4066Sahrens err = dmu_object_claim(zfsvfs->z_os, *oid, 731*fa9e4066Sahrens DMU_OT_PLAIN_FILE_CONTENTS, 0, 732*fa9e4066Sahrens DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 733*fa9e4066Sahrens ASSERT3U(err, ==, 0); 734*fa9e4066Sahrens } else { 735*fa9e4066Sahrens *oid = dmu_object_alloc(zfsvfs->z_os, 736*fa9e4066Sahrens DMU_OT_PLAIN_FILE_CONTENTS, 0, 737*fa9e4066Sahrens DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); 738*fa9e4066Sahrens } 739*fa9e4066Sahrens } 740*fa9e4066Sahrens dbp = dmu_bonus_hold(zfsvfs->z_os, *oid); 741*fa9e4066Sahrens dmu_buf_will_dirty(dbp, tx); 742*fa9e4066Sahrens 743*fa9e4066Sahrens /* 744*fa9e4066Sahrens * Initialize the znode physical data to zero. 745*fa9e4066Sahrens */ 746*fa9e4066Sahrens ASSERT(dbp->db_size >= sizeof (znode_phys_t)); 747*fa9e4066Sahrens bzero(dbp->db_data, dbp->db_size); 748*fa9e4066Sahrens pzp = dbp->db_data; 749*fa9e4066Sahrens 750*fa9e4066Sahrens /* 751*fa9e4066Sahrens * If this is the root, fix up the half-initialized parent pointer 752*fa9e4066Sahrens * to reference the just-allocated physical data area. 753*fa9e4066Sahrens */ 754*fa9e4066Sahrens if (flag & IS_ROOT_NODE) { 755*fa9e4066Sahrens dzp->z_phys = pzp; 756*fa9e4066Sahrens dzp->z_id = *oid; 757*fa9e4066Sahrens } 758*fa9e4066Sahrens 759*fa9e4066Sahrens /* 760*fa9e4066Sahrens * If parent is an xattr, so am I. 761*fa9e4066Sahrens */ 762*fa9e4066Sahrens if (dzp->z_phys->zp_flags & ZFS_XATTR) 763*fa9e4066Sahrens flag |= IS_XATTR; 764*fa9e4066Sahrens 765*fa9e4066Sahrens if (vap->va_type == VBLK || vap->va_type == VCHR) { 766*fa9e4066Sahrens pzp->zp_rdev = vap->va_rdev; 767*fa9e4066Sahrens } 768*fa9e4066Sahrens 769*fa9e4066Sahrens if (vap->va_type == VDIR) { 770*fa9e4066Sahrens pzp->zp_size = 2; /* contents ("." and "..") */ 771*fa9e4066Sahrens pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; 772*fa9e4066Sahrens } 773*fa9e4066Sahrens 774*fa9e4066Sahrens pzp->zp_parent = dzp->z_id; 775*fa9e4066Sahrens if (flag & IS_XATTR) 776*fa9e4066Sahrens pzp->zp_flags |= ZFS_XATTR; 777*fa9e4066Sahrens 778*fa9e4066Sahrens pzp->zp_gen = gen; 779*fa9e4066Sahrens 780*fa9e4066Sahrens ZFS_TIME_ENCODE(&now, pzp->zp_crtime); 781*fa9e4066Sahrens ZFS_TIME_ENCODE(&now, pzp->zp_ctime); 782*fa9e4066Sahrens 783*fa9e4066Sahrens if (vap->va_mask & AT_ATIME) { 784*fa9e4066Sahrens ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); 785*fa9e4066Sahrens } else { 786*fa9e4066Sahrens ZFS_TIME_ENCODE(&now, pzp->zp_atime); 787*fa9e4066Sahrens } 788*fa9e4066Sahrens 789*fa9e4066Sahrens if (vap->va_mask & AT_MTIME) { 790*fa9e4066Sahrens ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); 791*fa9e4066Sahrens } else { 792*fa9e4066Sahrens ZFS_TIME_ENCODE(&now, pzp->zp_mtime); 793*fa9e4066Sahrens } 794*fa9e4066Sahrens 795*fa9e4066Sahrens pzp->zp_mode = MAKEIMODE(vap->va_type, vap->va_mode); 796*fa9e4066Sahrens zp = zfs_znode_alloc(zfsvfs, dbp, *oid, 0); 797*fa9e4066Sahrens 798*fa9e4066Sahrens zfs_perm_init(zp, dzp, flag, vap, tx, cr); 799*fa9e4066Sahrens 800*fa9e4066Sahrens if (zpp) { 801*fa9e4066Sahrens kmutex_t *hash_mtx = ZFS_OBJ_MUTEX(zp); 802*fa9e4066Sahrens 803*fa9e4066Sahrens mutex_enter(hash_mtx); 804*fa9e4066Sahrens zfs_znode_dmu_init(zp); 805*fa9e4066Sahrens zcache_access(zp, hash_mtx); 806*fa9e4066Sahrens *zpp = zp; 807*fa9e4066Sahrens } else { 808*fa9e4066Sahrens ZTOV(zp)->v_count = 0; 809*fa9e4066Sahrens dmu_buf_rele(dbp); 810*fa9e4066Sahrens zfs_znode_free(zp); 811*fa9e4066Sahrens } 812*fa9e4066Sahrens } 813*fa9e4066Sahrens 814*fa9e4066Sahrens int 815*fa9e4066Sahrens zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) 816*fa9e4066Sahrens { 817*fa9e4066Sahrens dmu_object_info_t doi; 818*fa9e4066Sahrens dmu_buf_t *db; 819*fa9e4066Sahrens znode_t *zp; 820*fa9e4066Sahrens 821*fa9e4066Sahrens *zpp = NULL; 822*fa9e4066Sahrens 823*fa9e4066Sahrens ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); 824*fa9e4066Sahrens 825*fa9e4066Sahrens db = dmu_bonus_hold(zfsvfs->z_os, obj_num); 826*fa9e4066Sahrens if (db == NULL) { 827*fa9e4066Sahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 828*fa9e4066Sahrens return (ENOENT); 829*fa9e4066Sahrens } 830*fa9e4066Sahrens 831*fa9e4066Sahrens dmu_object_info_from_db(db, &doi); 832*fa9e4066Sahrens if (doi.doi_bonus_type != DMU_OT_ZNODE || 833*fa9e4066Sahrens doi.doi_bonus_size < sizeof (znode_phys_t)) { 834*fa9e4066Sahrens dmu_buf_rele(db); 835*fa9e4066Sahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 836*fa9e4066Sahrens return (EINVAL); 837*fa9e4066Sahrens } 838*fa9e4066Sahrens dmu_buf_read(db); 839*fa9e4066Sahrens 840*fa9e4066Sahrens ASSERT(db->db_object == obj_num); 841*fa9e4066Sahrens ASSERT(db->db_offset == -1); 842*fa9e4066Sahrens ASSERT(db->db_data != NULL); 843*fa9e4066Sahrens 844*fa9e4066Sahrens zp = dmu_buf_get_user(db); 845*fa9e4066Sahrens 846*fa9e4066Sahrens if (zp != NULL) { 847*fa9e4066Sahrens mutex_enter(&zp->z_lock); 848*fa9e4066Sahrens 849*fa9e4066Sahrens ASSERT3U(zp->z_id, ==, obj_num); 850*fa9e4066Sahrens if (zp->z_reap) { 851*fa9e4066Sahrens dmu_buf_rele(db); 852*fa9e4066Sahrens mutex_exit(&zp->z_lock); 853*fa9e4066Sahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); 854*fa9e4066Sahrens return (ENOENT); 855*fa9e4066Sahrens } else if (zp->z_dbuf_held) { 856*fa9e4066Sahrens dmu_buf_rele(db); 857*fa9e4066Sahrens } else { 858*fa9e4066Sahrens zp->z_dbuf_held = 1; 859*fa9e4066Sahrens VFS_HOLD(zfsvfs->z_vfs); 860*fa9e4066Sahrens } 861*fa9e4066Sahrens 862*fa9e4066Sahrens if (zp->z_active == 0) { 863*fa9e4066Sahrens zp->z_active = 1; 864*fa9e4066Sahrens if (list_link_active(&zp->z_zcache_node)) { 865*fa9e4066Sahrens mutex_enter(&zp->z_zcache_state->mtx); 866*fa9e4066Sahrens list_remove(&zp->z_zcache_state->list, zp); 867*fa9e4066Sahrens zp->z_zcache_state->lcnt -= 1; 868*fa9e4066Sahrens mutex_exit(&zp->z_zcache_state->mtx); 869*fa9e4066Sahrens } 870*fa9e4066Sahrens } 871*fa9e4066Sahrens VN_HOLD(ZTOV(zp)); 872*fa9e4066Sahrens mutex_exit(&zp->z_lock); 873*fa9e4066Sahrens zcache_access(zp, ZFS_OBJ_MUTEX(zp)); 874*fa9e4066Sahrens *zpp = zp; 875*fa9e4066Sahrens return (0); 876*fa9e4066Sahrens } 877*fa9e4066Sahrens 878*fa9e4066Sahrens /* 879*fa9e4066Sahrens * Not found create new znode/vnode 880*fa9e4066Sahrens */ 881*fa9e4066Sahrens zp = zfs_znode_alloc(zfsvfs, db, obj_num, doi.doi_data_block_size); 882*fa9e4066Sahrens ASSERT3U(zp->z_id, ==, obj_num); 883*fa9e4066Sahrens zfs_znode_dmu_init(zp); 884*fa9e4066Sahrens zcache_access(zp, ZFS_OBJ_MUTEX(zp)); 885*fa9e4066Sahrens *zpp = zp; 886*fa9e4066Sahrens return (0); 887*fa9e4066Sahrens } 888*fa9e4066Sahrens 889*fa9e4066Sahrens void 890*fa9e4066Sahrens zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) 891*fa9e4066Sahrens { 892*fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 893*fa9e4066Sahrens int error; 894*fa9e4066Sahrens 895*fa9e4066Sahrens ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id); 896*fa9e4066Sahrens if (zp->z_phys->zp_acl.z_acl_extern_obj) { 897*fa9e4066Sahrens error = dmu_object_free(zfsvfs->z_os, 898*fa9e4066Sahrens zp->z_phys->zp_acl.z_acl_extern_obj, tx); 899*fa9e4066Sahrens ASSERT3U(error, ==, 0); 900*fa9e4066Sahrens } 901*fa9e4066Sahrens if (zp->z_zcache_state) { 902*fa9e4066Sahrens ASSERT3U(zp->z_zcache_state->cnt, >=, 1); 903*fa9e4066Sahrens atomic_add_64(&zp->z_zcache_state->cnt, -1); 904*fa9e4066Sahrens } 905*fa9e4066Sahrens error = dmu_object_free(zfsvfs->z_os, zp->z_id, tx); 906*fa9e4066Sahrens ASSERT3U(error, ==, 0); 907*fa9e4066Sahrens zp->z_dbuf_held = 0; 908*fa9e4066Sahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); 909*fa9e4066Sahrens dmu_buf_rele(zp->z_dbuf); 910*fa9e4066Sahrens } 911*fa9e4066Sahrens 912*fa9e4066Sahrens void 913*fa9e4066Sahrens zfs_zinactive(znode_t *zp) 914*fa9e4066Sahrens { 915*fa9e4066Sahrens vnode_t *vp = ZTOV(zp); 916*fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 917*fa9e4066Sahrens uint64_t z_id = zp->z_id; 918*fa9e4066Sahrens 919*fa9e4066Sahrens ASSERT(zp->z_dbuf_held && zp->z_phys); 920*fa9e4066Sahrens 921*fa9e4066Sahrens /* 922*fa9e4066Sahrens * Don't allow a zfs_zget() while were trying to release this znode 923*fa9e4066Sahrens */ 924*fa9e4066Sahrens ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); 925*fa9e4066Sahrens 926*fa9e4066Sahrens mutex_enter(&zp->z_lock); 927*fa9e4066Sahrens mutex_enter(&vp->v_lock); 928*fa9e4066Sahrens vp->v_count--; 929*fa9e4066Sahrens if (vp->v_count > 0 || vn_has_cached_data(vp)) { 930*fa9e4066Sahrens /* 931*fa9e4066Sahrens * If the hold count is greater than zero, somebody has 932*fa9e4066Sahrens * obtained a new reference on this znode while we were 933*fa9e4066Sahrens * processing it here, so we are done. If we still have 934*fa9e4066Sahrens * mapped pages then we are also done, since we don't 935*fa9e4066Sahrens * want to inactivate the znode until the pages get pushed. 936*fa9e4066Sahrens * 937*fa9e4066Sahrens * XXX - if vn_has_cached_data(vp) is true, but count == 0, 938*fa9e4066Sahrens * this seems like it would leave the znode hanging with 939*fa9e4066Sahrens * no chance to go inactive... 940*fa9e4066Sahrens */ 941*fa9e4066Sahrens mutex_exit(&vp->v_lock); 942*fa9e4066Sahrens mutex_exit(&zp->z_lock); 943*fa9e4066Sahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 944*fa9e4066Sahrens return; 945*fa9e4066Sahrens } 946*fa9e4066Sahrens mutex_exit(&vp->v_lock); 947*fa9e4066Sahrens zp->z_active = 0; 948*fa9e4066Sahrens 949*fa9e4066Sahrens /* 950*fa9e4066Sahrens * If this was the last reference to a file with no links, 951*fa9e4066Sahrens * remove the file from the file system. 952*fa9e4066Sahrens */ 953*fa9e4066Sahrens if (zp->z_reap) { 954*fa9e4066Sahrens mutex_exit(&zp->z_lock); 955*fa9e4066Sahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 956*fa9e4066Sahrens ASSERT3U(zp->z_zcache_state->cnt, >=, 1); 957*fa9e4066Sahrens atomic_add_64(&zp->z_zcache_state->cnt, -1); 958*fa9e4066Sahrens zp->z_zcache_state = NULL; 959*fa9e4066Sahrens /* XATTR files are not put on the delete queue */ 960*fa9e4066Sahrens if (zp->z_phys->zp_flags & ZFS_XATTR) { 961*fa9e4066Sahrens zfs_rmnode(zp); 962*fa9e4066Sahrens } else { 963*fa9e4066Sahrens mutex_enter(&zfsvfs->z_delete_head.z_mutex); 964*fa9e4066Sahrens list_insert_tail(&zfsvfs->z_delete_head.z_znodes, zp); 965*fa9e4066Sahrens zfsvfs->z_delete_head.z_znode_count++; 966*fa9e4066Sahrens cv_broadcast(&zfsvfs->z_delete_head.z_cv); 967*fa9e4066Sahrens mutex_exit(&zfsvfs->z_delete_head.z_mutex); 968*fa9e4066Sahrens } 969*fa9e4066Sahrens VFS_RELE(zfsvfs->z_vfs); 970*fa9e4066Sahrens return; 971*fa9e4066Sahrens } 972*fa9e4066Sahrens 973*fa9e4066Sahrens /* 974*fa9e4066Sahrens * If the file system for this znode is no longer mounted, 975*fa9e4066Sahrens * evict the znode now, don't put it in the cache. 976*fa9e4066Sahrens */ 977*fa9e4066Sahrens if (zfsvfs->z_unmounted1) { 978*fa9e4066Sahrens zfs_zcache_evict(zp, ZFS_OBJ_MUTEX(zp)); 979*fa9e4066Sahrens return; 980*fa9e4066Sahrens } 981*fa9e4066Sahrens 982*fa9e4066Sahrens /* put znode on evictable list */ 983*fa9e4066Sahrens mutex_enter(&zp->z_zcache_state->mtx); 984*fa9e4066Sahrens list_insert_head(&zp->z_zcache_state->list, zp); 985*fa9e4066Sahrens zp->z_zcache_state->lcnt += 1; 986*fa9e4066Sahrens mutex_exit(&zp->z_zcache_state->mtx); 987*fa9e4066Sahrens mutex_exit(&zp->z_lock); 988*fa9e4066Sahrens ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); 989*fa9e4066Sahrens } 990*fa9e4066Sahrens 991*fa9e4066Sahrens void 992*fa9e4066Sahrens zfs_znode_free(znode_t *zp) 993*fa9e4066Sahrens { 994*fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 995*fa9e4066Sahrens 996*fa9e4066Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 997*fa9e4066Sahrens list_remove(&zfsvfs->z_all_znodes, zp); 998*fa9e4066Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 999*fa9e4066Sahrens 1000*fa9e4066Sahrens kmem_cache_free(znode_cache, zp); 1001*fa9e4066Sahrens } 1002*fa9e4066Sahrens 1003*fa9e4066Sahrens void 1004*fa9e4066Sahrens zfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx) 1005*fa9e4066Sahrens { 1006*fa9e4066Sahrens timestruc_t now; 1007*fa9e4066Sahrens 1008*fa9e4066Sahrens ASSERT(MUTEX_HELD(&zp->z_lock)); 1009*fa9e4066Sahrens 1010*fa9e4066Sahrens gethrestime(&now); 1011*fa9e4066Sahrens 1012*fa9e4066Sahrens if (tx) { 1013*fa9e4066Sahrens dmu_buf_will_dirty(zp->z_dbuf, tx); 1014*fa9e4066Sahrens zp->z_atime_dirty = 0; 1015*fa9e4066Sahrens zp->z_seq++; 1016*fa9e4066Sahrens } else { 1017*fa9e4066Sahrens zp->z_atime_dirty = 1; 1018*fa9e4066Sahrens } 1019*fa9e4066Sahrens 1020*fa9e4066Sahrens if (flag & AT_ATIME) 1021*fa9e4066Sahrens ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime); 1022*fa9e4066Sahrens 1023*fa9e4066Sahrens if (flag & AT_MTIME) 1024*fa9e4066Sahrens ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime); 1025*fa9e4066Sahrens 1026*fa9e4066Sahrens if (flag & AT_CTIME) 1027*fa9e4066Sahrens ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime); 1028*fa9e4066Sahrens } 1029*fa9e4066Sahrens 1030*fa9e4066Sahrens /* 1031*fa9e4066Sahrens * Update the requested znode timestamps with the current time. 1032*fa9e4066Sahrens * If we are in a transaction, then go ahead and mark the znode 1033*fa9e4066Sahrens * dirty in the transaction so the timestamps will go to disk. 1034*fa9e4066Sahrens * Otherwise, we will get pushed next time the znode is updated 1035*fa9e4066Sahrens * in a transaction, or when this znode eventually goes inactive. 1036*fa9e4066Sahrens * 1037*fa9e4066Sahrens * Why is this OK? 1038*fa9e4066Sahrens * 1 - Only the ACCESS time is ever updated outside of a transaction. 1039*fa9e4066Sahrens * 2 - Multiple consecutive updates will be collapsed into a single 1040*fa9e4066Sahrens * znode update by the transaction grouping semantics of the DMU. 1041*fa9e4066Sahrens */ 1042*fa9e4066Sahrens void 1043*fa9e4066Sahrens zfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx) 1044*fa9e4066Sahrens { 1045*fa9e4066Sahrens mutex_enter(&zp->z_lock); 1046*fa9e4066Sahrens zfs_time_stamper_locked(zp, flag, tx); 1047*fa9e4066Sahrens mutex_exit(&zp->z_lock); 1048*fa9e4066Sahrens } 1049*fa9e4066Sahrens 1050*fa9e4066Sahrens /* 1051*fa9e4066Sahrens * Grow the block size for a file. This may involve migrating data 1052*fa9e4066Sahrens * from the bonus buffer into a data block (when we grow beyond the 1053*fa9e4066Sahrens * bonus buffer data area). 1054*fa9e4066Sahrens * 1055*fa9e4066Sahrens * IN: zp - znode of file to free data in. 1056*fa9e4066Sahrens * size - requested block size 1057*fa9e4066Sahrens * tx - open transaction. 1058*fa9e4066Sahrens * 1059*fa9e4066Sahrens * RETURN: 0 if success 1060*fa9e4066Sahrens * error code if failure 1061*fa9e4066Sahrens * 1062*fa9e4066Sahrens * NOTE: this function assumes that the znode is write locked. 1063*fa9e4066Sahrens */ 1064*fa9e4066Sahrens int 1065*fa9e4066Sahrens zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) 1066*fa9e4066Sahrens { 1067*fa9e4066Sahrens int error; 1068*fa9e4066Sahrens u_longlong_t dummy; 1069*fa9e4066Sahrens 1070*fa9e4066Sahrens ASSERT(rw_write_held(&zp->z_grow_lock)); 1071*fa9e4066Sahrens 1072*fa9e4066Sahrens if (size <= zp->z_blksz) 1073*fa9e4066Sahrens return (0); 1074*fa9e4066Sahrens /* 1075*fa9e4066Sahrens * If the file size is already greater than the current blocksize, 1076*fa9e4066Sahrens * we will not grow. If there is more than one block in a file, 1077*fa9e4066Sahrens * the blocksize cannot change. 1078*fa9e4066Sahrens */ 1079*fa9e4066Sahrens if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz) 1080*fa9e4066Sahrens return (0); 1081*fa9e4066Sahrens 1082*fa9e4066Sahrens error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, 1083*fa9e4066Sahrens size, 0, tx); 1084*fa9e4066Sahrens if (error == ENOTSUP) 1085*fa9e4066Sahrens return (0); 1086*fa9e4066Sahrens ASSERT3U(error, ==, 0); 1087*fa9e4066Sahrens 1088*fa9e4066Sahrens /* What blocksize did we actually get? */ 1089*fa9e4066Sahrens dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy); 1090*fa9e4066Sahrens 1091*fa9e4066Sahrens return (0); 1092*fa9e4066Sahrens } 1093*fa9e4066Sahrens 1094*fa9e4066Sahrens /* 1095*fa9e4066Sahrens * This is a dummy interface used when pvn_vplist_dirty() should *not* 1096*fa9e4066Sahrens * be calling back into the fs for a putpage(). E.g.: when truncating 1097*fa9e4066Sahrens * a file, the pages being "thrown away* don't need to be written out. 1098*fa9e4066Sahrens */ 1099*fa9e4066Sahrens /* ARGSUSED */ 1100*fa9e4066Sahrens static int 1101*fa9e4066Sahrens zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, 1102*fa9e4066Sahrens int flags, cred_t *cr) 1103*fa9e4066Sahrens { 1104*fa9e4066Sahrens ASSERT(0); 1105*fa9e4066Sahrens return (0); 1106*fa9e4066Sahrens } 1107*fa9e4066Sahrens 1108*fa9e4066Sahrens /* 1109*fa9e4066Sahrens * Free space in a file. Currently, this function only 1110*fa9e4066Sahrens * supports freeing space at the end of the file. 1111*fa9e4066Sahrens * 1112*fa9e4066Sahrens * IN: zp - znode of file to free data in. 1113*fa9e4066Sahrens * from - start of section to free. 1114*fa9e4066Sahrens * len - length of section to free (0 => to EOF). 1115*fa9e4066Sahrens * flag - current file open mode flags. 1116*fa9e4066Sahrens * tx - open transaction. 1117*fa9e4066Sahrens * 1118*fa9e4066Sahrens * RETURN: 0 if success 1119*fa9e4066Sahrens * error code if failure 1120*fa9e4066Sahrens */ 1121*fa9e4066Sahrens int 1122*fa9e4066Sahrens zfs_freesp(znode_t *zp, uint64_t from, uint64_t len, int flag, dmu_tx_t *tx, 1123*fa9e4066Sahrens cred_t *cr) 1124*fa9e4066Sahrens { 1125*fa9e4066Sahrens vnode_t *vp = ZTOV(zp); 1126*fa9e4066Sahrens uint64_t size = zp->z_phys->zp_size; 1127*fa9e4066Sahrens uint64_t end = from + len; 1128*fa9e4066Sahrens int have_grow_lock, error; 1129*fa9e4066Sahrens 1130*fa9e4066Sahrens have_grow_lock = RW_WRITE_HELD(&zp->z_grow_lock); 1131*fa9e4066Sahrens 1132*fa9e4066Sahrens /* 1133*fa9e4066Sahrens * Nothing to do if file already at desired length. 1134*fa9e4066Sahrens */ 1135*fa9e4066Sahrens if (len == 0 && size == from) { 1136*fa9e4066Sahrens return (0); 1137*fa9e4066Sahrens } 1138*fa9e4066Sahrens 1139*fa9e4066Sahrens /* 1140*fa9e4066Sahrens * Check for any locks in the region to be freed. 1141*fa9e4066Sahrens */ 1142*fa9e4066Sahrens if (MANDLOCK(vp, (mode_t)zp->z_phys->zp_mode)) { 1143*fa9e4066Sahrens uint64_t start; 1144*fa9e4066Sahrens 1145*fa9e4066Sahrens if (size > from) 1146*fa9e4066Sahrens start = from; 1147*fa9e4066Sahrens else 1148*fa9e4066Sahrens start = size; 1149*fa9e4066Sahrens if (error = chklock(vp, FWRITE, start, 0, flag, NULL)) 1150*fa9e4066Sahrens return (error); 1151*fa9e4066Sahrens } 1152*fa9e4066Sahrens 1153*fa9e4066Sahrens if (end > zp->z_blksz && (!ISP2(zp->z_blksz) || 1154*fa9e4066Sahrens zp->z_blksz < zp->z_zfsvfs->z_max_blksz)) { 1155*fa9e4066Sahrens uint64_t new_blksz; 1156*fa9e4066Sahrens /* 1157*fa9e4066Sahrens * We are growing the file past the current block size. 1158*fa9e4066Sahrens */ 1159*fa9e4066Sahrens if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { 1160*fa9e4066Sahrens ASSERT(!ISP2(zp->z_blksz)); 1161*fa9e4066Sahrens new_blksz = MIN(end, SPA_MAXBLOCKSIZE); 1162*fa9e4066Sahrens } else { 1163*fa9e4066Sahrens new_blksz = MIN(end, zp->z_zfsvfs->z_max_blksz); 1164*fa9e4066Sahrens } 1165*fa9e4066Sahrens error = zfs_grow_blocksize(zp, new_blksz, tx); 1166*fa9e4066Sahrens ASSERT(error == 0); 1167*fa9e4066Sahrens } 1168*fa9e4066Sahrens if (end > size || len == 0) 1169*fa9e4066Sahrens zp->z_phys->zp_size = end; 1170*fa9e4066Sahrens if (from > size) 1171*fa9e4066Sahrens return (0); 1172*fa9e4066Sahrens 1173*fa9e4066Sahrens if (have_grow_lock) 1174*fa9e4066Sahrens rw_downgrade(&zp->z_grow_lock); 1175*fa9e4066Sahrens /* 1176*fa9e4066Sahrens * Clear any mapped pages in the truncated region. 1177*fa9e4066Sahrens */ 1178*fa9e4066Sahrens rw_enter(&zp->z_map_lock, RW_WRITER); 1179*fa9e4066Sahrens if (vn_has_cached_data(vp)) { 1180*fa9e4066Sahrens page_t *pp; 1181*fa9e4066Sahrens uint64_t start = from & PAGEMASK; 1182*fa9e4066Sahrens int off = from & PAGEOFFSET; 1183*fa9e4066Sahrens 1184*fa9e4066Sahrens if (off != 0 && (pp = page_lookup(vp, start, SE_SHARED))) { 1185*fa9e4066Sahrens /* 1186*fa9e4066Sahrens * We need to zero a partial page. 1187*fa9e4066Sahrens */ 1188*fa9e4066Sahrens pagezero(pp, off, PAGESIZE - off); 1189*fa9e4066Sahrens start += PAGESIZE; 1190*fa9e4066Sahrens page_unlock(pp); 1191*fa9e4066Sahrens } 1192*fa9e4066Sahrens error = pvn_vplist_dirty(vp, start, zfs_no_putpage, 1193*fa9e4066Sahrens B_INVAL | B_TRUNC, cr); 1194*fa9e4066Sahrens ASSERT(error == 0); 1195*fa9e4066Sahrens } 1196*fa9e4066Sahrens rw_exit(&zp->z_map_lock); 1197*fa9e4066Sahrens 1198*fa9e4066Sahrens if (!have_grow_lock) 1199*fa9e4066Sahrens rw_enter(&zp->z_grow_lock, RW_READER); 1200*fa9e4066Sahrens 1201*fa9e4066Sahrens if (len == 0) 1202*fa9e4066Sahrens len = -1; 1203*fa9e4066Sahrens else if (end > size) 1204*fa9e4066Sahrens len = size - from; 1205*fa9e4066Sahrens dmu_free_range(zp->z_zfsvfs->z_os, zp->z_id, from, len, tx); 1206*fa9e4066Sahrens 1207*fa9e4066Sahrens if (!have_grow_lock) 1208*fa9e4066Sahrens rw_exit(&zp->z_grow_lock); 1209*fa9e4066Sahrens 1210*fa9e4066Sahrens return (0); 1211*fa9e4066Sahrens } 1212*fa9e4066Sahrens 1213*fa9e4066Sahrens 1214*fa9e4066Sahrens void 1215*fa9e4066Sahrens zfs_create_fs(objset_t *os, cred_t *cr, dmu_tx_t *tx) 1216*fa9e4066Sahrens { 1217*fa9e4066Sahrens zfsvfs_t zfsvfs; 1218*fa9e4066Sahrens uint64_t moid, doid, roid = 0; 1219*fa9e4066Sahrens uint64_t version = ZFS_VERSION; 1220*fa9e4066Sahrens int error; 1221*fa9e4066Sahrens znode_t *rootzp = NULL; 1222*fa9e4066Sahrens vnode_t *vp; 1223*fa9e4066Sahrens vattr_t vattr; 1224*fa9e4066Sahrens 1225*fa9e4066Sahrens /* 1226*fa9e4066Sahrens * First attempt to create master node. 1227*fa9e4066Sahrens */ 1228*fa9e4066Sahrens moid = MASTER_NODE_OBJ; 1229*fa9e4066Sahrens error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, 1230*fa9e4066Sahrens DMU_OT_NONE, 0, tx); 1231*fa9e4066Sahrens ASSERT(error == 0); 1232*fa9e4066Sahrens 1233*fa9e4066Sahrens /* 1234*fa9e4066Sahrens * Set starting attributes. 1235*fa9e4066Sahrens */ 1236*fa9e4066Sahrens 1237*fa9e4066Sahrens error = zap_update(os, moid, ZFS_VERSION_OBJ, 8, 1, &version, tx); 1238*fa9e4066Sahrens ASSERT(error == 0); 1239*fa9e4066Sahrens 1240*fa9e4066Sahrens /* 1241*fa9e4066Sahrens * Create a delete queue. 1242*fa9e4066Sahrens */ 1243*fa9e4066Sahrens doid = zap_create(os, DMU_OT_DELETE_QUEUE, DMU_OT_NONE, 0, tx); 1244*fa9e4066Sahrens 1245*fa9e4066Sahrens error = zap_add(os, moid, ZFS_DELETE_QUEUE, 8, 1, &doid, tx); 1246*fa9e4066Sahrens ASSERT(error == 0); 1247*fa9e4066Sahrens 1248*fa9e4066Sahrens /* 1249*fa9e4066Sahrens * Create root znode. Create minimal znode/vnode/zfsvfs 1250*fa9e4066Sahrens * to allow zfs_mknode to work. 1251*fa9e4066Sahrens */ 1252*fa9e4066Sahrens vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; 1253*fa9e4066Sahrens vattr.va_type = VDIR; 1254*fa9e4066Sahrens vattr.va_mode = S_IFDIR|0755; 1255*fa9e4066Sahrens vattr.va_uid = 0; 1256*fa9e4066Sahrens vattr.va_gid = 3; 1257*fa9e4066Sahrens 1258*fa9e4066Sahrens rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); 1259*fa9e4066Sahrens rootzp->z_zfsvfs = &zfsvfs; 1260*fa9e4066Sahrens rootzp->z_active = 1; 1261*fa9e4066Sahrens rootzp->z_reap = 0; 1262*fa9e4066Sahrens rootzp->z_atime_dirty = 0; 1263*fa9e4066Sahrens rootzp->z_dbuf_held = 0; 1264*fa9e4066Sahrens 1265*fa9e4066Sahrens vp = ZTOV(rootzp); 1266*fa9e4066Sahrens vn_reinit(vp); 1267*fa9e4066Sahrens vp->v_type = VDIR; 1268*fa9e4066Sahrens 1269*fa9e4066Sahrens bzero(&zfsvfs, sizeof (zfsvfs_t)); 1270*fa9e4066Sahrens 1271*fa9e4066Sahrens zfsvfs.z_os = os; 1272*fa9e4066Sahrens zfsvfs.z_assign = TXG_NOWAIT; 1273*fa9e4066Sahrens zfsvfs.z_parent = &zfsvfs; 1274*fa9e4066Sahrens 1275*fa9e4066Sahrens mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 1276*fa9e4066Sahrens list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), 1277*fa9e4066Sahrens offsetof(znode_t, z_link_node)); 1278*fa9e4066Sahrens 1279*fa9e4066Sahrens zfs_mknode(rootzp, &vattr, &roid, tx, cr, IS_ROOT_NODE, NULL, 0); 1280*fa9e4066Sahrens ASSERT3U(rootzp->z_id, ==, roid); 1281*fa9e4066Sahrens error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &roid, tx); 1282*fa9e4066Sahrens ASSERT(error == 0); 1283*fa9e4066Sahrens 1284*fa9e4066Sahrens ZTOV(rootzp)->v_count = 0; 1285*fa9e4066Sahrens kmem_cache_free(znode_cache, rootzp); 1286*fa9e4066Sahrens } 1287