/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Bayard G. Bell. All rights reserved. * Copyright 2013 Joyent, Inc. All rights reserved. */ /* * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T * All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * This is the loadable module wrapper. */ #include #include #include #include #include #include #include #include /* * The pseudo NFS filesystem to allow diskless booting to dynamically * mount either a NFS V2, NFS V3, or NFS V4 filesystem. This only implements * the VFS_MOUNTROOT op and is only intended to be used by the * diskless booting code until the real root filesystem is mounted. * Nothing else should ever call this! * * The strategy is that if the initial rootfs type is set to "nfsdyn" * by loadrootmodules() this filesystem is called to mount the * root filesystem. It first attempts to mount a V4 filesystem, and if that * fails due to an RPC version mismatch it tries V3 and finally V2. * Once the real mount succeeds the vfsops and rootfs name are changed * to reflect the real filesystem type. */ static int nfsdyninit(int, char *); static int nfsdyn_mountroot(vfs_t *, whymountroot_t); vfsops_t *nfsdyn_vfsops; /* * The following data structures are used to configure the NFS * system call, the NFS Version 2 client VFS, and the NFS Version * 3 client VFS into the system. The NFS Version 4 structures are defined in * nfs4_common.c */ /* * The NFS system call. */ static struct sysent nfssysent = { 2, SE_32RVAL1 | SE_ARGC | SE_NOUNLOAD, nfssys }; static struct modlsys modlsys = { &mod_syscallops, "NFS syscall, client, and common", &nfssysent }; #ifdef _SYSCALL32_IMPL static struct modlsys modlsys32 = { &mod_syscallops32, "NFS syscall, client, and common (32-bit)", &nfssysent }; #endif /* _SYSCALL32_IMPL */ /* * The NFS Dynamic client VFS. */ static vfsdef_t vfw = { VFSDEF_VERSION, "nfsdyn", nfsdyninit, 0, NULL }; static struct modlfs modlfs = { &mod_fsops, "network filesystem", &vfw }; /* * The NFS Version 2 client VFS. */ static vfsdef_t vfw2 = { VFSDEF_VERSION, "nfs", nfsinit, VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, NULL }; static struct modlfs modlfs2 = { &mod_fsops, "network filesystem version 2", &vfw2 }; /* * The NFS Version 3 client VFS. */ static vfsdef_t vfw3 = { VFSDEF_VERSION, "nfs3", nfs3init, VSW_CANREMOUNT|VSW_NOTZONESAFE|VSW_STATS, NULL }; static struct modlfs modlfs3 = { &mod_fsops, "network filesystem version 3", &vfw3 }; extern struct modlfs modlfs4; /* * We have too many linkage structures so we define our own XXX */ struct modlinkage_big { int ml_rev; /* rev of loadable modules system */ void *ml_linkage[7]; /* NULL terminated list of */ /* linkage structures */ }; /* * All of the module configuration linkages required to configure * the system call and client VFS's into the system. */ static struct modlinkage_big modlinkage = { MODREV_1, &modlsys, #ifdef _SYSCALL32_IMPL &modlsys32, #endif &modlfs, &modlfs2, &modlfs3, &modlfs4, NULL }; /* * This routine is invoked automatically when the kernel module * containing this routine is loaded. This allows module specific * initialization to be done when the module is loaded. */ int _init(void) { int status; if ((status = nfs_clntinit()) != 0) { cmn_err(CE_WARN, "_init: nfs_clntinit failed"); return (status); } /* * Create the version specific kstats. * * PSARC 2001/697 Contract Private Interface * All nfs kstats are under SunMC contract * Please refer to the PSARC listed above and contact * SunMC before making any changes! * * Changes must be reviewed by Solaris File Sharing * Changes must be communicated to contract-2001-697@sun.com * */ zone_key_create(&nfsstat_zone_key, nfsstat_zone_init, NULL, nfsstat_zone_fini); status = mod_install((struct modlinkage *)&modlinkage); if (status) { (void) zone_key_delete(nfsstat_zone_key); /* * Failed to install module, cleanup previous * initialization work. */ nfs_clntfini(); /* * Clean up work performed indirectly by mod_installfs() * as a result of our call to mod_install(). */ nfs4fini(); nfs3fini(); nfsfini(); } return (status); } int _fini(void) { /* Don't allow module to be unloaded */ return (EBUSY); } int _info(struct modinfo *modinfop) { return (mod_info((struct modlinkage *)&modlinkage, modinfop)); } /* * General utilities */ /* * Returns the preferred transfer size in bytes based on * what network interfaces are available. */ int nfstsize(void) { /* * For the moment, just return NFS_MAXDATA until we can query the * appropriate transport. */ return (NFS_MAXDATA); } /* * Returns the preferred transfer size in bytes based on * what network interfaces are available. */ /* this should reflect the largest transfer size possible */ static int nfs3_max_transfer_size = 1024 * 1024; int nfs3tsize(void) { /* * For the moment, just return nfs3_max_transfer_size until we * can query the appropriate transport. */ return (nfs3_max_transfer_size); } static uint_t nfs3_max_transfer_size_clts = 32 * 1024; static uint_t nfs3_max_transfer_size_cots = 1024 * 1024; static uint_t nfs3_max_transfer_size_rdma = 1024 * 1024; uint_t nfs3_tsize(struct knetconfig *knp) { if (knp->knc_semantics == NC_TPI_COTS_ORD || knp->knc_semantics == NC_TPI_COTS) return (nfs3_max_transfer_size_cots); if (knp->knc_semantics == NC_TPI_RDMA) return (nfs3_max_transfer_size_rdma); return (nfs3_max_transfer_size_clts); } uint_t rfs3_tsize(struct svc_req *req) { if (req->rq_xprt->xp_type == T_COTS_ORD || req->rq_xprt->xp_type == T_COTS) return (nfs3_max_transfer_size_cots); if (req->rq_xprt->xp_type == T_RDMA) return (nfs3_max_transfer_size_rdma); return (nfs3_max_transfer_size_clts); } /* ARGSUSED */ static int nfsdyninit(int fstyp, char *name) { static const fs_operation_def_t nfsdyn_vfsops_template[] = { VFSNAME_MOUNTROOT, { .vfs_mountroot = nfsdyn_mountroot }, NULL, NULL }; int error; error = vfs_setfsops(fstyp, nfsdyn_vfsops_template, &nfsdyn_vfsops); if (error != 0) return (error); return (0); } /* ARGSUSED */ static int nfsdyn_mountroot(vfs_t *vfsp, whymountroot_t why) { char root_hostname[SYS_NMLN+1]; struct servinfo *svp; int error; int vfsflags; char *root_path; struct pathname pn; char *name; static char token[10]; struct nfs_args args; /* nfs mount arguments */ bzero(&args, sizeof (args)); /* do this BEFORE getfile which causes xid stamps to be initialized */ clkset(-1L); /* hack for now - until we get time svc? */ if (why == ROOT_REMOUNT) { /* * Shouldn't happen. */ panic("nfs3_mountroot: why == ROOT_REMOUNT\n"); } if (why == ROOT_UNMOUNT) { /* * Nothing to do for NFS. */ return (0); } /* * why == ROOT_INIT */ name = token; *name = 0; getfsname("root", name, sizeof (token)); pn_alloc(&pn); root_path = pn.pn_path; svp = kmem_zalloc(sizeof (*svp), KM_SLEEP); mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL); svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP); svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP); svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP); /* * First try version 4 */ vfs_setops(vfsp, nfs4_vfsops); args.addr = &svp->sv_addr; args.fh = (char *)&svp->sv_fhandle; args.knconf = svp->sv_knconf; args.hostname = root_hostname; vfsflags = 0; if (error = mount_root(*name ? name : "root", root_path, NFS_V4, &args, &vfsflags)) { if (error != EPROTONOSUPPORT) { nfs_cmn_err(error, CE_WARN, "Unable to mount NFS root filesystem: %m"); sv_free(svp); pn_free(&pn); vfs_setops(vfsp, nfsdyn_vfsops); return (error); } /* * Then try version 3 */ bzero(&args, sizeof (args)); vfs_setops(vfsp, nfs3_vfsops); args.addr = &svp->sv_addr; args.fh = (char *)&svp->sv_fhandle; args.knconf = svp->sv_knconf; args.hostname = root_hostname; vfsflags = 0; if (error = mount_root(*name ? name : "root", root_path, NFS_V3, &args, &vfsflags)) { if (error != EPROTONOSUPPORT) { nfs_cmn_err(error, CE_WARN, "Unable to mount NFS root filesystem: %m"); sv_free(svp); pn_free(&pn); vfs_setops(vfsp, nfsdyn_vfsops); return (error); } /* * Finally, try version 2 */ bzero(&args, sizeof (args)); args.addr = &svp->sv_addr; args.fh = (char *)&svp->sv_fhandle.fh_buf; args.knconf = svp->sv_knconf; args.hostname = root_hostname; vfsflags = 0; vfs_setops(vfsp, nfs_vfsops); if (error = mount_root(*name ? name : "root", root_path, NFS_VERSION, &args, &vfsflags)) { nfs_cmn_err(error, CE_WARN, "Unable to mount NFS root filesystem: %m"); sv_free(svp); pn_free(&pn); vfs_setops(vfsp, nfsdyn_vfsops); return (error); } } } sv_free(svp); pn_free(&pn); return (VFS_MOUNTROOT(vfsp, why)); } int nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *buf) { mntinfo_t *mi; /* mount info, pointed at by vfs */ STRUCT_HANDLE(nfs_args, args); int flags; #ifdef lint model = model; #endif STRUCT_SET_HANDLE(args, model, buf); flags = STRUCT_FGET(args, flags); /* * Set option fields in mount info record */ mi = VTOMI(vp); if (flags & NFSMNT_NOAC) { mi->mi_flags |= MI_NOAC; PURGE_ATTRCACHE(vp); } if (flags & NFSMNT_NOCTO) mi->mi_flags |= MI_NOCTO; if (flags & NFSMNT_LLOCK) mi->mi_flags |= MI_LLOCK; if (flags & NFSMNT_GRPID) mi->mi_flags |= MI_GRPID; if (flags & NFSMNT_RETRANS) { if (STRUCT_FGET(args, retrans) < 0) return (EINVAL); mi->mi_retrans = STRUCT_FGET(args, retrans); } if (flags & NFSMNT_TIMEO) { if (STRUCT_FGET(args, timeo) <= 0) return (EINVAL); mi->mi_timeo = STRUCT_FGET(args, timeo); /* * The following scales the standard deviation and * and current retransmission timer to match the * initial value for the timeout specified. */ mi->mi_timers[NFS_CALLTYPES].rt_deviate = (mi->mi_timeo * hz * 2) / 5; mi->mi_timers[NFS_CALLTYPES].rt_rtxcur = mi->mi_timeo * hz / 10; } if (flags & NFSMNT_RSIZE) { if (STRUCT_FGET(args, rsize) <= 0) return (EINVAL); mi->mi_tsize = MIN(mi->mi_tsize, STRUCT_FGET(args, rsize)); mi->mi_curread = MIN(mi->mi_curread, mi->mi_tsize); } if (flags & NFSMNT_WSIZE) { if (STRUCT_FGET(args, wsize) <= 0) return (EINVAL); mi->mi_stsize = MIN(mi->mi_stsize, STRUCT_FGET(args, wsize)); mi->mi_curwrite = MIN(mi->mi_curwrite, mi->mi_stsize); } if (flags & NFSMNT_ACREGMIN) { if (STRUCT_FGET(args, acregmin) < 0) mi->mi_acregmin = ACMINMAX; else mi->mi_acregmin = MIN(STRUCT_FGET(args, acregmin), ACMINMAX); mi->mi_acregmin = SEC2HR(mi->mi_acregmin); } if (flags & NFSMNT_ACREGMAX) { if (STRUCT_FGET(args, acregmax) < 0) mi->mi_acregmax = ACMAXMAX; else mi->mi_acregmax = MIN(STRUCT_FGET(args, acregmax), ACMAXMAX); mi->mi_acregmax = SEC2HR(mi->mi_acregmax); } if (flags & NFSMNT_ACDIRMIN) { if (STRUCT_FGET(args, acdirmin) < 0) mi->mi_acdirmin = ACMINMAX; else mi->mi_acdirmin = MIN(STRUCT_FGET(args, acdirmin), ACMINMAX); mi->mi_acdirmin = SEC2HR(mi->mi_acdirmin); } if (flags & NFSMNT_ACDIRMAX) { if (STRUCT_FGET(args, acdirmax) < 0) mi->mi_acdirmax = ACMAXMAX; else mi->mi_acdirmax = MIN(STRUCT_FGET(args, acdirmax), ACMAXMAX); mi->mi_acdirmax = SEC2HR(mi->mi_acdirmax); } if (flags & NFSMNT_LOOPBACK) mi->mi_flags |= MI_LOOPBACK; return (0); } /* * Set or Clear direct I/O flag * VOP_RWLOCK() is held for write access to prevent a race condition * which would occur if a process is in the middle of a write when * directio flag gets set. It is possible that all pages may not get flushed. */ /* ARGSUSED */ int nfs_directio(vnode_t *vp, int cmd, cred_t *cr) { int error = 0; rnode_t *rp; rp = VTOR(vp); if (cmd == DIRECTIO_ON) { if (rp->r_flags & RDIRECTIO) return (0); /* * Flush the page cache. */ (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); if (rp->r_flags & RDIRECTIO) { VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); return (0); } if (vn_has_cached_data(vp) && ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) { error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, B_INVAL, cr, NULL); if (error) { if (error == ENOSPC || error == EDQUOT) { mutex_enter(&rp->r_statelock); if (!rp->r_error) rp->r_error = error; mutex_exit(&rp->r_statelock); } VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); return (error); } } mutex_enter(&rp->r_statelock); rp->r_flags |= RDIRECTIO; mutex_exit(&rp->r_statelock); VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); return (0); } if (cmd == DIRECTIO_OFF) { mutex_enter(&rp->r_statelock); rp->r_flags &= ~RDIRECTIO; /* disable direct mode */ mutex_exit(&rp->r_statelock); return (0); } return (EINVAL); }