nfsd.c revision e8279403f4930250f15c0eb769ee554729c61517
12965785mm/*
22965785mm * CDDL HEADER START
32965785mm *
42965785mm * The contents of this file are subject to the terms of the
52965785mm * Common Development and Distribution License (the "License").
62965785mm * You may not use this file except in compliance with the License.
72965785mm *
82965785mm * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
92965785mm * or http://www.opensolaris.org/os/licensing.
102965785mm * See the License for the specific language governing permissions
112965785mm * and limitations under the License.
122965785mm *
132965785mm * When distributing Covered Code, include this CDDL HEADER in each
142965785mm * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
152965785mm * If applicable, add the following below this CDDL HEADER, with the
162965785mm * fields enclosed by brackets "[]" replaced with your own identifying
172965785mm * information: Portions Copyright [yyyy] [name of copyright owner]
182965785mm *
192965785mm * CDDL HEADER END
202965785mm */
212965785mm/*
222965785mm * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
232965785mm * Use is subject to license terms.
242965785mm */
252965785mm
262965785mm/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T		*/
27366f427mm/*	  All Rights Reserved  	*/
28e7b2401mm
29e7b2401mm/*
30e7b2401mm * University Copyright- Copyright (c) 1982, 1986, 1988
312f6e434mm * The Regents of the University of California
322f6e434mm * All Rights Reserved
332f6e434mm *
342965785mm * University Acknowledgment- Portions of this document are derived from
352f6e434mm * software developed by the University of California, Berkeley, and its
362f6e434mm * contributors.
372f6e434mm */
38e7b2401mm
39e7b2401mm/* LINTLIBRARY */
40e7b2401mm/* PROTOLIB1 */
41e7b2401mm
42e7b2401mm#pragma ident	"%Z%%M%	%I%	%E% SMI"
43e7b2401mm
44e7b2401mm/* NFS server */
45e7b2401mm
46e7b2401mm#include <sys/param.h>
47e7b2401mm#include <sys/types.h>
48f3bc6camm#include <sys/stat.h>
49f3bc6camm#include <syslog.h>
50f3bc6camm#include <tiuser.h>
512f6e434mm#include <rpc/rpc.h>
522965785mm#include <errno.h>
532f6e434mm#include <thread.h>
542f6e434mm#include <sys/resource.h>
552f6e434mm#include <sys/time.h>
562965785mm#include <sys/file.h>
572965785mm#include <nfs/nfs.h>
582965785mm#include <nfs/nfs_acl.h>
5966c81a3mm#include <nfs/nfssys.h>
6066c81a3mm#include <stdio.h>
619787405mm#include <stdio_ext.h>
629787405mm#include <stdlib.h>
639787405mm#include <signal.h>
649787405mm#include <netconfig.h>
659787405mm#include <netdir.h>
669787405mm#include <string.h>
679787405mm#include <unistd.h>
689787405mm#include <stropts.h>
699787405mm#include <sys/tihdr.h>
709d8c43fmm#include <poll.h>
719d8c43fmm#include <priv_utils.h>
729d8c43fmm#include <sys/tiuser.h>
739d8c43fmm#include <netinet/tcp.h>
749d8c43fmm#include <deflt.h>
759d8c43fmm#include <rpcsvc/daemon_utils.h>
769d8c43fmm#include <rpcsvc/nfs4_prot.h>
779d8c43fmm#include <libnvpair.h>
789d8c43fmm#include "nfs_tbind.h"
799d8c43fmm#include "thrpool.h"
809d8c43fmm
8166c81a3mm/* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
8266c81a3mm#define	QUIESCE_VERSMIN	4
8366c81a3mm/* DSS: distributed stable storage */
8466c81a3mm#define	DSS_VERSMIN	4
859787405mm
869787405mmstatic	int	nfssvc(int, struct netbuf, struct netconfig *);
879787405mmstatic	int	nfssvcpool(int maxservers);
882965785mmstatic	int	dss_init(uint_t npaths, char **pathnames);
892965785mmstatic	void	dss_mkleafdirs(uint_t npaths, char **pathnames);
902965785mmstatic	void	dss_mkleafdir(char *dir, char *leaf, char *path);
912965785mmstatic	void	usage(void);
922965785mmint		qstrcmp(const void *s1, const void *s2);
932965785mm
942965785mmextern	int	_nfssys(int, void *);
952965785mm
962965785mm/* signal handlers */
972965785mmstatic void sigflush(int);
982965785mmstatic void quiesce(int);
992965785mm
1002965785mmstatic	char	*MyName;
1012965785mmstatic	NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
1022965785mm					    "/dev/udp6", NULL };
1032965785mm/* static	NETSELDECL(defaultprotos)[] =	{ NC_UDP, NC_TCP, NULL }; */
1042965785mm/*
1052965785mm * The following are all globals used by routines in nfs_tbind.c.
1062965785mm */
1072965785mmsize_t	end_listen_fds;		/* used by conn_close_oldest() */
1082965785mmsize_t	num_fds = 0;		/* used by multiple routines */
109366f427mmint	listen_backlog = 32;	/* used by bind_to_{provider,proto}() */
1102965785mmint	num_servers;		/* used by cots_listen_event() */
1112965785mmint	(*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
1122965785mm				/* used by cots_listen_event() */
1132965785mmint	max_conns_allowed = -1;	/* used by cots_listen_event() */
1142965785mm
1152965785mm/*
1162965785mm * Keep track of min/max versions of NFS protocol to be started.
1172965785mm * Start with the defaults (min == 2, max == 3).  We have the
1182965785mm * capability of starting vers=4 but only if the user requests it.
1192965785mm */
1202965785mmint	nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
1212965785mmint	nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
1222f6e434mm
1232965785mm/*
1242965785mm * Set the default for server delegation enablement and set per
1252965785mm * /etc/default/nfs configuration (if present).
1262965785mm */
1272965785mmint	nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
1282965785mm
1292965785mmint
1302965785mmmain(int ac, char *av[])
1312965785mm{
1322965785mm	char *dir = "/";
1332965785mm	int allflag = 0;
1342965785mm	int df_allflag = 0;
1352965785mm	int opt_cnt = 0;
1362965785mm	int maxservers = 1;	/* zero allows inifinte number of threads */
1372965785mm	int maxservers_set = 0;
1382965785mm	int logmaxservers = 0;
1392965785mm	int pid;
1402965785mm	int i;
1412965785mm	char *provider = (char *)NULL;
1422965785mm	char *df_provider = (char *)NULL;
1432965785mm	struct protob *protobp0, *protobp;
1442965785mm	NETSELDECL(proto) = NULL;
1452965785mm	NETSELDECL(df_proto) = NULL;
1462965785mm	NETSELPDECL(providerp);
147dbac001mm	char *defval;
148dbac001mm	boolean_t can_do_mlp;
149dbac001mm	uint_t dss_npaths = 0;
150dbac001mm	char **dss_pathnames = NULL;
151dbac001mm	sigset_t sgset;
152dbac001mm
153dbac001mm	MyName = *av;
1543404e2cmm
1553404e2cmm	/*
1563404e2cmm	 * Initializations that require more privileges than we need to run.
1573404e2cmm	 */
1583404e2cmm	(void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
1593404e2cmm	svcsetprio();
1603404e2cmm
1613404e2cmm	can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
1623404e2cmm	if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
1633404e2cmm	    DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
1642965785mm	    can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
165e7b2401mm		(void) fprintf(stderr, "%s should be run with"
166e7b2401mm		    " sufficient privileges\n", av[0]);
167e7b2401mm		exit(1);
168e7b2401mm	}
169e7b2401mm
170e7b2401mm	(void) enable_extended_FILE_stdio(-1, -1);
171e9183e3mm
172e9183e3mm	/*
173e9183e3mm	 * Read in the values from config file first before we check
174e9183e3mm	 * commandline options so the options override the file.
175e9183e3mm	 */
176e9183e3mm	if ((defopen(NFSADMIN)) == 0) {
177e9183e3mm		if ((defval = defread("NFSD_MAX_CONNECTIONS=")) != NULL) {
178e9183e3mm			errno = 0;
179e9183e3mm			max_conns_allowed = strtol(defval, (char **)NULL, 10);
180e9183e3mm			if (errno != 0) {
181e9183e3mm				max_conns_allowed = -1;
182e9183e3mm			}
183e9183e3mm		}
184e9183e3mm		if ((defval = defread("NFSD_LISTEN_BACKLOG=")) != NULL) {
185e9183e3mm			errno = 0;
186e9183e3mm			listen_backlog = strtol(defval, (char **)NULL, 10);
187e9183e3mm			if (errno != 0) {
188e9183e3mm				listen_backlog = 32;
189e9183e3mm			}
190e9183e3mm		}
191e9183e3mm		if ((defval = defread("NFSD_PROTOCOL=")) != NULL) {
192e9183e3mm			df_proto = strdup(defval);
193e9183e3mm			opt_cnt++;
194e9183e3mm			if (strncasecmp("ALL", defval, 3) == 0) {
195e9183e3mm				free(df_proto);
196e9183e3mm				df_proto = NULL;
197e7b2401mm				df_allflag = 1;
198e7b2401mm			}
1992965785mm		}
2002965785mm		if ((defval = defread("NFSD_DEVICE=")) != NULL) {
2012965785mm			df_provider = strdup(defval);
2022965785mm			opt_cnt++;
2032965785mm		}
2042965785mm		if ((defval = defread("NFSD_SERVERS=")) != NULL) {
2052965785mm			errno = 0;
2062965785mm			maxservers = strtol(defval, (char **)NULL, 10);
2072965785mm			if (errno != 0) {
2082965785mm				maxservers = 1;
2092965785mm			} else {
2102965785mm				maxservers_set = 1;
2112965785mm			}
2122965785mm		}
2132965785mm		if ((defval = defread("NFS_SERVER_VERSMIN=")) != NULL) {
214e9183e3mm			errno = 0;
215e9183e3mm			nfs_server_vers_min =
2162965785mm			    strtol(defval, (char **)NULL, 10);
2172965785mm			if (errno != 0) {
218e9183e3mm				nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
2192965785mm			}
220e9183e3mm		}
221e9183e3mm		if ((defval = defread("NFS_SERVER_VERSMAX=")) != NULL) {
222e9183e3mm			errno = 0;
2232965785mm			nfs_server_vers_max =
2242965785mm			    strtol(defval, (char **)NULL, 10);
2252965785mm			if (errno != 0) {
2262965785mm				nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
227e9183e3mm			}
228e9183e3mm		}
229e9183e3mm		if ((defval = defread("NFS_SERVER_DELEGATION=")) != NULL) {
230e9183e3mm			if (strcmp(defval, "off") == 0) {
231e9183e3mm				nfs_server_delegation = FALSE;
232e9183e3mm			}
233e9183e3mm		}
234e9183e3mm
235e9183e3mm		/* close defaults file */
236e9183e3mm		defopen(NULL);
237e9183e3mm	}
238e9183e3mm
239e9183e3mm	/*
240e9183e3mm	 * Conflict options error messages.
241e9183e3mm	 */
242e9183e3mm	if (opt_cnt > 1) {
243e9183e3mm		(void) fprintf(stderr, "\nConflicting options, only one of "
244e9183e3mm		    "the following options can be specified\n"
245e9183e3mm		    "in " NFSADMIN ":\n"
246e9183e3mm		    "\tNFSD_PROTOCOL=ALL\n"
247e9183e3mm		    "\tNFSD_PROTOCOL=protocol\n"
248e9183e3mm		    "\tNFSD_DEVICE=device\n\n");
249e9183e3mm		usage();
250e9183e3mm	}
251e9183e3mm	opt_cnt = 0;
252e9183e3mm
253e9183e3mm	while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
254e9183e3mm		switch (i) {
255e9183e3mm		case 'a':
256e9183e3mm			free(df_proto);
257e9183e3mm			df_proto = NULL;
258e9183e3mm			free(df_provider);
259e9183e3mm			df_provider = NULL;
260e9183e3mm
261e9183e3mm			allflag = 1;
262e9183e3mm			opt_cnt++;
263e9183e3mm			break;
264e9183e3mm
265e9183e3mm		case 'c':
266e9183e3mm			max_conns_allowed = atoi(optarg);
267e9183e3mm			break;
268e9183e3mm
269e9183e3mm		case 'p':
270e9183e3mm			proto = optarg;
271e9183e3mm			df_allflag = 0;
272e9183e3mm			opt_cnt++;
273e9183e3mm			break;
274e9183e3mm
275e9183e3mm		/*
276e9183e3mm		 * DSS: NFSv4 distributed stable storage.
277e9183e3mm		 *
278e9183e3mm		 * This is a Contracted Project Private interface, for
279e9183e3mm		 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
280e9183e3mm		 */
281e9183e3mm		case 's':
282e9183e3mm			if (strlen(optarg) < MAXPATHLEN) {
283e9183e3mm				/* first "-s" option encountered? */
284e9183e3mm				if (dss_pathnames == NULL) {
285e9183e3mm					/*
286e9183e3mm					 * Allocate maximum possible space
287e9183e3mm					 * required given cmdline arg count;
288e9183e3mm					 * "-s <path>" consumes two args.
289e9183e3mm					 */
290e9183e3mm					size_t sz = (ac / 2) * sizeof (char *);
291e9183e3mm					dss_pathnames = (char **)malloc(sz);
292e9183e3mm					if (dss_pathnames == NULL) {
293e9183e3mm						(void) fprintf(stderr, "%s: "
294e9183e3mm						    "dss paths malloc failed\n",
295e9183e3mm						    av[0]);
296e9183e3mm						exit(1);
297e9183e3mm					}
298e9183e3mm					(void) memset(dss_pathnames, 0, sz);
299e9183e3mm				}
300e9183e3mm				dss_pathnames[dss_npaths] = optarg;
30178b3d48mm				dss_npaths++;
302e9183e3mm			} else {
303e9183e3mm				(void) fprintf(stderr,
304e9183e3mm				    "%s: -s pathname too long.\n", av[0]);
305e9183e3mm			}
306e9183e3mm			break;
307e9183e3mm
308e9183e3mm		case 't':
3092965785mm			provider = optarg;
3102965785mm			df_allflag = 0;
3112965785mm			opt_cnt++;
3122965785mm			break;
3132965785mm
3142965785mm		case 'l':
3152965785mm			listen_backlog = atoi(optarg);
3162965785mm			break;
3172965785mm
3182965785mm		case '?':
3192965785mm			usage();
3202965785mm			/* NOTREACHED */
3212965785mm		}
3222965785mm	}
323e7b2401mm
3242965785mm	allflag = df_allflag;
3252965785mm	if (proto == NULL)
3262965785mm		proto = df_proto;
3272965785mm	if (provider == NULL)
3282965785mm		provider = df_provider;
3292965785mm
3302965785mm	/*
3312f6e434mm	 * Conflict options error messages.
3322965785mm	 */
3332965785mm	if (opt_cnt > 1) {
3342965785mm		(void) fprintf(stderr, "\nConflicting options, only one of "
3352965785mm		    "the following options can be specified\n"
3362965785mm		    "on the command line:\n"
3372965785mm		    "\t-a\n"
3382965785mm		    "\t-p protocol\n"
3392965785mm		    "\t-t transport\n\n");
3403404e2cmm		usage();
3412965785mm	}
3422965785mm
3432965785mm	if (proto != NULL &&
3442965785mm	    strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
3452965785mm		if (nfs_server_vers_max == NFS_V4) {
3462965785mm			if (nfs_server_vers_min == NFS_V4) {
347fed7941mm				syslog(LOG_ERR,
348fed7941mm				    "NFS version 4 is not supported "
349fed7941mm				    "with the UDP protocol.  Exiting\n");
350fed7941mm				fprintf(stderr,
351fed7941mm				    "NFS version 4 is not supported "
352fed7941mm				    "with the UDP protocol.  Exiting\n");
3532965785mm				exit(3);
3542965785mm			} else {
3552965785mm				fprintf(stderr,
3562965785mm				    "NFS version 4 is not supported "
3572965785mm				    "with the UDP protocol.\n");
3582965785mm			}
3592965785mm		}
3602965785mm	}
3612965785mm
3622965785mm	/*
3632965785mm	 * If there is exactly one more argument, it is the number of
3642965785mm	 * servers.
3652965785mm	 */
3662f6e434mm	if (optind == ac - 1) {
3672f6e434mm		maxservers = atoi(av[optind]);
3682965785mm		maxservers_set = 1;
3692965785mm	}
3702965785mm	/*
3712965785mm	 * If there are two or more arguments, then this is a usage error.
3722965785mm	 */
3732965785mm	else if (optind < ac - 1)
3742965785mm		usage();
3752965785mm	/*
3762965785mm	 * Check the ranges for min/max version specified
3772965785mm	 */
3782965785mm	else if ((nfs_server_vers_min > nfs_server_vers_max) ||
3792965785mm	    (nfs_server_vers_min < NFS_VERSMIN) ||
3802965785mm	    (nfs_server_vers_max > NFS_VERSMAX))
3812965785mm		usage();
3822965785mm	/*
3832965785mm	 * There are no additional arguments, and we haven't set maxservers
3842965785mm	 * explicitly via the config file, we use a default number of
3852965785mm	 * servers.  We will log this.
3862965785mm	 */
3872965785mm	else if (maxservers_set == 0)
3882965785mm		logmaxservers = 1;
3892965785mm
3902965785mm	/*
3911c0c7f8mm	 * Basic Sanity checks on options
3922965785mm	 *
3932965785mm	 * max_conns_allowed must be positive, except for the special
3942965785mm	 * value of -1 which is used internally to mean unlimited, -1 isn't
3952965785mm	 * documented but we allow it anyway.
3962965785mm	 *
3972965785mm	 * maxservers must be positive
3982965785mm	 * listen_backlog must be positive or zero
3992965785mm	 */
4002965785mm	if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
4012965785mm	    (listen_backlog < 0) || (maxservers <= 0)) {
4022965785mm		usage();
4032965785mm	}
4042965785mm
4052965785mm	/*
4062965785mm	 * Set current dir to server root
4072965785mm	 */
4082965785mm	if (chdir(dir) < 0) {
4091c0c7f8mm		(void) fprintf(stderr, "%s:  ", MyName);
4102965785mm		perror(dir);
4112965785mm		exit(1);
4122965785mm	}
4132965785mm
4142965785mm#ifndef DEBUG
4152965785mm	/*
4162965785mm	 * Background
4172965785mm	 */
4182965785mm	pid = fork();
4192965785mm	if (pid < 0) {
4202965785mm		perror("nfsd: fork");
4212965785mm		exit(1);
4222965785mm	}
4232965785mm	if (pid != 0)
4242965785mm		exit(0);
4252f6e434mm
4262f6e434mm	/*
4272f6e434mm	 * Close existing file descriptors, open "/dev/null" as
4282f6e434mm	 * standard input, output, and error, and detach from
4292f6e434mm	 * controlling terminal.
4302f6e434mm	 */
4312f6e434mm	closefrom(0);
4322f6e434mm	(void) open("/dev/null", O_RDONLY);
4332965785mm	(void) open("/dev/null", O_WRONLY);
4342965785mm	(void) dup(1);
4352965785mm	(void) setsid();
4362965785mm#endif
4372965785mm	openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
4382965785mm
4392965785mm	/*
4402965785mm	 * establish our lock on the lock file and write our pid to it.
4412965785mm	 * exit if some other process holds the lock, or if there's any
4422965785mm	 * error in writing/locking the file.
4432965785mm	 */
4442f6e434mm	pid = _enter_daemon_lock(NFSD);
4452965785mm	switch (pid) {
4462f6e434mm	case 0:
4472f6e434mm		break;
4482f6e434mm	case -1:
4492965785mm		syslog(LOG_ERR, "error locking for %s: %s", NFSD,
4502f6e434mm		    strerror(errno));
4512f6e434mm		exit(2);
4522965785mm	default:
4532965785mm		/* daemon was already running */
4542965785mm		exit(0);
4552965785mm	}
4562965785mm
4572965785mm	/*
4582965785mm	 * If we've been given a list of paths to be used for distributed
4592965785mm	 * stable storage, and provided we're going to run a version
4602965785mm	 * that supports it, setup the DSS paths.
4612965785mm	 */
4622965785mm	if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
4632965785mm		if (dss_init(dss_npaths, dss_pathnames) != 0) {
4642965785mm			syslog(LOG_ERR, "dss_init failed. Exiting.");
4652965785mm			exit(1);
4662965785mm		}
4672965785mm	}
4682965785mm
4692965785mm	/*
4702965785mm	 * Block all signals till we spawn other
4712965785mm	 * threads.
4722965785mm	 */
4732965785mm	(void) sigfillset(&sgset);
4742965785mm	(void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
4752965785mm
4762965785mm	if (logmaxservers) {
4772965785mm		(void) syslog(LOG_INFO,
4782f6e434mm		    "Number of servers not specified. Using default of %d.",
4792965785mm		    maxservers);
4802965785mm	}
4811c0c7f8mm
4822965785mm	/*
4832965785mm	 * Make sure to unregister any previous versions in case the
4842965785mm	 * user is reconfiguring the server in interesting ways.
4852965785mm	 */
4862965785mm	svc_unreg(NFS_PROGRAM, NFS_VERSION);
4872965785mm	svc_unreg(NFS_PROGRAM, NFS_V3);
4882f6e434mm	svc_unreg(NFS_PROGRAM, NFS_V4);
4892965785mm	svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
4902965785mm	svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
4912965785mm
4922965785mm	/*
4932965785mm	 * Set up kernel RPC thread pool for the NFS server.
4942965785mm	 */
4952965785mm	if (nfssvcpool(maxservers)) {
4962965785mm		(void) syslog(LOG_ERR,
4972965785mm		    "Can't set up kernel NFS service: %m. Exiting");
4982965785mm		exit(1);
4992965785mm	}
5002965785mm
5012965785mm
5022965785mm	/*
5032965785mm	 * Set up blocked thread to do LWP creation on behalf of the kernel.
5042965785mm	 */
5052965785mm	if (svcwait(NFS_SVCPOOL_ID)) {
5062965785mm		(void) syslog(LOG_ERR,
5072965785mm		    "Can't set up NFS pool creator: %m, Exiting");
5082965785mm		exit(1);
5092965785mm	}
5102965785mm
5112965785mm	/*
5122965785mm	 * RDMA start and stop thread.
5132965785mm	 * Per pool RDMA listener creation and
5142965785mm	 * destructor thread.
5152965785mm	 *
5162965785mm	 * start rdma services and block in the kernel.
5172965785mm	 */
5182965785mm	if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min, nfs_server_vers_max,
5192965785mm	    nfs_server_delegation)) {
5202965785mm		(void) syslog(LOG_ERR,
5212965785mm		    "Can't set up RDMA creator thread : %m.");
5222965785mm	}
5232965785mm
5242965785mm	/*
5252965785mm	 * Now open up for signal delivery
5262965785mm	 */
5272965785mm
5282965785mm	(void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
5292965785mm	sigset(SIGTERM, sigflush);
5302965785mm	sigset(SIGUSR1, quiesce);
5312965785mm
5322965785mm	/*
5332f6e434mm	 * Build a protocol block list for registration.
5342f6e434mm	 */
5352965785mm	protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
5362f6e434mm	protobp->serv = "NFS";
5372965785mm	protobp->versmin = nfs_server_vers_min;
5382965785mm	protobp->versmax = nfs_server_vers_max;
5392965785mm	protobp->program = NFS_PROGRAM;
5402965785mm
5412965785mm	protobp->next = (struct protob *)malloc(sizeof (struct protob));
5422965785mm	protobp = protobp->next;
5432965785mm	protobp->serv = "NFS_ACL";		/* not used */
5442965785mm	protobp->versmin = nfs_server_vers_min;
5452965785mm	/* XXX - this needs work to get the version just right */
5462965785mm	protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
5472965785mm	    NFS_ACL_V3 : nfs_server_vers_max;
5482965785mm	protobp->program = NFS_ACL_PROGRAM;
5492965785mm	protobp->next = (struct protob *)NULL;
5502965785mm
5512965785mm	if (allflag) {
5522965785mm		if (do_all(protobp0, nfssvc, 0) == -1)
5532965785mm			exit(1);
5542965785mm	} else if (proto) {
5552965785mm		/* there's more than one match for the same protocol */
5562f6e434mm		struct netconfig *nconf;
5572f6e434mm		NCONF_HANDLE *nc;
5582f6e434mm		bool_t	protoFound = FALSE;
5592965785mm		if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
5602f6e434mm			syslog(LOG_ERR, "setnetconfig failed: %m");
5612965785mm			goto done;
5622965785mm		}
5632965785mm		while (nconf = getnetconfig(nc)) {
5642f6e434mm			if (strcmp(nconf->nc_proto, proto) == 0) {
5652965785mm				protoFound = TRUE;
5662965785mm				do_one(nconf->nc_device, NULL,
5672965785mm				    protobp0, nfssvc, 0);
5682965785mm			}
5692965785mm		}
5702965785mm		(void) endnetconfig(nc);
5712965785mm		if (protoFound == FALSE)
5722965785mm			syslog(LOG_ERR, "couldn't find netconfig entry \
5732965785mm			    for protocol %s", proto);
5742965785mm
5752965785mm	} else if (provider)
5762965785mm		do_one(provider, proto, protobp0, nfssvc, 0);
5772965785mm	else {
5782965785mm		for (providerp = defaultproviders;
5792965785mm		    *providerp != NULL; providerp++) {
5802965785mm			provider = *providerp;
5812965785mm			do_one(provider, NULL, protobp0, nfssvc, 0);
5822965785mm		}
5832965785mm	}
5842965785mmdone:
5852965785mm
5862965785mm	free(protobp);
5872965785mm	free(protobp0);
5882965785mm
5892965785mm
5902965785mm	if (num_fds == 0) {
5912965785mm		(void) syslog(LOG_ERR,
5922965785mm		"Could not start NFS service for any protocol. Exiting");
5932965785mm		exit(1);
5942965785mm	}
5952965785mm
5962965785mm	end_listen_fds = num_fds;
5972965785mm
5982965785mm	/*
5992965785mm	 * Get rid of unneeded privileges.
6002965785mm	 */
6012965785mm	__fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
6022965785mm	    PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
6032965785mm
6042965785mm	/*
6052965785mm	 * Poll for non-data control events on the transport descriptors.
6062965785mm	 */
6072965785mm	poll_for_action();
6082965785mm
6092965785mm	/*
6102965785mm	 * If we get here, something failed in poll_for_action().
6112965785mm	 */
6122965785mm	return (1);
6132965785mm}
6142965785mm
6152f6e434mmstatic int
6162f6e434mmnfssvcpool(int maxservers)
6172f6e434mm{
6182f6e434mm	struct svcpool_args npa;
6192f6e434mm
6202f6e434mm	npa.id = NFS_SVCPOOL_ID;
6212f6e434mm	npa.maxthreads = maxservers;
6222f6e434mm	npa.redline = 0;
6232f6e434mm	npa.qsize = 0;
6242f6e434mm	npa.timeout = 0;
6252f6e434mm	npa.stksize = 0;
6262f6e434mm	npa.max_same_xprt = 0;
6272f6e434mm	return (_nfssys(SVCPOOL_CREATE, &npa));
6282f6e434mm}
6292f6e434mm
6302f6e434mm/*
6312f6e434mm * Establish NFS service thread.
6322f6e434mm */
6332f6e434mmstatic int
6342f6e434mmnfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
6352f6e434mm{
6362f6e434mm	struct nfs_svc_args nsa;
6372f6e434mm
6382f6e434mm	nsa.fd = fd;
6392f6e434mm	nsa.netid = nconf->nc_netid;
6402f6e434mm	nsa.addrmask = addrmask;
6412f6e434mm	if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
6422f6e434mm		nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
6432f6e434mm		    NFS_V3 : nfs_server_vers_max;
6442f6e434mm		nsa.versmin = nfs_server_vers_min;
6452f6e434mm		/*
6462f6e434mm		 * If no version left, silently do nothing, previous
6472f6e434mm		 * checks will have assured at least TCP is available.
6482f6e434mm		 */
6492f6e434mm		if (nsa.versmin > nsa.versmax)
6502f6e434mm			return (0);
6512f6e434mm	} else {
6522f6e434mm		nsa.versmax = nfs_server_vers_max;
6532f6e434mm		nsa.versmin = nfs_server_vers_min;
6542f6e434mm	}
6552f6e434mm	nsa.delegation = nfs_server_delegation;
65659f8578mm	return (_nfssys(NFS_SVC, &nsa));
6572f6e434mm}
6582f6e434mm
6592f6e434mmstatic void
6602f6e434mmusage(void)
6612f6e434mm{
6622f6e434mm	(void) fprintf(stderr,
6632f6e434mm"usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
6642f6e434mm	(void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
6652f6e434mm	(void) fprintf(stderr,
6662f6e434mm"\twhere -a causes <nservers> to be started on each appropriate transport,\n");
6672f6e434mm	(void) fprintf(stderr,
6682f6e434mm"\tmax_conns is the maximum number of concurrent connections allowed,\n");
6692f6e434mm	(void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
6702f6e434mm	(void) fprintf(stderr, "> zero,\n");
6712f6e434mm	(void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
6722f6e434mm	(void) fprintf(stderr,
6732f6e434mm	    "\ttransport is a transport provider name (i.e. device),\n");
6742f6e434mm	(void) fprintf(stderr,
6752f6e434mm	    "\tlisten_backlog is the TCP listen backlog,\n");
6762f6e434mm	(void) fprintf(stderr,
6772f6e434mm	    "\tand <nservers> must be a decimal number > zero.\n");
6782f6e434mm	exit(1);
6792f6e434mm}
6802f6e434mm
6812f6e434mm/*
6822f6e434mm * Issue nfssys system call to flush all logging buffers asynchronously.
6832f6e434mm *
6842f6e434mm * NOTICE: It is extremely important to flush NFS logging buffers when
6852f6e434mm *	   nfsd exits. When the system is halted or rebooted nfslogd
6862f6e434mm *	   may not have an opportunity to flush the buffers.
6872f6e434mm */
6882f6e434mmstatic void
6892f6e434mmnfsl_flush()
6902f6e434mm{
6912f6e434mm	struct nfsl_flush_args nfa;
6922f6e434mm
6932f6e434mm	memset((void *)&nfa, 0, sizeof (nfa));
6942f6e434mm	nfa.version = NFSL_FLUSH_ARGS_VERS;
69559f8578mm	nfa.directive = NFSL_ALL;	/* flush all asynchronously */
6962f6e434mm
6972f6e434mm	if (_nfssys(LOG_FLUSH, &nfa) < 0)
6982f6e434mm		syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
6992f6e434mm		    strerror(errno));
7002f6e434mm}
7012f6e434mm
/*
 * Handler installed for SIGTERM.
 *
 * Flushes the NFS logging buffers via nfsl_flush() and then ends the
 * process immediately with status 0.  The signal number is not used.
 */
/* ARGSUSED */
static void
sigflush(int sig)
{
	/* push out logging buffers before the process goes away */
	nfsl_flush();

	_exit(0);
}
7122965785mm
7132965785mm/*
7142965785mm * SIGUSR1 handler.
7152965785mm *
7162965785mm * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
7173404e2cmm *
7183404e2cmm * This is a Contracted Project Private interface, for the sole use
7193404e2cmm * of Sun Cluster HA-NFS. See PSARC/2004/497.
7203404e2cmm *
7212965785mm * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
7222965785mm */
7232965785mmstatic void
7242965785mmquiesce(int sig)
7252965785mm{
7262965785mm	int error;
7272965785mm	int id = NFS_SVCPOOL_ID;
7282965785mm
7292f6e434mm	if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
7302f6e434mm		/* Request server quiesce at next shutdown */
7312f6e434mm		error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
7322f6e434mm
7332f6e434mm		/*
7342f6e434mm		 * ENOENT is returned if there is no matching SVC pool
7352f6e434mm		 * for the id. Possibly because the pool is not yet setup.
7362f6e434mm		 * In this case, just exit as if no error. For all other errors,
7372f6e434mm		 * just return and allow caller to retry.
7382f6e434mm		 */
7392f6e434mm		if (error && errno != ENOENT) {
7402f6e434mm			syslog(LOG_ERR,
7412f6e434mm			    "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
7422f6e434mm			    strerror(errno));
7432f6e434mm			return;
7442f6e434mm		}
7452f6e434mm	}
7462f6e434mm
7472f6e434mm	/* Flush logging buffers */
7482f6e434mm	nfsl_flush();
7492f6e434mm
7502f6e434mm	_exit(0);
7512f6e434mm}
7522f6e434mm
7532f6e434mm/*
7541c0c7f8mm * DSS: distributed stable storage.
7552f6e434mm * Create leaf directories as required, keeping an eye on path
7562f6e434mm * lengths. Calls exit(1) on failure.
7572f6e434mm * The pathnames passed in must already exist, and must be writeable by nfsd.
7582f6e434mm * Note: the leaf directories under NFS4_VAR_DIR are not created here;
7592f6e434mm * they're created at pkg install.
7602965785mm */
7612965785mmstatic void
7622965785mmdss_mkleafdirs(uint_t npaths, char **pathnames)
7632965785mm{
7642965785mm	int i;
7652965785mm	char *tmppath = NULL;
7662965785mm
7672f6e434mm	/*
7682965785mm	 * Create the temporary storage used by dss_mkleafdir() here,
7692f6e434mm	 * rather than in that function, so that it only needs to be
7702f6e434mm	 * done once, rather than once for each call. Too big to put
7712965785mm	 * on the function's stack.
7722965785mm	 */
7732965785mm	tmppath = (char *)malloc(MAXPATHLEN);
7742965785mm	if (tmppath == NULL) {
775366f427mm		syslog(LOG_ERR, "tmppath malloc failed. Exiting");
776366f427mm		exit(1);
7772f6e434mm	}
7782f6e434mm
7792f6e434mm	for (i = 0; i < npaths; i++) {
7802f6e434mm		char *p = pathnames[i];
7812965785mm
7822965785mm		dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
7832965785mm		dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
7842965785mm	}
7852965785mm
7862965785mm	free(tmppath);
7872965785mm}
7882965785mm
7892965785mm/*
7902965785mm * Create "leaf" in "dir" (which must already exist).
7912965785mm * leaf: should start with a '/'
7922965785mm */
7932965785mmstatic void
7942965785mmdss_mkleafdir(char *dir, char *leaf, char *tmppath)
7952965785mm{
7962965785mm	/* MAXPATHLEN includes the terminating NUL */
7972965785mm	if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
7982965785mm		syslog(LOG_ERR, "stable storage path too long: %s%s. Exiting",
7992965785mm		    dir, leaf);
8002965785mm		exit(1);
8012965785mm	}
8022965785mm
8032965785mm	(void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
8042965785mm
8052965785mm	/* the directory may already exist: that's OK */
8062965785mm	if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
8072965785mm		syslog(LOG_ERR, "error creating stable storage directory: "
8082965785mm		    "%s: %s. Exiting", strerror(errno), tmppath);
8092965785mm		exit(1);
8102965785mm	}
8112965785mm}
8122965785mm
8132965785mm/*
8142965785mm * Create the storage dirs, and pass the path list to the kernel.
8152965785mm * This requires the nfssrv module to be loaded; the _nfssys() syscall
8162965785mm * will fail ENOTSUP if it is not.
8172965785mm * Use libnvpair(3LIB) to pass the data to the kernel.
8182965785mm */
8192965785mmstatic int
8202965785mmdss_init(uint_t npaths, char **pathnames)
8212965785mm{
8222965785mm	int i, j, nskipped, error;
8232965785mm	char *bufp;
8242965785mm	uint32_t bufsize;
8252965785mm	size_t buflen;
8262965785mm	nvlist_t *nvl;
8272965785mm
8282965785mm	if (npaths > 1) {
8292965785mm		/*
8302965785mm		 * We need to remove duplicate paths; this might be user error
8312965785mm		 * in the general case, but HA-NFSv4 can also cause this.
8322f6e434mm		 * Sort the pathnames array, and NULL out duplicates,
8332f6e434mm		 * then write the non-NULL entries to a new array.
8342f6e434mm		 * Sorting will also allow the kernel to optimise its searches.
8352965785mm		 */
8362965785mm
8372965785mm		qsort(pathnames, npaths, sizeof (char *), qstrcmp);
8382965785mm
8392965785mm		/* now NULL out any duplicates */
8402965785mm		i = 0; j = 1; nskipped = 0;
8412965785mm		while (j < npaths) {
8422965785mm			if (strcmp(pathnames[i], pathnames[j]) == NULL) {
8432965785mm				pathnames[j] = NULL;
8442965785mm				j++;
8452965785mm				nskipped++;
8462965785mm				continue;
8472965785mm			}
8482965785mm
8492965785mm			/* skip i over any of its NULLed duplicates */
8502965785mm			i = j++;
8512965785mm		}
8522965785mm
8532965785mm		/* finally, write the non-NULL entries to a new array */
8542965785mm		if (nskipped > 0) {
8552965785mm			int nreal;
8562965785mm			size_t sz;
8572965785mm			char **tmp_pathnames;
8582965785mm
8592965785mm			nreal = npaths - nskipped;
8602965785mm
8612965785mm			sz = nreal * sizeof (char *);
8622965785mm			tmp_pathnames = (char **)malloc(sz);
8632965785mm			if (tmp_pathnames == NULL) {
8642965785mm				syslog(LOG_ERR, "tmp_pathnames malloc failed");
8652965785mm				exit(1);
8662965785mm			}
8672965785mm
8682965785mm			for (i = 0, j = 0; i < npaths; i++)
8692965785mm				if (pathnames[i] != NULL)
8702965785mm					tmp_pathnames[j++] = pathnames[i];
8712965785mm			free(pathnames);
8722965785mm			pathnames = tmp_pathnames;
8732965785mm			npaths = nreal;
8742965785mm		}
8752965785mm
8762965785mm	}
8772965785mm
8782965785mm	/* Create directories to store the distributed state files */
8792965785mm	dss_mkleafdirs(npaths, pathnames);
8802965785mm
8812965785mm	/* Create the name-value pair list */
8822965785mm	error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
8832965785mm	if (error) {
8842965785mm		syslog(LOG_ERR, "nvlist_alloc failed: %s.", strerror(errno));
8852965785mm		return (1);
8862965785mm	}
8872965785mm
8882965785mm	/* Add the pathnames array as a single name-value pair */
8892965785mm	error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
8902965785mm	    pathnames, npaths);
8912965785mm	if (error) {
8922965785mm		syslog(LOG_ERR, "nvlist_add_string_array failed: %s.",
8932965785mm		    strerror(errno));
8942965785mm		nvlist_free(nvl);
8952965785mm		return (1);
8962965785mm	}
8972965785mm
8982965785mm	/*
899366f427mm	 * Pack list into contiguous memory, for passing to kernel.
900366f427mm	 * nvlist_pack() will allocate the memory for the buffer,
9012965785mm	 * which we should free() when no longer needed.
9022965785mm	 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
9032965785mm	 */
9042965785mm	bufp = NULL;
9052965785mm	error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
9062965785mm	if (error) {
9072965785mm		syslog(LOG_ERR, "nvlist_pack failed: %s.", strerror(errno));
9082965785mm		nvlist_free(nvl);
9092965785mm		return (1);
9102965785mm	}
9112965785mm
9122965785mm	/* Now we have the packed buffer, we no longer need the list */
9132965785mm	nvlist_free(nvl);
9142965785mm
9152f6e434mm	/*
9162965785mm	 * Let the kernel know in advance how big the buffer is.
9172f6e434mm	 * NOTE: we cannot just pass buflen, since size_t is a long, and
9182965785mm	 * thus a different size between ILP32 userland and LP64 kernel.
9192965785mm	 * Use an int for the transfer, since that should be big enough;
9202965785mm	 * this is a no-op at the moment, here, since nfsd is 32-bit, but
9212965785mm	 * that could change.
9222965785mm	 */
9233404e2cmm	bufsize = (uint32_t)buflen;
9243404e2cmm	error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
9253404e2cmm	if (error) {
9263404e2cmm		syslog(LOG_ERR,
9273404e2cmm		    "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s. ",
9283404e2cmm		    strerror(errno));
9293404e2cmm		free(bufp);
9303404e2cmm		return (1);
9313404e2cmm	}
9323404e2cmm
9333404e2cmm	/* Pass the packed buffer to the kernel */
9343404e2cmm	error = _nfssys(NFS4_DSS_SETPATHS, bufp);
9353404e2cmm	if (error) {
9363404e2cmm		syslog(LOG_ERR,
9373404e2cmm		    "_nfssys(NFS4_DSS_SETPATHS) failed: %s. ", strerror(errno));
9383404e2cmm		free(bufp);
9393404e2cmm		return (1);
9403404e2cmm	}
9413404e2cmm
9423404e2cmm	/*
9433404e2cmm	 * The kernel has now unpacked the buffer and extracted the
9443404e2cmm	 * pathnames array, we no longer need the buffer.
9453404e2cmm	 */
9463404e2cmm	free(bufp);
9473404e2cmm
9483404e2cmm	return (0);
9493404e2cmm}
9503404e2cmm
/*
 * Quick sort string compare routine, for qsort.
 * Needed to make arg types correct.
 *
 * p1, p2: pointers to two elements of an array of C strings, i.e. each
 *         is really a pointer to a "char *".
 *
 * Returns the strcmp() ordering of the two strings referred to: negative,
 * zero or positive.  Neither the array elements nor the strings are
 * modified, so const is kept all the way through.
 */
int
qstrcmp(const void *p1, const void *p2)
{
	const char *s1 = *(const char * const *)p1;
	const char *s2 = *(const char * const *)p2;

	return (strcmp(s1, s2));
}
9632f6e434mm