nfsd.c revision 250a073308fb9258903f57b76eeb2470c6926efe
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T		*/
/*	  All Rights Reserved  	*/
28
/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
38
39/* LINTLIBRARY */
40/* PROTOLIB1 */
41
42#pragma ident	"%Z%%M%	%I%	%E% SMI"
43
44/* NFS server */
45
46#include <sys/param.h>
47#include <sys/types.h>
48#include <sys/stat.h>
49#include <syslog.h>
50#include <tiuser.h>
51#include <rpc/rpc.h>
52#include <errno.h>
53#include <thread.h>
54#include <sys/resource.h>
55#include <sys/time.h>
56#include <sys/file.h>
57#include <nfs/nfs.h>
58#include <nfs/nfs_acl.h>
59#include <nfs/nfssys.h>
60#include <stdio.h>
61#include <stdio_ext.h>
62#include <stdlib.h>
63#include <signal.h>
64#include <netconfig.h>
65#include <netdir.h>
66#include <string.h>
67#include <unistd.h>
68#include <stropts.h>
69#include <sys/tihdr.h>
70#include <sys/wait.h>
71#include <poll.h>
72#include <priv_utils.h>
73#include <sys/tiuser.h>
74#include <netinet/tcp.h>
75#include <deflt.h>
76#include <rpcsvc/daemon_utils.h>
77#include <rpcsvc/nfs4_prot.h>
78#include <libnvpair.h>
79#include "nfs_tbind.h"
80#include "thrpool.h"
81
82/* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
83#define	QUIESCE_VERSMIN	4
84/* DSS: distributed stable storage */
85#define	DSS_VERSMIN	4
86
87static	int	nfssvc(int, struct netbuf, struct netconfig *);
88static	int	nfssvcpool(int maxservers);
89static	int	dss_init(uint_t npaths, char **pathnames);
90static	void	dss_mkleafdirs(uint_t npaths, char **pathnames);
91static	void	dss_mkleafdir(char *dir, char *leaf, char *path);
92static	void	usage(void);
93int		qstrcmp(const void *s1, const void *s2);
94
95extern	int	_nfssys(int, void *);
96
97extern int	daemonize_init(void);
98extern void	daemonize_fini(int fd);
99
100/* signal handlers */
101static void sigflush(int);
102static void quiesce(int);
103
104static	char	*MyName;
105static	NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
106					    "/dev/udp6", NULL };
107/* static	NETSELDECL(defaultprotos)[] =	{ NC_UDP, NC_TCP, NULL }; */
108/*
109 * The following are all globals used by routines in nfs_tbind.c.
110 */
111size_t	end_listen_fds;		/* used by conn_close_oldest() */
112size_t	num_fds = 0;		/* used by multiple routines */
113int	listen_backlog = 32;	/* used by bind_to_{provider,proto}() */
114int	num_servers;		/* used by cots_listen_event() */
115int	(*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
116				/* used by cots_listen_event() */
117int	max_conns_allowed = -1;	/* used by cots_listen_event() */
118
119/*
120 * Keep track of min/max versions of NFS protocol to be started.
121 * Start with the defaults (min == 2, max == 3).  We have the
122 * capability of starting vers=4 but only if the user requests it.
123 */
124int	nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
125int	nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
126
127/*
128 * Set the default for server delegation enablement and set per
129 * /etc/default/nfs configuration (if present).
130 */
131int	nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
132
133int
134main(int ac, char *av[])
135{
136	char *dir = "/";
137	int allflag = 0;
138	int df_allflag = 0;
139	int opt_cnt = 0;
140	int maxservers = 1;	/* zero allows inifinte number of threads */
141	int maxservers_set = 0;
142	int logmaxservers = 0;
143	int pid;
144	int i;
145	char *provider = (char *)NULL;
146	char *df_provider = (char *)NULL;
147	struct protob *protobp0, *protobp;
148	NETSELDECL(proto) = NULL;
149	NETSELDECL(df_proto) = NULL;
150	NETSELPDECL(providerp);
151	char *defval;
152	boolean_t can_do_mlp;
153	uint_t dss_npaths = 0;
154	char **dss_pathnames = NULL;
155	sigset_t sgset;
156
157	int pipe_fd = -1;
158
159	MyName = *av;
160
161	/*
162	 * Initializations that require more privileges than we need to run.
163	 */
164	(void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
165	svcsetprio();
166
167	can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
168	if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
169	    DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
170	    can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
171		(void) fprintf(stderr, "%s should be run with"
172		    " sufficient privileges\n", av[0]);
173		exit(1);
174	}
175
176	(void) enable_extended_FILE_stdio(-1, -1);
177
178	/*
179	 * Read in the values from config file first before we check
180	 * command line options so the options override the file.
181	 */
182	if ((defopen(NFSADMIN)) == 0) {
183		if ((defval = defread("NFSD_MAX_CONNECTIONS=")) != NULL) {
184			errno = 0;
185			max_conns_allowed = strtol(defval, (char **)NULL, 10);
186			if (errno != 0) {
187				max_conns_allowed = -1;
188			}
189		}
190		if ((defval = defread("NFSD_LISTEN_BACKLOG=")) != NULL) {
191			errno = 0;
192			listen_backlog = strtol(defval, (char **)NULL, 10);
193			if (errno != 0) {
194				listen_backlog = 32;
195			}
196		}
197		if ((defval = defread("NFSD_PROTOCOL=")) != NULL) {
198			df_proto = strdup(defval);
199			opt_cnt++;
200			if (strncasecmp("ALL", defval, 3) == 0) {
201				free(df_proto);
202				df_proto = NULL;
203				df_allflag = 1;
204			}
205		}
206		if ((defval = defread("NFSD_DEVICE=")) != NULL) {
207			df_provider = strdup(defval);
208			opt_cnt++;
209		}
210		if ((defval = defread("NFSD_SERVERS=")) != NULL) {
211			errno = 0;
212			maxservers = strtol(defval, (char **)NULL, 10);
213			if (errno != 0) {
214				maxservers = 1;
215			} else {
216				maxservers_set = 1;
217			}
218		}
219		if ((defval = defread("NFS_SERVER_VERSMIN=")) != NULL) {
220			errno = 0;
221			nfs_server_vers_min =
222			    strtol(defval, (char **)NULL, 10);
223			if (errno != 0) {
224				nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
225			}
226		}
227		if ((defval = defread("NFS_SERVER_VERSMAX=")) != NULL) {
228			errno = 0;
229			nfs_server_vers_max =
230			    strtol(defval, (char **)NULL, 10);
231			if (errno != 0) {
232				nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
233			}
234		}
235		if ((defval = defread("NFS_SERVER_DELEGATION=")) != NULL) {
236			if (strcmp(defval, "off") == 0) {
237				nfs_server_delegation = FALSE;
238			}
239		}
240
241		/* close defaults file */
242		defopen(NULL);
243	}
244
245	/*
246	 * Conflict options error messages.
247	 */
248	if (opt_cnt > 1) {
249		(void) fprintf(stderr, "\nConflicting options, only one of "
250		    "the following options can be specified\n"
251		    "in " NFSADMIN ":\n"
252		    "\tNFSD_PROTOCOL=ALL\n"
253		    "\tNFSD_PROTOCOL=protocol\n"
254		    "\tNFSD_DEVICE=device\n\n");
255		usage();
256	}
257	opt_cnt = 0;
258
259	while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
260		switch (i) {
261		case 'a':
262			free(df_proto);
263			df_proto = NULL;
264			free(df_provider);
265			df_provider = NULL;
266
267			allflag = 1;
268			opt_cnt++;
269			break;
270
271		case 'c':
272			max_conns_allowed = atoi(optarg);
273			break;
274
275		case 'p':
276			proto = optarg;
277			df_allflag = 0;
278			opt_cnt++;
279			break;
280
281		/*
282		 * DSS: NFSv4 distributed stable storage.
283		 *
284		 * This is a Contracted Project Private interface, for
285		 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
286		 */
287		case 's':
288			if (strlen(optarg) < MAXPATHLEN) {
289				/* first "-s" option encountered? */
290				if (dss_pathnames == NULL) {
291					/*
292					 * Allocate maximum possible space
293					 * required given cmdline arg count;
294					 * "-s <path>" consumes two args.
295					 */
296					size_t sz = (ac / 2) * sizeof (char *);
297					dss_pathnames = (char **)malloc(sz);
298					if (dss_pathnames == NULL) {
299						(void) fprintf(stderr, "%s: "
300						    "dss paths malloc failed\n",
301						    av[0]);
302						exit(1);
303					}
304					(void) memset(dss_pathnames, 0, sz);
305				}
306				dss_pathnames[dss_npaths] = optarg;
307				dss_npaths++;
308			} else {
309				(void) fprintf(stderr,
310				    "%s: -s pathname too long.\n", av[0]);
311			}
312			break;
313
314		case 't':
315			provider = optarg;
316			df_allflag = 0;
317			opt_cnt++;
318			break;
319
320		case 'l':
321			listen_backlog = atoi(optarg);
322			break;
323
324		case '?':
325			usage();
326			/* NOTREACHED */
327		}
328	}
329
330	allflag = df_allflag;
331	if (proto == NULL)
332		proto = df_proto;
333	if (provider == NULL)
334		provider = df_provider;
335
336	/*
337	 * Conflict options error messages.
338	 */
339	if (opt_cnt > 1) {
340		(void) fprintf(stderr, "\nConflicting options, only one of "
341		    "the following options can be specified\n"
342		    "on the command line:\n"
343		    "\t-a\n"
344		    "\t-p protocol\n"
345		    "\t-t transport\n\n");
346		usage();
347	}
348
349	if (proto != NULL &&
350	    strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
351		if (nfs_server_vers_max == NFS_V4) {
352			if (nfs_server_vers_min == NFS_V4) {
353				fprintf(stderr,
354				    "NFS version 4 is not supported "
355				    "with the UDP protocol.  Exiting\n");
356				exit(3);
357			} else {
358				fprintf(stderr,
359				    "NFS version 4 is not supported "
360				    "with the UDP protocol.\n");
361			}
362		}
363	}
364
365	/*
366	 * If there is exactly one more argument, it is the number of
367	 * servers.
368	 */
369	if (optind == ac - 1) {
370		maxservers = atoi(av[optind]);
371		maxservers_set = 1;
372	}
373	/*
374	 * If there are two or more arguments, then this is a usage error.
375	 */
376	else if (optind < ac - 1)
377		usage();
378	/*
379	 * Check the ranges for min/max version specified
380	 */
381	else if ((nfs_server_vers_min > nfs_server_vers_max) ||
382	    (nfs_server_vers_min < NFS_VERSMIN) ||
383	    (nfs_server_vers_max > NFS_VERSMAX))
384		usage();
385	/*
386	 * There are no additional arguments, and we haven't set maxservers
387	 * explicitly via the config file, we use a default number of
388	 * servers.  We will log this.
389	 */
390	else if (maxservers_set == 0)
391		logmaxservers = 1;
392
393	/*
394	 * Basic Sanity checks on options
395	 *
396	 * max_conns_allowed must be positive, except for the special
397	 * value of -1 which is used internally to mean unlimited, -1 isn't
398	 * documented but we allow it anyway.
399	 *
400	 * maxservers must be positive
401	 * listen_backlog must be positive or zero
402	 */
403	if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
404	    (listen_backlog < 0) || (maxservers <= 0)) {
405		usage();
406	}
407
408	/*
409	 * Set current dir to server root
410	 */
411	if (chdir(dir) < 0) {
412		(void) fprintf(stderr, "%s:  ", MyName);
413		perror(dir);
414		exit(1);
415	}
416
417#ifndef DEBUG
418	pipe_fd = daemonize_init();
419#endif
420
421	openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
422
423	/*
424	 * establish our lock on the lock file and write our pid to it.
425	 * exit if some other process holds the lock, or if there's any
426	 * error in writing/locking the file.
427	 */
428	pid = _enter_daemon_lock(NFSD);
429	switch (pid) {
430	case 0:
431		break;
432	case -1:
433		fprintf(stderr, "error locking for %s: %s", NFSD,
434		    strerror(errno));
435		exit(2);
436	default:
437		/* daemon was already running */
438		exit(0);
439	}
440
441	/*
442	 * If we've been given a list of paths to be used for distributed
443	 * stable storage, and provided we're going to run a version
444	 * that supports it, setup the DSS paths.
445	 */
446	if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
447		if (dss_init(dss_npaths, dss_pathnames) != 0) {
448			fprintf(stderr, "%s", "dss_init failed. Exiting.");
449			exit(1);
450		}
451	}
452
453	/*
454	 * Block all signals till we spawn other
455	 * threads.
456	 */
457	(void) sigfillset(&sgset);
458	(void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
459
460	if (logmaxservers) {
461		fprintf(stderr,
462		    "Number of servers not specified. Using default of %d.",
463		    maxservers);
464	}
465
466	/*
467	 * Make sure to unregister any previous versions in case the
468	 * user is reconfiguring the server in interesting ways.
469	 */
470	svc_unreg(NFS_PROGRAM, NFS_VERSION);
471	svc_unreg(NFS_PROGRAM, NFS_V3);
472	svc_unreg(NFS_PROGRAM, NFS_V4);
473	svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
474	svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
475
476	/*
477	 * Set up kernel RPC thread pool for the NFS server.
478	 */
479	if (nfssvcpool(maxservers)) {
480		fprintf(stderr, "Can't set up kernel NFS service: %s. Exiting",
481		    strerror(errno));
482		exit(1);
483	}
484
485	/*
486	 * Set up blocked thread to do LWP creation on behalf of the kernel.
487	 */
488	if (svcwait(NFS_SVCPOOL_ID)) {
489		fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting",
490		    strerror(errno));
491		exit(1);
492	}
493
494	/*
495	 * RDMA start and stop thread.
496	 * Per pool RDMA listener creation and
497	 * destructor thread.
498	 *
499	 * start rdma services and block in the kernel.
500	 */
501	if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min, nfs_server_vers_max,
502	    nfs_server_delegation)) {
503		fprintf(stderr, "Can't set up RDMA creator thread : %s",
504		    strerror(errno));
505	}
506
507	/*
508	 * Now open up for signal delivery
509	 */
510
511	(void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
512	sigset(SIGTERM, sigflush);
513	sigset(SIGUSR1, quiesce);
514
515	/*
516	 * Build a protocol block list for registration.
517	 */
518	protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
519	protobp->serv = "NFS";
520	protobp->versmin = nfs_server_vers_min;
521	protobp->versmax = nfs_server_vers_max;
522	protobp->program = NFS_PROGRAM;
523
524	protobp->next = (struct protob *)malloc(sizeof (struct protob));
525	protobp = protobp->next;
526	protobp->serv = "NFS_ACL";		/* not used */
527	protobp->versmin = nfs_server_vers_min;
528	/* XXX - this needs work to get the version just right */
529	protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
530	    NFS_ACL_V3 : nfs_server_vers_max;
531	protobp->program = NFS_ACL_PROGRAM;
532	protobp->next = (struct protob *)NULL;
533
534	if (allflag) {
535		if (do_all(protobp0, nfssvc, 0) == -1) {
536			fprintf(stderr, "setnetconfig failed : %s",
537			    strerror(errno));
538			exit(1);
539		}
540	} else if (proto) {
541		/* there's more than one match for the same protocol */
542		struct netconfig *nconf;
543		NCONF_HANDLE *nc;
544		bool_t	protoFound = FALSE;
545		if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
546			fprintf(stderr, "setnetconfig failed : %s",
547			    strerror(errno));
548			goto done;
549		}
550		while (nconf = getnetconfig(nc)) {
551			if (strcmp(nconf->nc_proto, proto) == 0) {
552				protoFound = TRUE;
553				do_one(nconf->nc_device, NULL,
554				    protobp0, nfssvc, 0);
555			}
556		}
557		(void) endnetconfig(nc);
558		if (protoFound == FALSE) {
559			fprintf(stderr,
560			    "couldn't find netconfig entry for protocol %s",
561			    proto);
562		}
563	} else if (provider)
564		do_one(provider, proto, protobp0, nfssvc, 0);
565	else {
566		for (providerp = defaultproviders;
567		    *providerp != NULL; providerp++) {
568			provider = *providerp;
569			do_one(provider, NULL, protobp0, nfssvc, 0);
570		}
571	}
572done:
573
574	free(protobp);
575	free(protobp0);
576
577	if (num_fds == 0) {
578		fprintf(stderr, "Could not start NFS service for any protocol."
579		    " Exiting");
580		exit(1);
581	}
582
583	end_listen_fds = num_fds;
584
585	/*
586	 * nfsd is up and running as far as we are concerned.
587	 */
588	daemonize_fini(pipe_fd);
589
590	/*
591	 * Get rid of unneeded privileges.
592	 */
593	__fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
594	    PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
595
596	/*
597	 * Poll for non-data control events on the transport descriptors.
598	 */
599	poll_for_action();
600
601	/*
602	 * If we get here, something failed in poll_for_action().
603	 */
604	return (1);
605}
606
607static int
608nfssvcpool(int maxservers)
609{
610	struct svcpool_args npa;
611
612	npa.id = NFS_SVCPOOL_ID;
613	npa.maxthreads = maxservers;
614	npa.redline = 0;
615	npa.qsize = 0;
616	npa.timeout = 0;
617	npa.stksize = 0;
618	npa.max_same_xprt = 0;
619	return (_nfssys(SVCPOOL_CREATE, &npa));
620}
621
622/*
623 * Establish NFS service thread.
624 */
625static int
626nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
627{
628	struct nfs_svc_args nsa;
629
630	nsa.fd = fd;
631	nsa.netid = nconf->nc_netid;
632	nsa.addrmask = addrmask;
633	if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
634		nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
635		    NFS_V3 : nfs_server_vers_max;
636		nsa.versmin = nfs_server_vers_min;
637		/*
638		 * If no version left, silently do nothing, previous
639		 * checks will have assured at least TCP is available.
640		 */
641		if (nsa.versmin > nsa.versmax)
642			return (0);
643	} else {
644		nsa.versmax = nfs_server_vers_max;
645		nsa.versmin = nfs_server_vers_min;
646	}
647	nsa.delegation = nfs_server_delegation;
648	return (_nfssys(NFS_SVC, &nsa));
649}
650
651static void
652usage(void)
653{
654	(void) fprintf(stderr,
655"usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
656	(void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
657	(void) fprintf(stderr,
658"\twhere -a causes <nservers> to be started on each appropriate transport,\n");
659	(void) fprintf(stderr,
660"\tmax_conns is the maximum number of concurrent connections allowed,\n");
661	(void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
662	(void) fprintf(stderr, "> zero,\n");
663	(void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
664	(void) fprintf(stderr,
665	    "\ttransport is a transport provider name (i.e. device),\n");
666	(void) fprintf(stderr,
667	    "\tlisten_backlog is the TCP listen backlog,\n");
668	(void) fprintf(stderr,
669	    "\tand <nservers> must be a decimal number > zero.\n");
670	exit(1);
671}
672
673/*
674 * Issue nfssys system call to flush all logging buffers asynchronously.
675 *
676 * NOTICE: It is extremely important to flush NFS logging buffers when
677 *	   nfsd exits. When the system is halted or rebooted nfslogd
678 *	   may not have an opportunity to flush the buffers.
679 */
680static void
681nfsl_flush()
682{
683	struct nfsl_flush_args nfa;
684
685	memset((void *)&nfa, 0, sizeof (nfa));
686	nfa.version = NFSL_FLUSH_ARGS_VERS;
687	nfa.directive = NFSL_ALL;	/* flush all asynchronously */
688
689	if (_nfssys(LOG_FLUSH, &nfa) < 0)
690		syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
691		    strerror(errno));
692}
693
/*
 * Handler installed for SIGTERM.
 *
 * Pushes out the NFS logging buffers via nfsl_flush() and then
 * terminates the process immediately with status 0.  The signal
 * number is not examined.
 */
/* ARGSUSED */
static void
sigflush(int sig)
{
	nfsl_flush();

	_exit(0);
}
704
705/*
706 * SIGUSR1 handler.
707 *
708 * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
709 *
710 * This is a Contracted Project Private interface, for the sole use
711 * of Sun Cluster HA-NFS. See PSARC/2004/497.
712 *
713 * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
714 */
715static void
716quiesce(int sig)
717{
718	int error;
719	int id = NFS_SVCPOOL_ID;
720
721	if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
722		/* Request server quiesce at next shutdown */
723		error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
724
725		/*
726		 * ENOENT is returned if there is no matching SVC pool
727		 * for the id. Possibly because the pool is not yet setup.
728		 * In this case, just exit as if no error. For all other errors,
729		 * just return and allow caller to retry.
730		 */
731		if (error && errno != ENOENT) {
732			syslog(LOG_ERR,
733			    "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
734			    strerror(errno));
735			return;
736		}
737	}
738
739	/* Flush logging buffers */
740	nfsl_flush();
741
742	_exit(0);
743}
744
745/*
746 * DSS: distributed stable storage.
747 * Create leaf directories as required, keeping an eye on path
748 * lengths. Calls exit(1) on failure.
749 * The pathnames passed in must already exist, and must be writeable by nfsd.
750 * Note: the leaf directories under NFS4_VAR_DIR are not created here;
751 * they're created at pkg install.
752 */
753static void
754dss_mkleafdirs(uint_t npaths, char **pathnames)
755{
756	int i;
757	char *tmppath = NULL;
758
759	/*
760	 * Create the temporary storage used by dss_mkleafdir() here,
761	 * rather than in that function, so that it only needs to be
762	 * done once, rather than once for each call. Too big to put
763	 * on the function's stack.
764	 */
765	tmppath = (char *)malloc(MAXPATHLEN);
766	if (tmppath == NULL) {
767		syslog(LOG_ERR, "tmppath malloc failed. Exiting");
768		exit(1);
769	}
770
771	for (i = 0; i < npaths; i++) {
772		char *p = pathnames[i];
773
774		dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
775		dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
776	}
777
778	free(tmppath);
779}
780
781/*
782 * Create "leaf" in "dir" (which must already exist).
783 * leaf: should start with a '/'
784 */
785static void
786dss_mkleafdir(char *dir, char *leaf, char *tmppath)
787{
788	/* MAXPATHLEN includes the terminating NUL */
789	if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
790		fprintf(stderr, "stable storage path too long: %s%s. Exiting",
791		    dir, leaf);
792		exit(1);
793	}
794
795	(void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
796
797	/* the directory may already exist: that's OK */
798	if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
799		fprintf(stderr, "error creating stable storage directory: "
800		    "%s: %s. Exiting", strerror(errno), tmppath);
801		exit(1);
802	}
803}
804
805/*
806 * Create the storage dirs, and pass the path list to the kernel.
807 * This requires the nfssrv module to be loaded; the _nfssys() syscall
808 * will fail ENOTSUP if it is not.
809 * Use libnvpair(3LIB) to pass the data to the kernel.
810 */
811static int
812dss_init(uint_t npaths, char **pathnames)
813{
814	int i, j, nskipped, error;
815	char *bufp;
816	uint32_t bufsize;
817	size_t buflen;
818	nvlist_t *nvl;
819
820	if (npaths > 1) {
821		/*
822		 * We need to remove duplicate paths; this might be user error
823		 * in the general case, but HA-NFSv4 can also cause this.
824		 * Sort the pathnames array, and NULL out duplicates,
825		 * then write the non-NULL entries to a new array.
826		 * Sorting will also allow the kernel to optimise its searches.
827		 */
828
829		qsort(pathnames, npaths, sizeof (char *), qstrcmp);
830
831		/* now NULL out any duplicates */
832		i = 0; j = 1; nskipped = 0;
833		while (j < npaths) {
834			if (strcmp(pathnames[i], pathnames[j]) == NULL) {
835				pathnames[j] = NULL;
836				j++;
837				nskipped++;
838				continue;
839			}
840
841			/* skip i over any of its NULLed duplicates */
842			i = j++;
843		}
844
845		/* finally, write the non-NULL entries to a new array */
846		if (nskipped > 0) {
847			int nreal;
848			size_t sz;
849			char **tmp_pathnames;
850
851			nreal = npaths - nskipped;
852
853			sz = nreal * sizeof (char *);
854			tmp_pathnames = (char **)malloc(sz);
855			if (tmp_pathnames == NULL) {
856				fprintf(stderr, "tmp_pathnames malloc failed");
857				exit(1);
858			}
859
860			for (i = 0, j = 0; i < npaths; i++)
861				if (pathnames[i] != NULL)
862					tmp_pathnames[j++] = pathnames[i];
863			free(pathnames);
864			pathnames = tmp_pathnames;
865			npaths = nreal;
866		}
867
868	}
869
870	/* Create directories to store the distributed state files */
871	dss_mkleafdirs(npaths, pathnames);
872
873	/* Create the name-value pair list */
874	error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
875	if (error) {
876		fprintf(stderr, "nvlist_alloc failed: %s.", strerror(errno));
877		return (1);
878	}
879
880	/* Add the pathnames array as a single name-value pair */
881	error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
882	    pathnames, npaths);
883	if (error) {
884		fprintf(stderr, "nvlist_add_string_array failed: %s.",
885		    strerror(errno));
886		nvlist_free(nvl);
887		return (1);
888	}
889
890	/*
891	 * Pack list into contiguous memory, for passing to kernel.
892	 * nvlist_pack() will allocate the memory for the buffer,
893	 * which we should free() when no longer needed.
894	 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
895	 */
896	bufp = NULL;
897	error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
898	if (error) {
899		fprintf(stderr, "nvlist_pack failed: %s.", strerror(errno));
900		nvlist_free(nvl);
901		return (1);
902	}
903
904	/* Now we have the packed buffer, we no longer need the list */
905	nvlist_free(nvl);
906
907	/*
908	 * Let the kernel know in advance how big the buffer is.
909	 * NOTE: we cannot just pass buflen, since size_t is a long, and
910	 * thus a different size between ILP32 userland and LP64 kernel.
911	 * Use an int for the transfer, since that should be big enough;
912	 * this is a no-op at the moment, here, since nfsd is 32-bit, but
913	 * that could change.
914	 */
915	bufsize = (uint32_t)buflen;
916	error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
917	if (error) {
918		fprintf(stderr,
919		    "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s. ",
920		    strerror(errno));
921		free(bufp);
922		return (1);
923	}
924
925	/* Pass the packed buffer to the kernel */
926	error = _nfssys(NFS4_DSS_SETPATHS, bufp);
927	if (error) {
928		fprintf(stderr,
929		    "_nfssys(NFS4_DSS_SETPATHS) failed: %s. ", strerror(errno));
930		free(bufp);
931		return (1);
932	}
933
934	/*
935	 * The kernel has now unpacked the buffer and extracted the
936	 * pathnames array, we no longer need the buffer.
937	 */
938	free(bufp);
939
940	return (0);
941}
942
/*
 * String compare routine with the argument types qsort(3C) requires.
 *
 * p1, p2: pointers to two elements of an array of C strings, i.e. each
 *         is really a pointer to a (char *)
 *
 * Returns the strcmp() ordering of the two strings.  Neither the array
 * elements nor the strings are modified, so the const qualification of
 * the arguments is preserved throughout.
 */
int
qstrcmp(const void *p1, const void *p2)
{
	const char *s1 = *(const char *const *)p1;
	const char *s2 = *(const char *const *)p2;

	return (strcmp(s1, s2));
}
955