1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T		*/
26/*	  All Rights Reserved	*/
27
28/*
29 * University Copyright- Copyright (c) 1982, 1986, 1988
30 * The Regents of the University of California
31 * All Rights Reserved
32 *
33 * University Acknowledgment- Portions of this document are derived from
34 * software developed by the University of California, Berkeley, and its
35 * contributors.
36 */
37
38/* LINTLIBRARY */
39/* PROTOLIB1 */
40
41/* NFS server */
42
43#include <sys/param.h>
44#include <sys/types.h>
45#include <sys/stat.h>
46#include <syslog.h>
47#include <tiuser.h>
48#include <rpc/rpc.h>
49#include <errno.h>
50#include <thread.h>
51#include <sys/resource.h>
52#include <sys/time.h>
53#include <sys/file.h>
54#include <nfs/nfs.h>
55#include <nfs/nfs_acl.h>
56#include <nfs/nfssys.h>
57#include <stdio.h>
58#include <stdio_ext.h>
59#include <stdlib.h>
60#include <signal.h>
61#include <netconfig.h>
62#include <netdir.h>
63#include <string.h>
64#include <unistd.h>
65#include <limits.h>
66#include <stropts.h>
67#include <sys/tihdr.h>
68#include <sys/wait.h>
69#include <poll.h>
70#include <priv_utils.h>
71#include <sys/tiuser.h>
72#include <netinet/tcp.h>
73#include <deflt.h>
74#include <rpcsvc/daemon_utils.h>
75#include <rpcsvc/nfs4_prot.h>
76#include <libnvpair.h>
77#include <libscf.h>
78#include <libshare.h>
79#include "nfs_tbind.h"
80#include "thrpool.h"
81#include "smfcfg.h"
82
83/* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
84#define	QUIESCE_VERSMIN	4
85/* DSS: distributed stable storage */
86#define	DSS_VERSMIN	4
87
88static	int	nfssvc(int, struct netbuf, struct netconfig *);
89static	int	nfssvcpool(int maxservers);
90static	int	dss_init(uint_t npaths, char **pathnames);
91static	void	dss_mkleafdirs(uint_t npaths, char **pathnames);
92static	void	dss_mkleafdir(char *dir, char *leaf, char *path);
93static	void	usage(void);
94int		qstrcmp(const void *s1, const void *s2);
95
96extern	int	_nfssys(int, void *);
97
98extern int	daemonize_init(void);
99extern void	daemonize_fini(int fd);
100
101/* signal handlers */
102static void sigflush(int);
103static void quiesce(int);
104
105static	char	*MyName;
106static	NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
107					    "/dev/udp6", NULL };
108/* static	NETSELDECL(defaultprotos)[] =	{ NC_UDP, NC_TCP, NULL }; */
109/*
110 * The following are all globals used by routines in nfs_tbind.c.
111 */
112size_t	end_listen_fds;		/* used by conn_close_oldest() */
113size_t	num_fds = 0;		/* used by multiple routines */
114int	listen_backlog = 32;	/* used by bind_to_{provider,proto}() */
115int	num_servers;		/* used by cots_listen_event() */
116int	(*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
117				/* used by cots_listen_event() */
118int	max_conns_allowed = -1;	/* used by cots_listen_event() */
119
120/*
121 * Keep track of min/max versions of NFS protocol to be started.
122 * Start with the defaults (min == 2, max == 3).  We have the
123 * capability of starting vers=4 but only if the user requests it.
124 */
125int	nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
126int	nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
127
/*
 * Set the default for server delegation enablement; main() overrides it
 * from the SMF "server_delegation" property (if present).
 */
132int	nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
133
134int
135main(int ac, char *av[])
136{
137	char *dir = "/";
138	int allflag = 0;
139	int df_allflag = 0;
140	int opt_cnt = 0;
141	int maxservers = 1024;	/* zero allows inifinte number of threads */
142	int maxservers_set = 0;
143	int logmaxservers = 0;
144	int pid;
145	int i;
146	char *provider = (char *)NULL;
147	char *df_provider = (char *)NULL;
148	struct protob *protobp0, *protobp;
149	NETSELDECL(proto) = NULL;
150	NETSELDECL(df_proto) = NULL;
151	NETSELPDECL(providerp);
152	char *defval;
153	boolean_t can_do_mlp;
154	uint_t dss_npaths = 0;
155	char **dss_pathnames = NULL;
156	sigset_t sgset;
157	char name[PATH_MAX], value[PATH_MAX];
158	int ret, bufsz;
159
160	int pipe_fd = -1;
161
162	MyName = *av;
163
164	/*
165	 * Initializations that require more privileges than we need to run.
166	 */
167	(void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
168	svcsetprio();
169
170	can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
171	if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
172	    DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
173	    can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
174		(void) fprintf(stderr, "%s should be run with"
175		    " sufficient privileges\n", av[0]);
176		exit(1);
177	}
178
179	(void) enable_extended_FILE_stdio(-1, -1);
180
181	/*
182	 * Read in the values from SMF first before we check
183	 * command line options so the options override SMF values.
184	 */
185	bufsz = PATH_MAX;
186	ret = nfs_smf_get_prop("max_connections", value, DEFAULT_INSTANCE,
187	    SCF_TYPE_INTEGER, NFSD, &bufsz);
188	if (ret == SA_OK) {
189		errno = 0;
190		max_conns_allowed = strtol(value, (char **)NULL, 10);
191		if (errno != 0)
192			max_conns_allowed = -1;
193	}
194
195	bufsz = PATH_MAX;
196	ret = nfs_smf_get_prop("listen_backlog", value, DEFAULT_INSTANCE,
197	    SCF_TYPE_INTEGER, NFSD, &bufsz);
198	if (ret == SA_OK) {
199		errno = 0;
200		listen_backlog = strtol(value, (char **)NULL, 10);
201		if (errno != 0) {
202			listen_backlog = 32;
203		}
204	}
205
206	bufsz = PATH_MAX;
207	ret = nfs_smf_get_prop("protocol", value, DEFAULT_INSTANCE,
208	    SCF_TYPE_ASTRING, NFSD, &bufsz);
209	if ((ret == SA_OK) && strlen(value) > 0) {
210		df_proto = strdup(value);
211		opt_cnt++;
212		if (strncasecmp("ALL", value, 3) == 0) {
213			free(df_proto);
214			df_proto = NULL;
215			df_allflag = 1;
216		}
217	}
218
219	bufsz = PATH_MAX;
220	ret = nfs_smf_get_prop("device", value, DEFAULT_INSTANCE,
221	    SCF_TYPE_ASTRING, NFSD, &bufsz);
222	if ((ret == SA_OK) && strlen(value) > 0) {
223		df_provider = strdup(value);
224		opt_cnt++;
225	}
226
227	bufsz = PATH_MAX;
228	ret = nfs_smf_get_prop("servers", value, DEFAULT_INSTANCE,
229	    SCF_TYPE_INTEGER, NFSD, &bufsz);
230	if (ret == SA_OK) {
231		errno = 0;
232		maxservers = strtol(value, (char **)NULL, 10);
233		if (errno != 0)
234			maxservers = 1024;
235		else
236			maxservers_set = 1;
237	}
238
239	bufsz = 4;
240	ret = nfs_smf_get_prop("server_versmin", value, DEFAULT_INSTANCE,
241	    SCF_TYPE_INTEGER, NFSD, &bufsz);
242	if (ret == SA_OK)
243		nfs_server_vers_min = strtol(value, (char **)NULL, 10);
244
245	bufsz = 4;
246	ret = nfs_smf_get_prop("server_versmax", value, DEFAULT_INSTANCE,
247	    SCF_TYPE_INTEGER, NFSD, &bufsz);
248	if (ret == SA_OK)
249		nfs_server_vers_max = strtol(value, (char **)NULL, 10);
250
251	bufsz = PATH_MAX;
252	ret = nfs_smf_get_prop("server_delegation", value, DEFAULT_INSTANCE,
253	    SCF_TYPE_ASTRING, NFSD, &bufsz);
254	if (ret == SA_OK)
255		if (strncasecmp(value, "off", 3) == 0)
256			nfs_server_delegation = FALSE;
257
258	/*
259	 * Conflict options error messages.
260	 */
261	if (opt_cnt > 1) {
262		(void) fprintf(stderr, "\nConflicting options, only one of "
263		    "the following options can be specified\n"
264		    "in SMF:\n"
265		    "\tprotocol=ALL\n"
266		    "\tprotocol=protocol\n"
267		    "\tdevice=devicename\n\n");
268		usage();
269	}
270	opt_cnt = 0;
271
272	while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
273		switch (i) {
274		case 'a':
275			free(df_proto);
276			df_proto = NULL;
277			free(df_provider);
278			df_provider = NULL;
279
280			allflag = 1;
281			opt_cnt++;
282			break;
283
284		case 'c':
285			max_conns_allowed = atoi(optarg);
286			break;
287
288		case 'p':
289			proto = optarg;
290			df_allflag = 0;
291			opt_cnt++;
292			break;
293
294		/*
295		 * DSS: NFSv4 distributed stable storage.
296		 *
297		 * This is a Contracted Project Private interface, for
298		 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
299		 */
300		case 's':
301			if (strlen(optarg) < MAXPATHLEN) {
302				/* first "-s" option encountered? */
303				if (dss_pathnames == NULL) {
304					/*
305					 * Allocate maximum possible space
306					 * required given cmdline arg count;
307					 * "-s <path>" consumes two args.
308					 */
309					size_t sz = (ac / 2) * sizeof (char *);
310					dss_pathnames = (char **)malloc(sz);
311					if (dss_pathnames == NULL) {
312						(void) fprintf(stderr, "%s: "
313						    "dss paths malloc failed\n",
314						    av[0]);
315						exit(1);
316					}
317					(void) memset(dss_pathnames, 0, sz);
318				}
319				dss_pathnames[dss_npaths] = optarg;
320				dss_npaths++;
321			} else {
322				(void) fprintf(stderr,
323				    "%s: -s pathname too long.\n", av[0]);
324			}
325			break;
326
327		case 't':
328			provider = optarg;
329			df_allflag = 0;
330			opt_cnt++;
331			break;
332
333		case 'l':
334			listen_backlog = atoi(optarg);
335			break;
336
337		case '?':
338			usage();
339			/* NOTREACHED */
340		}
341	}
342
343	allflag = df_allflag;
344	if (proto == NULL)
345		proto = df_proto;
346	if (provider == NULL)
347		provider = df_provider;
348
349	/*
350	 * Conflict options error messages.
351	 */
352	if (opt_cnt > 1) {
353		(void) fprintf(stderr, "\nConflicting options, only one of "
354		    "the following options can be specified\n"
355		    "on the command line:\n"
356		    "\t-a\n"
357		    "\t-p protocol\n"
358		    "\t-t transport\n\n");
359		usage();
360	}
361
362	if (proto != NULL &&
363	    strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
364		if (nfs_server_vers_max == NFS_V4) {
365			if (nfs_server_vers_min == NFS_V4) {
366				fprintf(stderr,
367				    "NFS version 4 is not supported "
368				    "with the UDP protocol.  Exiting\n");
369				exit(3);
370			} else {
371				fprintf(stderr,
372				    "NFS version 4 is not supported "
373				    "with the UDP protocol.\n");
374			}
375		}
376	}
377
378	/*
379	 * If there is exactly one more argument, it is the number of
380	 * servers.
381	 */
382	if (optind == ac - 1) {
383		maxservers = atoi(av[optind]);
384		maxservers_set = 1;
385	}
386	/*
387	 * If there are two or more arguments, then this is a usage error.
388	 */
389	else if (optind < ac - 1)
390		usage();
391	/*
392	 * Check the ranges for min/max version specified
393	 */
394	else if ((nfs_server_vers_min > nfs_server_vers_max) ||
395	    (nfs_server_vers_min < NFS_VERSMIN) ||
396	    (nfs_server_vers_max > NFS_VERSMAX))
397		usage();
398	/*
399	 * There are no additional arguments, and we haven't set maxservers
400	 * explicitly via the config file, we use a default number of
401	 * servers.  We will log this.
402	 */
403	else if (maxservers_set == 0)
404		logmaxservers = 1;
405
406	/*
407	 * Basic Sanity checks on options
408	 *
409	 * max_conns_allowed must be positive, except for the special
410	 * value of -1 which is used internally to mean unlimited, -1 isn't
411	 * documented but we allow it anyway.
412	 *
413	 * maxservers must be positive
414	 * listen_backlog must be positive or zero
415	 */
416	if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
417	    (listen_backlog < 0) || (maxservers <= 0)) {
418		usage();
419	}
420
421	/*
422	 * Set current dir to server root
423	 */
424	if (chdir(dir) < 0) {
425		(void) fprintf(stderr, "%s:  ", MyName);
426		perror(dir);
427		exit(1);
428	}
429
430#ifndef DEBUG
431	pipe_fd = daemonize_init();
432#endif
433
434	openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
435
436	/*
437	 * establish our lock on the lock file and write our pid to it.
438	 * exit if some other process holds the lock, or if there's any
439	 * error in writing/locking the file.
440	 */
441	pid = _enter_daemon_lock(NFSD);
442	switch (pid) {
443	case 0:
444		break;
445	case -1:
446		fprintf(stderr, "error locking for %s: %s\n", NFSD,
447		    strerror(errno));
448		exit(2);
449	default:
450		/* daemon was already running */
451		exit(0);
452	}
453
454	/*
455	 * If we've been given a list of paths to be used for distributed
456	 * stable storage, and provided we're going to run a version
457	 * that supports it, setup the DSS paths.
458	 */
459	if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
460		if (dss_init(dss_npaths, dss_pathnames) != 0) {
461			fprintf(stderr, "%s", "dss_init failed. Exiting.\n");
462			exit(1);
463		}
464	}
465
466	/*
467	 * Block all signals till we spawn other
468	 * threads.
469	 */
470	(void) sigfillset(&sgset);
471	(void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
472
473	if (logmaxservers) {
474		fprintf(stderr,
475		    "Number of servers not specified. Using default of %d.\n",
476		    maxservers);
477	}
478
479	/*
480	 * Make sure to unregister any previous versions in case the
481	 * user is reconfiguring the server in interesting ways.
482	 */
483	svc_unreg(NFS_PROGRAM, NFS_VERSION);
484	svc_unreg(NFS_PROGRAM, NFS_V3);
485	svc_unreg(NFS_PROGRAM, NFS_V4);
486	svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
487	svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
488
489	/*
490	 * Set up kernel RPC thread pool for the NFS server.
491	 */
492	if (nfssvcpool(maxservers)) {
493		fprintf(stderr, "Can't set up kernel NFS service: %s. "
494		    "Exiting.\n", strerror(errno));
495		exit(1);
496	}
497
498	/*
499	 * Set up blocked thread to do LWP creation on behalf of the kernel.
500	 */
501	if (svcwait(NFS_SVCPOOL_ID)) {
502		fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting.\n",
503		    strerror(errno));
504		exit(1);
505	}
506
507	/*
508	 * RDMA start and stop thread.
509	 * Per pool RDMA listener creation and
510	 * destructor thread.
511	 *
512	 * start rdma services and block in the kernel.
513	 * (only if proto or provider is not set to TCP or UDP)
514	 */
515	if ((proto == NULL) && (provider == NULL)) {
516		if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min,
517		    nfs_server_vers_max, nfs_server_delegation)) {
518			fprintf(stderr,
519			    "Can't set up RDMA creator thread : %s\n",
520			    strerror(errno));
521		}
522	}
523
524	/*
525	 * Now open up for signal delivery
526	 */
527
528	(void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
529	sigset(SIGTERM, sigflush);
530	sigset(SIGUSR1, quiesce);
531
532	/*
533	 * Build a protocol block list for registration.
534	 */
535	protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
536	protobp->serv = "NFS";
537	protobp->versmin = nfs_server_vers_min;
538	protobp->versmax = nfs_server_vers_max;
539	protobp->program = NFS_PROGRAM;
540
541	protobp->next = (struct protob *)malloc(sizeof (struct protob));
542	protobp = protobp->next;
543	protobp->serv = "NFS_ACL";		/* not used */
544	protobp->versmin = nfs_server_vers_min;
545	/* XXX - this needs work to get the version just right */
546	protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
547	    NFS_ACL_V3 : nfs_server_vers_max;
548	protobp->program = NFS_ACL_PROGRAM;
549	protobp->next = (struct protob *)NULL;
550
551	if (allflag) {
552		if (do_all(protobp0, nfssvc) == -1) {
553			fprintf(stderr, "setnetconfig failed : %s\n",
554			    strerror(errno));
555			exit(1);
556		}
557	} else if (proto) {
558		/* there's more than one match for the same protocol */
559		struct netconfig *nconf;
560		NCONF_HANDLE *nc;
561		bool_t	protoFound = FALSE;
562		if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
563			fprintf(stderr, "setnetconfig failed : %s\n",
564			    strerror(errno));
565			goto done;
566		}
567		while (nconf = getnetconfig(nc)) {
568			if (strcmp(nconf->nc_proto, proto) == 0) {
569				protoFound = TRUE;
570				do_one(nconf->nc_device, NULL,
571				    protobp0, nfssvc);
572			}
573		}
574		(void) endnetconfig(nc);
575		if (protoFound == FALSE) {
576			fprintf(stderr,
577			    "couldn't find netconfig entry for protocol %s\n",
578			    proto);
579		}
580	} else if (provider)
581		do_one(provider, proto, protobp0, nfssvc);
582	else {
583		for (providerp = defaultproviders;
584		    *providerp != NULL; providerp++) {
585			provider = *providerp;
586			do_one(provider, NULL, protobp0, nfssvc);
587		}
588	}
589done:
590
591	free(protobp);
592	free(protobp0);
593
594	if (num_fds == 0) {
595		fprintf(stderr, "Could not start NFS service for any protocol."
596		    " Exiting.\n");
597		exit(1);
598	}
599
600	end_listen_fds = num_fds;
601
602	/*
603	 * nfsd is up and running as far as we are concerned.
604	 */
605	daemonize_fini(pipe_fd);
606
607	/*
608	 * Get rid of unneeded privileges.
609	 */
610	__fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
611	    PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
612
613	/*
614	 * Poll for non-data control events on the transport descriptors.
615	 */
616	poll_for_action();
617
618	/*
619	 * If we get here, something failed in poll_for_action().
620	 */
621	return (1);
622}
623
624static int
625nfssvcpool(int maxservers)
626{
627	struct svcpool_args npa;
628
629	npa.id = NFS_SVCPOOL_ID;
630	npa.maxthreads = maxservers;
631	npa.redline = 0;
632	npa.qsize = 0;
633	npa.timeout = 0;
634	npa.stksize = 0;
635	npa.max_same_xprt = 0;
636	return (_nfssys(SVCPOOL_CREATE, &npa));
637}
638
639/*
640 * Establish NFS service thread.
641 */
642static int
643nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
644{
645	struct nfs_svc_args nsa;
646
647	nsa.fd = fd;
648	nsa.netid = nconf->nc_netid;
649	nsa.addrmask = addrmask;
650	if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
651		nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
652		    NFS_V3 : nfs_server_vers_max;
653		nsa.versmin = nfs_server_vers_min;
654		/*
655		 * If no version left, silently do nothing, previous
656		 * checks will have assured at least TCP is available.
657		 */
658		if (nsa.versmin > nsa.versmax)
659			return (0);
660	} else {
661		nsa.versmax = nfs_server_vers_max;
662		nsa.versmin = nfs_server_vers_min;
663	}
664	nsa.delegation = nfs_server_delegation;
665	return (_nfssys(NFS_SVC, &nsa));
666}
667
668static void
669usage(void)
670{
671	(void) fprintf(stderr,
672"usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
673	(void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
674	(void) fprintf(stderr,
675"\twhere -a causes <nservers> to be started on each appropriate transport,\n");
676	(void) fprintf(stderr,
677"\tmax_conns is the maximum number of concurrent connections allowed,\n");
678	(void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
679	(void) fprintf(stderr, "> zero,\n");
680	(void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
681	(void) fprintf(stderr,
682	    "\ttransport is a transport provider name (i.e. device),\n");
683	(void) fprintf(stderr,
684	    "\tlisten_backlog is the TCP listen backlog,\n");
685	(void) fprintf(stderr,
686	    "\tand <nservers> must be a decimal number > zero.\n");
687	exit(1);
688}
689
690/*
691 * Issue nfssys system call to flush all logging buffers asynchronously.
692 *
693 * NOTICE: It is extremely important to flush NFS logging buffers when
694 *	   nfsd exits. When the system is halted or rebooted nfslogd
695 *	   may not have an opportunity to flush the buffers.
696 */
697static void
698nfsl_flush()
699{
700	struct nfsl_flush_args nfa;
701
702	memset((void *)&nfa, 0, sizeof (nfa));
703	nfa.version = NFSL_FLUSH_ARGS_VERS;
704	nfa.directive = NFSL_ALL;	/* flush all asynchronously */
705
706	if (_nfssys(LOG_FLUSH, &nfa) < 0)
707		syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
708		    strerror(errno));
709}
710
/*
 * Handler installed for SIGTERM.
 *
 * Requests a flush of the NFS logging buffers and then terminates the
 * process immediately with status 0.  The signal number is not used.
 */
static void
sigflush(int sig)
{
	nfsl_flush();

	_exit(0);
}
721
722/*
723 * SIGUSR1 handler.
724 *
725 * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
726 *
727 * This is a Contracted Project Private interface, for the sole use
728 * of Sun Cluster HA-NFS. See PSARC/2004/497.
729 *
730 * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
731 */
732static void
733quiesce(int sig)
734{
735	int error;
736	int id = NFS_SVCPOOL_ID;
737
738	if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
739		/* Request server quiesce at next shutdown */
740		error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
741
742		/*
743		 * ENOENT is returned if there is no matching SVC pool
744		 * for the id. Possibly because the pool is not yet setup.
745		 * In this case, just exit as if no error. For all other errors,
746		 * just return and allow caller to retry.
747		 */
748		if (error && errno != ENOENT) {
749			syslog(LOG_ERR,
750			    "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
751			    strerror(errno));
752			return;
753		}
754	}
755
756	/* Flush logging buffers */
757	nfsl_flush();
758
759	_exit(0);
760}
761
762/*
763 * DSS: distributed stable storage.
764 * Create leaf directories as required, keeping an eye on path
765 * lengths. Calls exit(1) on failure.
766 * The pathnames passed in must already exist, and must be writeable by nfsd.
767 * Note: the leaf directories under NFS4_VAR_DIR are not created here;
768 * they're created at pkg install.
769 */
770static void
771dss_mkleafdirs(uint_t npaths, char **pathnames)
772{
773	int i;
774	char *tmppath = NULL;
775
776	/*
777	 * Create the temporary storage used by dss_mkleafdir() here,
778	 * rather than in that function, so that it only needs to be
779	 * done once, rather than once for each call. Too big to put
780	 * on the function's stack.
781	 */
782	tmppath = (char *)malloc(MAXPATHLEN);
783	if (tmppath == NULL) {
784		syslog(LOG_ERR, "tmppath malloc failed. Exiting");
785		exit(1);
786	}
787
788	for (i = 0; i < npaths; i++) {
789		char *p = pathnames[i];
790
791		dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
792		dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
793	}
794
795	free(tmppath);
796}
797
798/*
799 * Create "leaf" in "dir" (which must already exist).
800 * leaf: should start with a '/'
801 */
802static void
803dss_mkleafdir(char *dir, char *leaf, char *tmppath)
804{
805	/* MAXPATHLEN includes the terminating NUL */
806	if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
807		fprintf(stderr, "stable storage path too long: %s%s. "
808		    "Exiting.\n", dir, leaf);
809		exit(1);
810	}
811
812	(void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
813
814	/* the directory may already exist: that's OK */
815	if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
816		fprintf(stderr, "error creating stable storage directory: "
817		    "%s: %s. Exiting.\n", strerror(errno), tmppath);
818		exit(1);
819	}
820}
821
822/*
823 * Create the storage dirs, and pass the path list to the kernel.
824 * This requires the nfssrv module to be loaded; the _nfssys() syscall
825 * will fail ENOTSUP if it is not.
826 * Use libnvpair(3LIB) to pass the data to the kernel.
827 */
828static int
829dss_init(uint_t npaths, char **pathnames)
830{
831	int i, j, nskipped, error;
832	char *bufp;
833	uint32_t bufsize;
834	size_t buflen;
835	nvlist_t *nvl;
836
837	if (npaths > 1) {
838		/*
839		 * We need to remove duplicate paths; this might be user error
840		 * in the general case, but HA-NFSv4 can also cause this.
841		 * Sort the pathnames array, and NULL out duplicates,
842		 * then write the non-NULL entries to a new array.
843		 * Sorting will also allow the kernel to optimise its searches.
844		 */
845
846		qsort(pathnames, npaths, sizeof (char *), qstrcmp);
847
848		/* now NULL out any duplicates */
849		i = 0; j = 1; nskipped = 0;
850		while (j < npaths) {
851			if (strcmp(pathnames[i], pathnames[j]) == 0) {
852				pathnames[j] = NULL;
853				j++;
854				nskipped++;
855				continue;
856			}
857
858			/* skip i over any of its NULLed duplicates */
859			i = j++;
860		}
861
862		/* finally, write the non-NULL entries to a new array */
863		if (nskipped > 0) {
864			int nreal;
865			size_t sz;
866			char **tmp_pathnames;
867
868			nreal = npaths - nskipped;
869
870			sz = nreal * sizeof (char *);
871			tmp_pathnames = (char **)malloc(sz);
872			if (tmp_pathnames == NULL) {
873				fprintf(stderr, "tmp_pathnames malloc "
874				    "failed\n");
875				exit(1);
876			}
877
878			for (i = 0, j = 0; i < npaths; i++)
879				if (pathnames[i] != NULL)
880					tmp_pathnames[j++] = pathnames[i];
881			free(pathnames);
882			pathnames = tmp_pathnames;
883			npaths = nreal;
884		}
885
886	}
887
888	/* Create directories to store the distributed state files */
889	dss_mkleafdirs(npaths, pathnames);
890
891	/* Create the name-value pair list */
892	error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
893	if (error) {
894		fprintf(stderr, "nvlist_alloc failed: %s\n", strerror(errno));
895		return (1);
896	}
897
898	/* Add the pathnames array as a single name-value pair */
899	error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
900	    pathnames, npaths);
901	if (error) {
902		fprintf(stderr, "nvlist_add_string_array failed: %s\n",
903		    strerror(errno));
904		nvlist_free(nvl);
905		return (1);
906	}
907
908	/*
909	 * Pack list into contiguous memory, for passing to kernel.
910	 * nvlist_pack() will allocate the memory for the buffer,
911	 * which we should free() when no longer needed.
912	 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
913	 */
914	bufp = NULL;
915	error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
916	if (error) {
917		fprintf(stderr, "nvlist_pack failed: %s\n", strerror(errno));
918		nvlist_free(nvl);
919		return (1);
920	}
921
922	/* Now we have the packed buffer, we no longer need the list */
923	nvlist_free(nvl);
924
925	/*
926	 * Let the kernel know in advance how big the buffer is.
927	 * NOTE: we cannot just pass buflen, since size_t is a long, and
928	 * thus a different size between ILP32 userland and LP64 kernel.
929	 * Use an int for the transfer, since that should be big enough;
930	 * this is a no-op at the moment, here, since nfsd is 32-bit, but
931	 * that could change.
932	 */
933	bufsize = (uint32_t)buflen;
934	error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
935	if (error) {
936		fprintf(stderr,
937		    "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s\n",
938		    strerror(errno));
939		free(bufp);
940		return (1);
941	}
942
943	/* Pass the packed buffer to the kernel */
944	error = _nfssys(NFS4_DSS_SETPATHS, bufp);
945	if (error) {
946		fprintf(stderr,
947		    "_nfssys(NFS4_DSS_SETPATHS) failed: %s\n", strerror(errno));
948		free(bufp);
949		return (1);
950	}
951
952	/*
953	 * The kernel has now unpacked the buffer and extracted the
954	 * pathnames array, we no longer need the buffer.
955	 */
956	free(bufp);
957
958	return (0);
959}
960
/*
 * qsort(3C) comparison callback for arrays of C strings.
 *
 * Each argument points at an array element, i.e. at a "char *"; the
 * two strings those elements refer to are ordered with strcmp().
 */
int
qstrcmp(const void *p1, const void *p2)
{
	const char *const *lhs = p1;
	const char *const *rhs = p2;

	return (strcmp(*lhs, *rhs));
}
973