nfsd.c revision 9acbbeaf2a1ffe5c14b244867d427714fab43c5c
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T		*/
27/*	  All Rights Reserved  	*/
28
29/*
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
33 *
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
37 */
38
39/* LINTLIBRARY */
40/* PROTOLIB1 */
41
42#pragma ident	"%Z%%M%	%I%	%E% SMI"
43
44/* NFS server */
45
46#include <sys/param.h>
47#include <sys/types.h>
48#include <sys/stat.h>
49#include <syslog.h>
50#include <tiuser.h>
51#include <rpc/rpc.h>
52#include <errno.h>
53#include <thread.h>
54#include <sys/resource.h>
55#include <sys/time.h>
56#include <sys/file.h>
57#include <nfs/nfs.h>
58#include <nfs/nfs_acl.h>
59#include <nfs/nfssys.h>
60#include <stdio.h>
61#include <stdio_ext.h>
62#include <stdlib.h>
63#include <signal.h>
64#include <netconfig.h>
65#include <netdir.h>
66#include <string.h>
67#include <unistd.h>
68#include <stropts.h>
69#include <sys/tihdr.h>
70#include <poll.h>
71#include <priv_utils.h>
72#include <sys/tiuser.h>
73#include <netinet/tcp.h>
74#include <deflt.h>
75#include <rpcsvc/daemon_utils.h>
76#include <rpcsvc/nfs4_prot.h>
77#include <libnvpair.h>
78#include "nfs_tbind.h"
79#include "thrpool.h"
80
/* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
#define	QUIESCE_VERSMIN	4
/*
 * DSS: distributed stable storage.
 * The "-s" paths are only handed to the kernel when
 * nfs_server_vers_max >= DSS_VERSMIN (see main()).
 */
#define	DSS_VERSMIN	4

/* Local helpers, defined later in this file. */
static	int	nfssvc(int, struct netbuf, struct netconfig *);
static	int	nfssvcpool(int maxservers);
static	int	dss_init(uint_t npaths, char **pathnames);
static	void	dss_mkleafdirs(uint_t npaths, char **pathnames);
static	void	dss_mkleafdir(char *dir, char *leaf, char *path);
static	void	usage(void);
/* qsort(3C) comparison routine over an array of char * */
int		qstrcmp(const void *s1, const void *s2);

/* nfssys system call entry point; not declared by the headers included here */
extern	int	_nfssys(int, void *);

/* signal handlers */
static void sigflush(int);	/* installed for SIGTERM in main() */
static void quiesce(int);	/* installed for SIGUSR1 in main() */

/* Program name (av[0]); used in usage and error messages. */
static	char	*MyName;
/*
 * Transport devices tried, in this order, when neither -a, -p nor -t
 * (nor their config-file equivalents) selects a transport.
 */
static	NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
					    "/dev/udp6", NULL };
/* static	NETSELDECL(defaultprotos)[] =	{ NC_UDP, NC_TCP, NULL }; */
/*
 * The following are all globals used by routines in nfs_tbind.c.
 */
size_t	end_listen_fds;		/* used by conn_close_oldest() */
size_t	num_fds = 0;		/* used by multiple routines */
/* default 32; overridden by NFSD_LISTEN_BACKLOG or the -l option */
int	listen_backlog = 32;	/* used by bind_to_{provider,proto}() */
int	num_servers;		/* used by cots_listen_event() */
int	(*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
				/* used by cots_listen_event() */
/* -1 means unlimited; overridden by NFSD_MAX_CONNECTIONS or the -c option */
int	max_conns_allowed = -1;	/* used by cots_listen_event() */

/*
 * Keep track of min/max versions of NFS protocol to be started.
 * Start with the compiled-in defaults NFS_VERSMIN_DEFAULT and
 * NFS_VERSMAX_DEFAULT (historically described as min == 2, max == 3,
 * with vers=4 started only on request -- confirm against <nfs/nfs.h>).
 * main() overrides these from NFS_SERVER_VERSMIN / NFS_SERVER_VERSMAX
 * in the defaults file.
 */
int	nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
int	nfs_server_vers_max = NFS_VERSMAX_DEFAULT;

/*
 * Set the default for server delegation enablement and set per
 * /etc/default/nfs configuration (if present): main() sets this to
 * FALSE when NFS_SERVER_DELEGATION is "off".
 */
int	nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
128
129int
130main(int ac, char *av[])
131{
132	char *dir = "/";
133	int allflag = 0;
134	int df_allflag = 0;
135	int opt_cnt = 0;
136	int maxservers = 1;	/* zero allows inifinte number of threads */
137	int maxservers_set = 0;
138	int logmaxservers = 0;
139	int pid;
140	int i;
141	char *provider = (char *)NULL;
142	char *df_provider = (char *)NULL;
143	struct protob *protobp0, *protobp;
144	NETSELDECL(proto) = NULL;
145	NETSELDECL(df_proto) = NULL;
146	NETSELPDECL(providerp);
147	char *defval;
148	boolean_t can_do_mlp;
149	uint_t dss_npaths = 0;
150	char **dss_pathnames = NULL;
151
152	MyName = *av;
153
154	/*
155	 * Initializations that require more privileges than we need to run.
156	 */
157	(void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
158	svcsetprio();
159
160	can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
161	if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
162	    DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
163	    can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
164		(void) fprintf(stderr, "%s should be run with"
165			" sufficient privileges\n", av[0]);
166		exit(1);
167	}
168
169	(void) enable_extended_FILE_stdio(-1, -1);
170
171	/*
172	 * Read in the values from config file first before we check
173	 * commandline options so the options override the file.
174	 */
175	if ((defopen(NFSADMIN)) == 0) {
176		if ((defval = defread("NFSD_MAX_CONNECTIONS=")) != NULL) {
177			errno = 0;
178			max_conns_allowed = strtol(defval, (char **)NULL, 10);
179			if (errno != 0) {
180				max_conns_allowed = -1;
181			}
182		}
183		if ((defval = defread("NFSD_LISTEN_BACKLOG=")) != NULL) {
184			errno = 0;
185			listen_backlog = strtol(defval, (char **)NULL, 10);
186			if (errno != 0) {
187				listen_backlog = 32;
188			}
189		}
190		if ((defval = defread("NFSD_PROTOCOL=")) != NULL) {
191			df_proto = strdup(defval);
192			opt_cnt++;
193			if (strncasecmp("ALL", defval, 3) == 0) {
194				free(df_proto);
195				df_proto = NULL;
196				df_allflag = 1;
197			}
198		}
199		if ((defval = defread("NFSD_DEVICE=")) != NULL) {
200			df_provider = strdup(defval);
201			opt_cnt++;
202		}
203		if ((defval = defread("NFSD_SERVERS=")) != NULL) {
204			errno = 0;
205			maxservers = strtol(defval, (char **)NULL, 10);
206			if (errno != 0) {
207				maxservers = 1;
208			} else {
209				maxservers_set = 1;
210			}
211		}
212		if ((defval = defread("NFS_SERVER_VERSMIN=")) != NULL) {
213			errno = 0;
214			nfs_server_vers_min =
215				strtol(defval, (char **)NULL, 10);
216			if (errno != 0) {
217				nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
218			}
219		}
220		if ((defval = defread("NFS_SERVER_VERSMAX=")) != NULL) {
221			errno = 0;
222			nfs_server_vers_max =
223				strtol(defval, (char **)NULL, 10);
224			if (errno != 0) {
225				nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
226			}
227		}
228		if ((defval = defread("NFS_SERVER_DELEGATION=")) != NULL) {
229			if (strcmp(defval, "off") == 0) {
230				nfs_server_delegation = FALSE;
231			}
232		}
233
234		/* close defaults file */
235		defopen(NULL);
236	}
237
238	/*
239	 * Conflict options error messages.
240	 */
241	if (opt_cnt > 1) {
242		(void) fprintf(stderr, "\nConflicting options, only one of "
243		    "the following options can be specified\n"
244		    "in " NFSADMIN ":\n"
245		    "\tNFSD_PROTOCOL=ALL\n"
246		    "\tNFSD_PROTOCOL=protocol\n"
247		    "\tNFSD_DEVICE=device\n\n");
248		usage();
249	}
250	opt_cnt = 0;
251
252	while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
253		switch (i) {
254		case 'a':
255			free(df_proto);
256			df_proto = NULL;
257			free(df_provider);
258			df_provider = NULL;
259
260			allflag = 1;
261			opt_cnt++;
262			break;
263
264		case 'c':
265			max_conns_allowed = atoi(optarg);
266			break;
267
268		case 'p':
269			proto = optarg;
270			df_allflag = 0;
271			opt_cnt++;
272			break;
273
274		/*
275		 * DSS: NFSv4 distributed stable storage.
276		 *
277		 * This is a Contracted Project Private interface, for
278		 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
279		 */
280		case 's':
281			if (strlen(optarg) < MAXPATHLEN) {
282				/* first "-s" option encountered? */
283				if (dss_pathnames == NULL) {
284					/*
285					 * Allocate maximum possible space
286					 * required given cmdline arg count;
287					 * "-s <path>" consumes two args.
288					 */
289					size_t sz = (ac / 2) * sizeof (char *);
290					dss_pathnames = (char **)malloc(sz);
291					if (dss_pathnames == NULL) {
292						(void) fprintf(stderr, "%s: "
293						    "dss paths malloc failed\n",
294						    av[0]);
295						exit(1);
296					}
297					(void) memset(dss_pathnames, 0, sz);
298				}
299				dss_pathnames[dss_npaths] = optarg;
300				dss_npaths++;
301			} else {
302				(void) fprintf(stderr,
303				    "%s: -s pathname too long.\n", av[0]);
304			}
305			break;
306
307		case 't':
308			provider = optarg;
309			df_allflag = 0;
310			opt_cnt++;
311			break;
312
313		case 'l':
314			listen_backlog = atoi(optarg);
315			break;
316
317		case '?':
318			usage();
319			/* NOTREACHED */
320		}
321	}
322
323	allflag = df_allflag;
324	if (proto == NULL)
325		proto = df_proto;
326	if (provider == NULL)
327		provider = df_provider;
328
329	/*
330	 * Conflict options error messages.
331	 */
332	if (opt_cnt > 1) {
333		(void) fprintf(stderr, "\nConflicting options, only one of "
334		    "the following options can be specified\n"
335		    "on the command line:\n"
336		    "\t-a\n"
337		    "\t-p protocol\n"
338		    "\t-t transport\n\n");
339		usage();
340	}
341
342	if (proto != NULL &&
343	    strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
344		if (nfs_server_vers_max == NFS_V4) {
345			if (nfs_server_vers_min == NFS_V4) {
346				syslog(LOG_ERR,
347					"NFS version 4 is not supported "
348					"with the UDP protocol.  Exiting\n");
349				fprintf(stderr,
350					"NFS version 4 is not supported "
351					"with the UDP protocol.  Exiting\n");
352				exit(3);
353			} else {
354				fprintf(stderr,
355					"NFS version 4 is not supported "
356					"with the UDP protocol.\n");
357			}
358		}
359	}
360
361	/*
362	 * If there is exactly one more argument, it is the number of
363	 * servers.
364	 */
365	if (optind == ac - 1) {
366		maxservers = atoi(av[optind]);
367		maxservers_set = 1;
368	}
369	/*
370	 * If there are two or more arguments, then this is a usage error.
371	 */
372	else if (optind < ac - 1)
373		usage();
374	/*
375	 * Check the ranges for min/max version specified
376	 */
377	else if ((nfs_server_vers_min > nfs_server_vers_max) ||
378		(nfs_server_vers_min < NFS_VERSMIN) ||
379		(nfs_server_vers_max > NFS_VERSMAX))
380		usage();
381	/*
382	 * There are no additional arguments, and we haven't set maxservers
383	 * explicitly via the config file, we use a default number of
384	 * servers.  We will log this.
385	 */
386	else if (maxservers_set == 0)
387		logmaxservers = 1;
388
389	/*
390	 * Basic Sanity checks on options
391	 *
392	 * max_conns_allowed must be positive, except for the special
393	 * value of -1 which is used internally to mean unlimited, -1 isn't
394	 * documented but we allow it anyway.
395	 *
396	 * maxservers must be positive
397	 * listen_backlog must be positive or zero
398	 */
399	if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
400	    (listen_backlog < 0) || (maxservers <= 0)) {
401		usage();
402	}
403
404	/*
405	 * Set current dir to server root
406	 */
407	if (chdir(dir) < 0) {
408		(void) fprintf(stderr, "%s:  ", MyName);
409		perror(dir);
410		exit(1);
411	}
412
413#ifndef DEBUG
414	/*
415	 * Background
416	 */
417	pid = fork();
418	if (pid < 0) {
419		perror("nfsd: fork");
420		exit(1);
421	}
422	if (pid != 0)
423		exit(0);
424
425	/*
426	 * Close existing file descriptors, open "/dev/null" as
427	 * standard input, output, and error, and detach from
428	 * controlling terminal.
429	 */
430	closefrom(0);
431	(void) open("/dev/null", O_RDONLY);
432	(void) open("/dev/null", O_WRONLY);
433	(void) dup(1);
434	(void) setsid();
435#endif
436	openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
437
438	/*
439	 * establish our lock on the lock file and write our pid to it.
440	 * exit if some other process holds the lock, or if there's any
441	 * error in writing/locking the file.
442	 */
443	pid = _enter_daemon_lock(NFSD);
444	switch (pid) {
445	case 0:
446		break;
447	case -1:
448		syslog(LOG_ERR, "error locking for %s: %s", NFSD,
449		    strerror(errno));
450		exit(2);
451	default:
452		/* daemon was already running */
453		exit(0);
454	}
455
456	/*
457	 * If we've been given a list of paths to be used for distributed
458	 * stable storage, and provided we're going to run a version
459	 * that supports it, setup the DSS paths.
460	 */
461	if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
462		if (dss_init(dss_npaths, dss_pathnames) != 0) {
463			syslog(LOG_ERR, "dss_init failed. Exiting.");
464			exit(1);
465		}
466	}
467
468	sigset(SIGTERM, sigflush);
469	sigset(SIGUSR1, quiesce);
470
471	if (logmaxservers) {
472		(void) syslog(LOG_INFO,
473			"Number of servers not specified. Using default of %d.",
474			maxservers);
475	}
476
477	/*
478	 * Make sure to unregister any previous versions in case the
479	 * user is reconfiguring the server in interesting ways.
480	 */
481	svc_unreg(NFS_PROGRAM, NFS_VERSION);
482	svc_unreg(NFS_PROGRAM, NFS_V3);
483	svc_unreg(NFS_PROGRAM, NFS_V4);
484	svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
485	svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
486
487	/*
488	 * Set up kernel RPC thread pool for the NFS server.
489	 */
490	if (nfssvcpool(maxservers)) {
491		(void) syslog(LOG_ERR,
492			"Can't set up kernel NFS service: %m. Exiting");
493		exit(1);
494	}
495
496
497	/*
498	 * Set up blocked thread to do LWP creation on behalf of the kernel.
499	 */
500	if (svcwait(NFS_SVCPOOL_ID)) {
501		(void) syslog(LOG_ERR,
502		    "Can't set up NFS pool creator: %m, Exiting");
503		exit(1);
504	}
505
506	/*
507	 * RDMA start and stop thread.
508	 * Per pool RDMA listener creation and
509	 * destructor thread.
510	 *
511	 * start rdma services and block in the kernel.
512	 */
513	if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min, nfs_server_vers_max,
514		nfs_server_delegation)) {
515		(void) syslog(LOG_ERR,
516		    "Can't set up RDMA creator thread : %m.");
517	}
518
519	/*
520	 * Build a protocol block list for registration.
521	 */
522	protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
523	protobp->serv = "NFS";
524	protobp->versmin = nfs_server_vers_min;
525	protobp->versmax = nfs_server_vers_max;
526	protobp->program = NFS_PROGRAM;
527
528	protobp->next = (struct protob *)malloc(sizeof (struct protob));
529	protobp = protobp->next;
530	protobp->serv = "NFS_ACL";		/* not used */
531	protobp->versmin = nfs_server_vers_min;
532	/* XXX - this needs work to get the version just right */
533	protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
534		NFS_ACL_V3 : nfs_server_vers_max;
535	protobp->program = NFS_ACL_PROGRAM;
536	protobp->next = (struct protob *)NULL;
537
538	if (allflag) {
539		if (do_all(protobp0, nfssvc, 0) == -1)
540			exit(1);
541	} else if (proto) {
542		/* there's more than one match for the same protocol */
543		struct netconfig *nconf;
544		NCONF_HANDLE *nc;
545		bool_t	protoFound = FALSE;
546		if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
547			syslog(LOG_ERR, "setnetconfig failed: %m");
548			goto done;
549		}
550		while (nconf = getnetconfig(nc)) {
551			if (strcmp(nconf->nc_proto, proto) == 0) {
552				protoFound = TRUE;
553				do_one(nconf->nc_device, NULL,
554					protobp0, nfssvc, 0);
555			}
556		}
557		(void) endnetconfig(nc);
558		if (protoFound == FALSE)
559			syslog(LOG_ERR, "couldn't find netconfig entry \
560for protocol %s", proto);
561
562	} else if (provider)
563		do_one(provider, proto, protobp0, nfssvc, 0);
564	else {
565		for (providerp = defaultproviders;
566			*providerp != NULL; providerp++) {
567			provider = *providerp;
568			do_one(provider, NULL, protobp0, nfssvc, 0);
569		}
570	}
571done:
572
573	free(protobp);
574	free(protobp0);
575
576	if (num_fds == 0) {
577		(void) syslog(LOG_ERR,
578		"Could not start NFS service for any protocol. Exiting");
579		exit(1);
580	}
581
582	end_listen_fds = num_fds;
583
584	/*
585	 * Get rid of unneeded privileges.
586	 */
587	__fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
588	    PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
589
590	/*
591	 * Poll for non-data control events on the transport descriptors.
592	 */
593	poll_for_action();
594
595	/*
596	 * If we get here, something failed in poll_for_action().
597	 */
598	return (1);
599}
600
601static int
602nfssvcpool(int maxservers)
603{
604	struct svcpool_args npa;
605
606	npa.id = NFS_SVCPOOL_ID;
607	npa.maxthreads = maxservers;
608	npa.redline = 0;
609	npa.qsize = 0;
610	npa.timeout = 0;
611	npa.stksize = 0;
612	npa.max_same_xprt = 0;
613	return (_nfssys(SVCPOOL_CREATE, &npa));
614}
615
616/*
617 * Establish NFS service thread.
618 */
619static int
620nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
621{
622	struct nfs_svc_args nsa;
623
624	nsa.fd = fd;
625	nsa.netid = nconf->nc_netid;
626	nsa.addrmask = addrmask;
627	if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
628		nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
629			NFS_V3 : nfs_server_vers_max;
630		nsa.versmin = nfs_server_vers_min;
631		/*
632		 * If no version left, silently do nothing, previous
633		 * checks will have assured at least TCP is available.
634		 */
635		if (nsa.versmin > nsa.versmax)
636			return (0);
637	} else {
638		nsa.versmax = nfs_server_vers_max;
639		nsa.versmin = nfs_server_vers_min;
640	}
641	nsa.delegation = nfs_server_delegation;
642	return (_nfssys(NFS_SVC, &nsa));
643}
644
645static void
646usage(void)
647{
648	(void) fprintf(stderr,
649"usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
650	(void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
651	(void) fprintf(stderr,
652"\twhere -a causes <nservers> to be started on each appropriate transport,\n");
653	(void) fprintf(stderr,
654"\tmax_conns is the maximum number of concurrent connections allowed,\n");
655	(void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
656	(void) fprintf(stderr, "> zero,\n");
657	(void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
658	(void) fprintf(stderr,
659		"\ttransport is a transport provider name (i.e. device),\n");
660	(void) fprintf(stderr,
661		"\tlisten_backlog is the TCP listen backlog,\n");
662	(void) fprintf(stderr,
663		"\tand <nservers> must be a decimal number > zero.\n");
664	exit(1);
665}
666
667/*
668 * Issue nfssys system call to flush all logging buffers asynchronously.
669 *
670 * NOTICE: It is extremely important to flush NFS logging buffers when
671 *	   nfsd exits. When the system is halted or rebooted nfslogd
672 *	   may not have an opportunity to flush the buffers.
673 */
674static void
675nfsl_flush()
676{
677	struct nfsl_flush_args nfa;
678
679	memset((void *)&nfa, 0, sizeof (nfa));
680	nfa.version = NFSL_FLUSH_ARGS_VERS;
681	nfa.directive = NFSL_ALL;	/* flush all asynchronously */
682
683	if (_nfssys(LOG_FLUSH, &nfa) < 0)
684		syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
685			strerror(errno));
686}
687
/*
 * Handler installed for SIGTERM.
 *
 * Pushes out the NFS logging buffers via nfsl_flush() and then
 * terminates the daemon with exit status 0.  The signal number is
 * not examined.
 */
/* ARGSUSED */
static void
sigflush(int sig)
{
	/* flush first: once we exit nobody else will do it for us */
	nfsl_flush();

	exit(0);
}
698
699/*
700 * SIGUSR1 handler.
701 *
702 * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
703 *
704 * This is a Contracted Project Private interface, for the sole use
705 * of Sun Cluster HA-NFS. See PSARC/2004/497.
706 *
707 * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
708 */
709static void
710quiesce(int sig)
711{
712	int error;
713	int id = NFS_SVCPOOL_ID;
714
715	if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
716		/* Request server quiesce at next shutdown */
717		error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
718		if (error) {
719			syslog(LOG_ERR,
720			    "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
721			    strerror(errno));
722			return;
723		}
724	}
725
726	/* Flush logging buffers */
727	nfsl_flush();
728
729	exit(0);
730}
731
732/*
733 * DSS: distributed stable storage.
734 * Create leaf directories as required, keeping an eye on path
735 * lengths. Calls exit(1) on failure.
736 * The pathnames passed in must already exist, and must be writeable by nfsd.
737 * Note: the leaf directories under NFS4_VAR_DIR are not created here;
738 * they're created at pkg install.
739 */
740static void
741dss_mkleafdirs(uint_t npaths, char **pathnames)
742{
743	int i;
744	char *tmppath = NULL;
745
746	/*
747	 * Create the temporary storage used by dss_mkleafdir() here,
748	 * rather than in that function, so that it only needs to be
749	 * done once, rather than once for each call. Too big to put
750	 * on the function's stack.
751	 */
752	tmppath = (char *)malloc(MAXPATHLEN);
753	if (tmppath == NULL) {
754		syslog(LOG_ERR, "tmppath malloc failed. Exiting");
755		exit(1);
756	}
757
758	for (i = 0; i < npaths; i++) {
759		char *p = pathnames[i];
760
761		dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
762		dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
763	}
764
765	free(tmppath);
766}
767
768/*
769 * Create "leaf" in "dir" (which must already exist).
770 * leaf: should start with a '/'
771 */
772static void
773dss_mkleafdir(char *dir, char *leaf, char *tmppath)
774{
775	/* MAXPATHLEN includes the terminating NUL */
776	if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
777		syslog(LOG_ERR, "stable storage path too long: %s%s. Exiting",
778		    dir, leaf);
779		exit(1);
780	}
781
782	(void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
783
784	/* the directory may already exist: that's OK */
785	if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
786		syslog(LOG_ERR, "error creating stable storage directory: "
787		    "%s: %s. Exiting", strerror(errno), tmppath);
788		exit(1);
789	}
790}
791
792/*
793 * Create the storage dirs, and pass the path list to the kernel.
794 * This requires the nfssrv module to be loaded; the _nfssys() syscall
795 * will fail ENOTSUP if it is not.
796 * Use libnvpair(3LIB) to pass the data to the kernel.
797 */
798static int
799dss_init(uint_t npaths, char **pathnames)
800{
801	int i, j, nskipped, error;
802	char *bufp;
803	uint32_t bufsize;
804	size_t buflen;
805	nvlist_t *nvl;
806
807	if (npaths > 1) {
808		/*
809		 * We need to remove duplicate paths; this might be user error
810		 * in the general case, but HA-NFSv4 can also cause this.
811		 * Sort the pathnames array, and NULL out duplicates,
812		 * then write the non-NULL entries to a new array.
813		 * Sorting will also allow the kernel to optimise its searches.
814		 */
815
816		qsort(pathnames, npaths, sizeof (char *), qstrcmp);
817
818		/* now NULL out any duplicates */
819		i = 0; j = 1; nskipped = 0;
820		while (j < npaths) {
821			if (strcmp(pathnames[i], pathnames[j]) == NULL) {
822				pathnames[j] = NULL;
823				j++;
824				nskipped++;
825				continue;
826			}
827
828			/* skip i over any of its NULLed duplicates */
829			i = j++;
830		}
831
832		/* finally, write the non-NULL entries to a new array */
833		if (nskipped > 0) {
834			int nreal;
835			size_t sz;
836			char **tmp_pathnames;
837
838			nreal = npaths - nskipped;
839
840			sz = nreal * sizeof (char *);
841			tmp_pathnames = (char **)malloc(sz);
842			if (tmp_pathnames == NULL) {
843				syslog(LOG_ERR, "tmp_pathnames malloc failed");
844				exit(1);
845			}
846
847			for (i = 0, j = 0; i < npaths; i++)
848				if (pathnames[i] != NULL)
849					tmp_pathnames[j++] = pathnames[i];
850			free(pathnames);
851			pathnames = tmp_pathnames;
852			npaths = nreal;
853		}
854
855	}
856
857	/* Create directories to store the distributed state files */
858	dss_mkleafdirs(npaths, pathnames);
859
860	/* Create the name-value pair list */
861	error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
862	if (error) {
863		syslog(LOG_ERR, "nvlist_alloc failed: %s.", strerror(errno));
864		return (1);
865	}
866
867	/* Add the pathnames array as a single name-value pair */
868	error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
869	    pathnames, npaths);
870	if (error) {
871		syslog(LOG_ERR, "nvlist_add_string_array failed: %s.",
872		    strerror(errno));
873		nvlist_free(nvl);
874		return (1);
875	}
876
877	/*
878	 * Pack list into contiguous memory, for passing to kernel.
879	 * nvlist_pack() will allocate the memory for the buffer,
880	 * which we should free() when no longer needed.
881	 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
882	 */
883	bufp = NULL;
884	error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
885	if (error) {
886		syslog(LOG_ERR, "nvlist_pack failed: %s.", strerror(errno));
887		nvlist_free(nvl);
888		return (1);
889	}
890
891	/* Now we have the packed buffer, we no longer need the list */
892	nvlist_free(nvl);
893
894	/*
895	 * Let the kernel know in advance how big the buffer is.
896	 * NOTE: we cannot just pass buflen, since size_t is a long, and
897	 * thus a different size between ILP32 userland and LP64 kernel.
898	 * Use an int for the transfer, since that should be big enough;
899	 * this is a no-op at the moment, here, since nfsd is 32-bit, but
900	 * that could change.
901	 */
902	bufsize = (uint32_t)buflen;
903	error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
904	if (error) {
905		syslog(LOG_ERR,
906		    "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s. ",
907		    strerror(errno));
908		free(bufp);
909		return (1);
910	}
911
912	/* Pass the packed buffer to the kernel */
913	error = _nfssys(NFS4_DSS_SETPATHS, bufp);
914	if (error) {
915		syslog(LOG_ERR,
916		    "_nfssys(NFS4_DSS_SETPATHS) failed: %s. ", strerror(errno));
917		free(bufp);
918		return (1);
919	}
920
921	/*
922	 * The kernel has now unpacked the buffer and extracted the
923	 * pathnames array, we no longer need the buffer.
924	 */
925	free(bufp);
926
927	return (0);
928}
929
/*
 * String comparison adapter for qsort(3C).
 *
 * qsort hands the comparator pointers to the array elements; here each
 * element is itself a char *, so both arguments are dereferenced once
 * before being compared with strcmp().  Returns strcmp()'s result.
 */
int
qstrcmp(const void *p1, const void *p2)
{
	const char *const *lhs = p1;
	const char *const *rhs = p2;

	return (strcmp(*lhs, *rhs));
}
942