1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
26 /* All Rights Reserved */
27
28 /*
29 * University Copyright- Copyright (c) 1982, 1986, 1988
30 * The Regents of the University of California
31 * All Rights Reserved
32 *
33 * University Acknowledgment- Portions of this document are derived from
34 * software developed by the University of California, Berkeley, and its
35 * contributors.
36 */
37
38 /* NFS server */
39
40 #include <sys/param.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <syslog.h>
44 #include <tiuser.h>
45 #include <rpc/rpc.h>
46 #include <errno.h>
47 #include <thread.h>
48 #include <sys/resource.h>
49 #include <sys/time.h>
50 #include <sys/file.h>
51 #include <nfs/nfs.h>
52 #include <nfs/nfs4.h>
53 #include <nfs/nfs_acl.h>
54 #include <nfs/nfssys.h>
55 #include <stdio.h>
56 #include <stdio_ext.h>
57 #include <stdlib.h>
58 #include <signal.h>
59 #include <netconfig.h>
60 #include <netdir.h>
61 #include <string.h>
62 #include <unistd.h>
63 #include <limits.h>
64 #include <stropts.h>
65 #include <sys/tihdr.h>
66 #include <sys/wait.h>
67 #include <poll.h>
68 #include <priv_utils.h>
69 #include <sys/tiuser.h>
70 #include <netinet/tcp.h>
71 #include <deflt.h>
72 #include <rpcsvc/daemon_utils.h>
73 #include <rpcsvc/nfs4_prot.h>
74 #include <libnvpair.h>
75 #include <libscf.h>
76 #include <libshare.h>
77 #include "nfs_tbind.h"
78 #include "thrpool.h"
79 #include "smfcfg.h"
80
/* quiesce requests will be ignored if nfs_server_vers_max < QUIESCE_VERSMIN */
#define	QUIESCE_VERSMIN	4
/* DSS: distributed stable storage; lowest protocol version that uses it */
#define	DSS_VERSMIN	4

/* Forward declarations for routines defined later in this file. */
static int nfssvc(int, struct netbuf, struct netconfig *);
static int nfssvcpool(int maxservers);
static int dss_init(uint_t npaths, char **pathnames);
static void dss_mkleafdirs(uint_t npaths, char **pathnames);
static void dss_mkleafdir(char *dir, char *leaf, char *path);
static void usage(void);
int qstrcmp(const void *s1, const void *s2);

/* nfssys system call entry point; defined outside this file. */
extern int _nfssys(int, void *);

/*
 * Daemonization helpers, defined outside this file.  main() keeps the
 * descriptor returned by daemonize_init() and hands it back to
 * daemonize_fini() once the service is up.
 */
extern int daemonize_init(void);
extern void daemonize_fini(int fd);

/* signal handlers */
static void sigflush(int);
static void quiesce(int);

/* Program name (av[0]), used as a prefix in diagnostics and for openlog(). */
static char *MyName;

/*
 * Transport providers tried by main(), in this order, when neither a
 * protocol nor a provider has been selected.
 */
static NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
	"/dev/udp6", NULL };

/*
 * The following are all globals used by routines in nfs_tbind.c.
 */
size_t end_listen_fds; /* used by conn_close_oldest() */
size_t num_fds = 0; /* used by multiple routines */
int listen_backlog = 32; /* used by bind_to_{provider,proto}() */
int num_servers; /* used by cots_listen_event() */
int (*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
/* used by cots_listen_event() */
int max_conns_allowed = -1; /* used by cots_listen_event() */

/*
 * Keep track of min/max versions of NFS protocol to be started.
 * Start with the defaults (min == 2, max == 4).
 * The values are NFS_VERS_... encodings; apply the NFS_PROT_VERSION()
 * macro to obtain the protocol version number.
 */
uint32_t nfs_server_vers_min = NFS_SRV_VERS_MIN;
uint32_t nfs_server_vers_max = NFS_SRV_VERS_MAX;

/*
 * Default for server delegation enablement.  main() turns it off when
 * the SMF "server_delegation" property starts with "off".
 */
int nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
132
133 int
main(int ac,char * av[])134 main(int ac, char *av[])
135 {
136 char *dir = "/";
137 int allflag = 0;
138 int df_allflag = 0;
139 int opt_cnt = 0;
140 int maxservers = 1024; /* zero allows inifinte number of threads */
141 int maxservers_set = 0;
142 int logmaxservers = 0;
143 int pid;
144 int i;
145 char *provider = NULL;
146 char *df_provider = NULL;
147 struct protob *protobp0, *protobp;
148 NETSELDECL(proto) = NULL;
149 NETSELDECL(df_proto) = NULL;
150 NETSELPDECL(providerp);
151 char *defval;
152 boolean_t can_do_mlp;
153 uint_t dss_npaths = 0;
154 char **dss_pathnames = NULL;
155 sigset_t sgset;
156 char name[PATH_MAX], value[PATH_MAX];
157 int ret, bufsz;
158 int pipe_fd = -1;
159 const char *errstr;
160
161 MyName = *av;
162
163 /*
164 * Initializations that require more privileges than we need to run.
165 */
166 (void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
167 svcsetprio();
168
169 can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
170 if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
171 DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
172 can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
173 (void) fprintf(stderr, "%s should be run with"
174 " sufficient privileges\n", av[0]);
175 exit(1);
176 }
177
178 (void) enable_extended_FILE_stdio(-1, -1);
179
180 /* Upgrade SMF settings, if necessary. */
181 nfs_config_upgrade(NFSD);
182
183 /*
184 * Read in the values from SMF first before we check
185 * command line options so the options override SMF values.
186 */
187 bufsz = PATH_MAX;
188 ret = nfs_smf_get_prop("max_connections", value, DEFAULT_INSTANCE,
189 SCF_TYPE_INTEGER, NFSD, &bufsz);
190 if (ret == SA_OK) {
191 max_conns_allowed = strtonum(value, -1, INT32_MAX, &errstr);
192 if (errstr != NULL)
193 max_conns_allowed = -1;
194 }
195
196 bufsz = PATH_MAX;
197 ret = nfs_smf_get_prop("listen_backlog", value, DEFAULT_INSTANCE,
198 SCF_TYPE_INTEGER, NFSD, &bufsz);
199 if (ret == SA_OK) {
200 listen_backlog = strtonum(value, 0, INT32_MAX, &errstr);
201 if (errstr != NULL) {
202 listen_backlog = 32;
203 }
204 }
205
206 bufsz = PATH_MAX;
207 ret = nfs_smf_get_prop("protocol", value, DEFAULT_INSTANCE,
208 SCF_TYPE_ASTRING, NFSD, &bufsz);
209 if ((ret == SA_OK) && strlen(value) > 0) {
210 df_proto = strdup(value);
211 opt_cnt++;
212 if (strncasecmp("ALL", value, 3) == 0) {
213 free(df_proto);
214 df_proto = NULL;
215 df_allflag = 1;
216 }
217 }
218
219 bufsz = PATH_MAX;
220 ret = nfs_smf_get_prop("device", value, DEFAULT_INSTANCE,
221 SCF_TYPE_ASTRING, NFSD, &bufsz);
222 if ((ret == SA_OK) && strlen(value) > 0) {
223 df_provider = strdup(value);
224 opt_cnt++;
225 }
226
227 bufsz = PATH_MAX;
228 ret = nfs_smf_get_prop("servers", value, DEFAULT_INSTANCE,
229 SCF_TYPE_INTEGER, NFSD, &bufsz);
230 if (ret == SA_OK) {
231 maxservers = strtonum(value, 1, INT32_MAX, &errstr);
232 if (errstr != NULL)
233 maxservers = 1024;
234 else
235 maxservers_set = 1;
236 }
237
238 bufsz = PATH_MAX;
239 ret = nfs_smf_get_prop("server_versmin", value, DEFAULT_INSTANCE,
240 SCF_TYPE_ASTRING, NFSD, &bufsz);
241 if (ret == SA_OK) {
242 ret = nfs_convert_version_str(value);
243 if (ret == 0) {
244 (void) fprintf(stderr, "invalid server_versmin: %s\n",
245 value);
246 } else {
247 nfs_server_vers_min = ret;
248 }
249 }
250
251 bufsz = PATH_MAX;
252 ret = nfs_smf_get_prop("server_versmax", value, DEFAULT_INSTANCE,
253 SCF_TYPE_ASTRING, NFSD, &bufsz);
254 if (ret == SA_OK) {
255 ret = nfs_convert_version_str(value);
256 if (ret == 0) {
257 (void) fprintf(stderr, "invalid server_versmax: %s\n",
258 value);
259 } else {
260 nfs_server_vers_max = ret;
261 }
262 }
263
264 bufsz = PATH_MAX;
265 ret = nfs_smf_get_prop("server_delegation", value, DEFAULT_INSTANCE,
266 SCF_TYPE_ASTRING, NFSD, &bufsz);
267 if (ret == SA_OK)
268 if (strncasecmp(value, "off", 3) == 0)
269 nfs_server_delegation = FALSE;
270
271 /*
272 * Conflict options error messages.
273 */
274 if (opt_cnt > 1) {
275 (void) fprintf(stderr, "\nConflicting options, only one of "
276 "the following options can be specified\n"
277 "in SMF:\n"
278 "\tprotocol=ALL\n"
279 "\tprotocol=protocol\n"
280 "\tdevice=devicename\n\n");
281 usage();
282 }
283 opt_cnt = 0;
284
285 while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
286 switch (i) {
287 case 'a':
288 free(df_proto);
289 df_proto = NULL;
290 free(df_provider);
291 df_provider = NULL;
292
293 allflag = 1;
294 opt_cnt++;
295 break;
296
297 case 'c':
298 max_conns_allowed = atoi(optarg);
299 break;
300
301 case 'p':
302 proto = optarg;
303 df_allflag = 0;
304 opt_cnt++;
305 break;
306
307 /*
308 * DSS: NFSv4 distributed stable storage.
309 *
310 * This is a Contracted Project Private interface, for
311 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
312 */
313 case 's':
314 if (strlen(optarg) < MAXPATHLEN) {
315 /* first "-s" option encountered? */
316 if (dss_pathnames == NULL) {
317 /*
318 * Allocate maximum possible space
319 * required given cmdline arg count;
320 * "-s <path>" consumes two args.
321 */
322 size_t sz = (ac / 2) * sizeof (char *);
323 dss_pathnames = (char **)malloc(sz);
324 if (dss_pathnames == NULL) {
325 (void) fprintf(stderr, "%s: "
326 "dss paths malloc failed\n",
327 av[0]);
328 exit(1);
329 }
330 (void) memset(dss_pathnames, 0, sz);
331 }
332 dss_pathnames[dss_npaths] = optarg;
333 dss_npaths++;
334 } else {
335 (void) fprintf(stderr,
336 "%s: -s pathname too long.\n", av[0]);
337 }
338 break;
339
340 case 't':
341 provider = optarg;
342 df_allflag = 0;
343 opt_cnt++;
344 break;
345
346 case 'l':
347 listen_backlog = atoi(optarg);
348 break;
349
350 case '?':
351 usage();
352 /* NOTREACHED */
353 }
354 }
355
356 allflag = df_allflag;
357 if (proto == NULL)
358 proto = df_proto;
359 if (provider == NULL)
360 provider = df_provider;
361
362 /*
363 * Conflict options error messages.
364 */
365 if (opt_cnt > 1) {
366 (void) fprintf(stderr, "\nConflicting options, only one of "
367 "the following options can be specified\n"
368 "on the command line:\n"
369 "\t-a\n"
370 "\t-p protocol\n"
371 "\t-t transport\n\n");
372 usage();
373 }
374
375 if (proto != NULL &&
376 strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
377 if (NFS_PROT_VERSION(nfs_server_vers_max) == NFS_V4) {
378 if (NFS_PROT_VERSION(nfs_server_vers_min) == NFS_V4) {
379 fprintf(stderr,
380 "NFS version 4 is not supported "
381 "with the UDP protocol. Exiting\n");
382 exit(3);
383 } else {
384 fprintf(stderr,
385 "NFS version 4 is not supported "
386 "with the UDP protocol.\n");
387 }
388 }
389 }
390
391 /*
392 * If there is exactly one more argument, it is the number of
393 * servers.
394 */
395 if (optind == ac - 1) {
396 maxservers = atoi(av[optind]);
397 maxservers_set = 1;
398 }
399 /*
400 * If there are two or more arguments, then this is a usage error.
401 */
402 else if (optind < ac - 1)
403 usage();
404 /*
405 * Check the ranges for min/max version specified
406 */
407 else if ((nfs_server_vers_min > nfs_server_vers_max) ||
408 (nfs_server_vers_min < NFS_SRV_VERS_MIN) ||
409 (nfs_server_vers_max > NFS_SRV_VERS_MAX))
410 usage();
411 /*
412 * There are no additional arguments, and we haven't set maxservers
413 * explicitly via the config file, we use a default number of
414 * servers. We will log this.
415 */
416 else if (maxservers_set == 0)
417 logmaxservers = 1;
418
419 /*
420 * Basic Sanity checks on options
421 *
422 * max_conns_allowed must be positive, except for the special
423 * value of -1 which is used internally to mean unlimited, -1 isn't
424 * documented but we allow it anyway.
425 *
426 * maxservers must be positive
427 * listen_backlog must be positive or zero
428 */
429 if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
430 (listen_backlog < 0) || (maxservers <= 0)) {
431 usage();
432 }
433
434 /*
435 * Set current dir to server root
436 */
437 if (chdir(dir) < 0) {
438 (void) fprintf(stderr, "%s: ", MyName);
439 perror(dir);
440 exit(1);
441 }
442
443 #ifndef DEBUG
444 pipe_fd = daemonize_init();
445 #endif
446
447 openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
448
449 /*
450 * establish our lock on the lock file and write our pid to it.
451 * exit if some other process holds the lock, or if there's any
452 * error in writing/locking the file.
453 */
454 pid = _enter_daemon_lock(NFSD);
455 switch (pid) {
456 case 0:
457 break;
458 case -1:
459 fprintf(stderr, "error locking for %s: %s\n", NFSD,
460 strerror(errno));
461 exit(2);
462 default:
463 /* daemon was already running */
464 exit(0);
465 }
466
467 /*
468 * If we've been given a list of paths to be used for distributed
469 * stable storage, and provided we're going to run a version
470 * that supports it, setup the DSS paths.
471 */
472 if (dss_pathnames != NULL &&
473 NFS_PROT_VERSION(nfs_server_vers_max) >= DSS_VERSMIN) {
474 if (dss_init(dss_npaths, dss_pathnames) != 0) {
475 fprintf(stderr, "%s", "dss_init failed. Exiting.\n");
476 exit(1);
477 }
478 }
479
480 /*
481 * Block all signals till we spawn other
482 * threads.
483 */
484 (void) sigfillset(&sgset);
485 (void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
486
487 if (logmaxservers) {
488 fprintf(stderr,
489 "Number of servers not specified. Using default of %d.\n",
490 maxservers);
491 }
492
493 /*
494 * Make sure to unregister any previous versions in case the
495 * user is reconfiguring the server in interesting ways.
496 */
497 svc_unreg(NFS_PROGRAM, NFS_VERSION);
498 svc_unreg(NFS_PROGRAM, NFS_V3);
499 svc_unreg(NFS_PROGRAM, NFS_V4);
500 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
501 svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
502
503 /*
504 * Set up kernel RPC thread pool for the NFS server.
505 */
506 if (nfssvcpool(maxservers)) {
507 fprintf(stderr, "Can't set up kernel NFS service: %s. "
508 "Exiting.\n", strerror(errno));
509 exit(1);
510 }
511
512 /*
513 * Set up blocked thread to do LWP creation on behalf of the kernel.
514 */
515 if (svcwait(NFS_SVCPOOL_ID)) {
516 fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting.\n",
517 strerror(errno));
518 exit(1);
519 }
520
521 /*
522 * RDMA start and stop thread.
523 * Per pool RDMA listener creation and
524 * destructor thread.
525 *
526 * start rdma services and block in the kernel.
527 * (only if proto or provider is not set to TCP or UDP)
528 */
529 if ((proto == NULL) && (provider == NULL)) {
530 if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min,
531 nfs_server_vers_max, nfs_server_delegation)) {
532 fprintf(stderr,
533 "Can't set up RDMA creator thread : %s\n",
534 strerror(errno));
535 }
536 }
537
538 /*
539 * Now open up for signal delivery
540 */
541
542 (void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
543 sigset(SIGTERM, sigflush);
544 sigset(SIGUSR1, quiesce);
545
546 /*
547 * Build a protocol block list for registration.
548 * In protocol list we have first block for NFS and second
549 * block for NFS_ACL - which is needed up to v3, as support
550 * for ACL is included in NFS protocol since v4.
551 */
552 protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
553 protobp->serv = "NFS";
554 protobp->versmin = NFS_PROT_VERSION(nfs_server_vers_min);
555 protobp->versmax = NFS_PROT_VERSION(nfs_server_vers_max);
556 protobp->program = NFS_PROGRAM;
557
558 protobp->next = (struct protob *)malloc(sizeof (struct protob));
559 protobp = protobp->next;
560 protobp->serv = "NFS_ACL"; /* not used */
561 protobp->versmin = NFS_PROT_VERSION(nfs_server_vers_min);
562 /* XXX - this needs work to get the version just right */
563 protobp->versmax =
564 MIN(NFS_PROT_VERSION(nfs_server_vers_max), NFS_ACL_V3);
565 protobp->program = NFS_ACL_PROGRAM;
566 protobp->next = NULL;
567
568 if (allflag) {
569 if (do_all(protobp0, nfssvc) == -1) {
570 fprintf(stderr, "setnetconfig failed : %s\n",
571 strerror(errno));
572 exit(1);
573 }
574 } else if (proto) {
575 /* there's more than one match for the same protocol */
576 struct netconfig *nconf;
577 NCONF_HANDLE *nc;
578 bool_t protoFound = FALSE;
579 if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
580 fprintf(stderr, "setnetconfig failed : %s\n",
581 strerror(errno));
582 goto done;
583 }
584 while (nconf = getnetconfig(nc)) {
585 if (strcmp(nconf->nc_proto, proto) == 0) {
586 protoFound = TRUE;
587 do_one(nconf->nc_device, NULL,
588 protobp0, nfssvc);
589 }
590 }
591 (void) endnetconfig(nc);
592 if (protoFound == FALSE) {
593 fprintf(stderr,
594 "couldn't find netconfig entry for protocol %s\n",
595 proto);
596 }
597 } else if (provider)
598 do_one(provider, proto, protobp0, nfssvc);
599 else {
600 for (providerp = defaultproviders;
601 *providerp != NULL; providerp++) {
602 provider = *providerp;
603 do_one(provider, NULL, protobp0, nfssvc);
604 }
605 }
606 done:
607
608 free(protobp);
609 free(protobp0);
610
611 if (num_fds == 0) {
612 fprintf(stderr, "Could not start NFS service for any protocol."
613 " Exiting.\n");
614 exit(1);
615 }
616
617 end_listen_fds = num_fds;
618
619 /*
620 * nfsd is up and running as far as we are concerned.
621 */
622 daemonize_fini(pipe_fd);
623
624 /*
625 * Get rid of unneeded privileges.
626 */
627 __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
628 PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
629
630 /*
631 * Poll for non-data control events on the transport descriptors.
632 */
633 poll_for_action();
634
635 /*
636 * If we get here, something failed in poll_for_action().
637 */
638 return (1);
639 }
640
641 static int
nfssvcpool(int maxservers)642 nfssvcpool(int maxservers)
643 {
644 struct svcpool_args npa;
645
646 npa.id = NFS_SVCPOOL_ID;
647 npa.maxthreads = maxservers;
648 npa.redline = 0;
649 npa.qsize = 0;
650 npa.timeout = 0;
651 npa.stksize = 0;
652 npa.max_same_xprt = 0;
653 return (_nfssys(SVCPOOL_CREATE, &npa));
654 }
655
656 /*
657 * Establish NFS service thread.
658 */
659 static int
nfssvc(int fd,struct netbuf addrmask,struct netconfig * nconf)660 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
661 {
662 struct nfs_svc_args nsa;
663
664 nsa.fd = fd;
665 nsa.netid = nconf->nc_netid;
666 nsa.addrmask = addrmask;
667 if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
668 nsa.nfs_versmax = MIN(nfs_server_vers_max, NFS_VERS_3);
669 nsa.nfs_versmin = nfs_server_vers_min;
670 /*
671 * If no version left, silently do nothing, previous
672 * checks will have assured at least TCP is available.
673 */
674 if (nsa.nfs_versmin > nsa.nfs_versmax)
675 return (0);
676 } else {
677 nsa.nfs_versmax = nfs_server_vers_max;
678 nsa.nfs_versmin = nfs_server_vers_min;
679 }
680 nsa.delegation = nfs_server_delegation;
681 return (_nfssys(NFS_SVC, &nsa));
682 }
683
684 static void
usage(void)685 usage(void)
686 {
687 (void) fprintf(stderr,
688 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
689 (void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
690 (void) fprintf(stderr,
691 "\twhere -a causes <nservers> to be started on each appropriate transport,\n");
692 (void) fprintf(stderr,
693 "\tmax_conns is the maximum number of concurrent connections allowed,\n");
694 (void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
695 (void) fprintf(stderr, "> zero,\n");
696 (void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
697 (void) fprintf(stderr,
698 "\ttransport is a transport provider name (i.e. device),\n");
699 (void) fprintf(stderr,
700 "\tlisten_backlog is the TCP listen backlog,\n");
701 (void) fprintf(stderr,
702 "\tand <nservers> must be a decimal number > zero.\n");
703 exit(1);
704 }
705
706 /*
707 * Issue nfssys system call to flush all logging buffers asynchronously.
708 *
709 * NOTICE: It is extremely important to flush NFS logging buffers when
710 * nfsd exits. When the system is halted or rebooted nfslogd
711 * may not have an opportunity to flush the buffers.
712 */
713 static void
nfsl_flush()714 nfsl_flush()
715 {
716 struct nfsl_flush_args nfa;
717
718 memset((void *)&nfa, 0, sizeof (nfa));
719 nfa.version = NFSL_FLUSH_ARGS_VERS;
720 nfa.directive = NFSL_ALL; /* flush all asynchronously */
721
722 if (_nfssys(LOG_FLUSH, &nfa) < 0)
723 syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
724 strerror(errno));
725 }
726
/*
 * SIGTERM handler.
 *
 * Flushes the NFS logging buffers, then terminates the process
 * immediately with status 0.  The signal number is not used.
 */
/* ARGSUSED */
static void
sigflush(int sig)
{
	nfsl_flush();

	_exit(0);
}
737
738 /*
739 * SIGUSR1 handler.
740 *
741 * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
742 *
743 * This is a Contracted Project Private interface, for the sole use
744 * of Sun Cluster HA-NFS. See PSARC/2004/497.
745 *
746 * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
747 */
748 static void
quiesce(int sig)749 quiesce(int sig)
750 {
751 int error;
752 int id = NFS_SVCPOOL_ID;
753
754 if (NFS_PROT_VERSION(nfs_server_vers_max) >= QUIESCE_VERSMIN) {
755 /* Request server quiesce at next shutdown */
756 error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
757
758 /*
759 * ENOENT is returned if there is no matching SVC pool
760 * for the id. Possibly because the pool is not yet setup.
761 * In this case, just exit as if no error. For all other errors,
762 * just return and allow caller to retry.
763 */
764 if (error && errno != ENOENT) {
765 syslog(LOG_ERR,
766 "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
767 strerror(errno));
768 return;
769 }
770 }
771
772 /* Flush logging buffers */
773 nfsl_flush();
774
775 _exit(0);
776 }
777
778 /*
779 * DSS: distributed stable storage.
780 * Create leaf directories as required, keeping an eye on path
781 * lengths. Calls exit(1) on failure.
782 * The pathnames passed in must already exist, and must be writeable by nfsd.
783 * Note: the leaf directories under NFS4_VAR_DIR are not created here;
784 * they're created at pkg install.
785 */
786 static void
dss_mkleafdirs(uint_t npaths,char ** pathnames)787 dss_mkleafdirs(uint_t npaths, char **pathnames)
788 {
789 int i;
790 char *tmppath = NULL;
791
792 /*
793 * Create the temporary storage used by dss_mkleafdir() here,
794 * rather than in that function, so that it only needs to be
795 * done once, rather than once for each call. Too big to put
796 * on the function's stack.
797 */
798 tmppath = (char *)malloc(MAXPATHLEN);
799 if (tmppath == NULL) {
800 syslog(LOG_ERR, "tmppath malloc failed. Exiting");
801 exit(1);
802 }
803
804 for (i = 0; i < npaths; i++) {
805 char *p = pathnames[i];
806
807 dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
808 dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
809 }
810
811 free(tmppath);
812 }
813
814 /*
815 * Create "leaf" in "dir" (which must already exist).
816 * leaf: should start with a '/'
817 */
818 static void
dss_mkleafdir(char * dir,char * leaf,char * tmppath)819 dss_mkleafdir(char *dir, char *leaf, char *tmppath)
820 {
821 /* MAXPATHLEN includes the terminating NUL */
822 if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
823 fprintf(stderr, "stable storage path too long: %s%s. "
824 "Exiting.\n", dir, leaf);
825 exit(1);
826 }
827
828 (void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
829
830 /* the directory may already exist: that's OK */
831 if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
832 fprintf(stderr, "error creating stable storage directory: "
833 "%s: %s. Exiting.\n", strerror(errno), tmppath);
834 exit(1);
835 }
836 }
837
838 /*
839 * Create the storage dirs, and pass the path list to the kernel.
840 * This requires the nfssrv module to be loaded; the _nfssys() syscall
841 * will fail ENOTSUP if it is not.
842 * Use libnvpair(3LIB) to pass the data to the kernel.
843 */
844 static int
dss_init(uint_t npaths,char ** pathnames)845 dss_init(uint_t npaths, char **pathnames)
846 {
847 int i, j, nskipped, error;
848 char *bufp;
849 uint32_t bufsize;
850 size_t buflen;
851 nvlist_t *nvl;
852
853 if (npaths > 1) {
854 /*
855 * We need to remove duplicate paths; this might be user error
856 * in the general case, but HA-NFSv4 can also cause this.
857 * Sort the pathnames array, and NULL out duplicates,
858 * then write the non-NULL entries to a new array.
859 * Sorting will also allow the kernel to optimise its searches.
860 */
861
862 qsort(pathnames, npaths, sizeof (char *), qstrcmp);
863
864 /* now NULL out any duplicates */
865 i = 0; j = 1; nskipped = 0;
866 while (j < npaths) {
867 if (strcmp(pathnames[i], pathnames[j]) == 0) {
868 pathnames[j] = NULL;
869 j++;
870 nskipped++;
871 continue;
872 }
873
874 /* skip i over any of its NULLed duplicates */
875 i = j++;
876 }
877
878 /* finally, write the non-NULL entries to a new array */
879 if (nskipped > 0) {
880 int nreal;
881 size_t sz;
882 char **tmp_pathnames;
883
884 nreal = npaths - nskipped;
885
886 sz = nreal * sizeof (char *);
887 tmp_pathnames = (char **)malloc(sz);
888 if (tmp_pathnames == NULL) {
889 fprintf(stderr, "tmp_pathnames malloc "
890 "failed\n");
891 exit(1);
892 }
893
894 for (i = 0, j = 0; i < npaths; i++)
895 if (pathnames[i] != NULL)
896 tmp_pathnames[j++] = pathnames[i];
897 free(pathnames);
898 pathnames = tmp_pathnames;
899 npaths = nreal;
900 }
901
902 }
903
904 /* Create directories to store the distributed state files */
905 dss_mkleafdirs(npaths, pathnames);
906
907 /* Create the name-value pair list */
908 error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
909 if (error) {
910 fprintf(stderr, "nvlist_alloc failed: %s\n", strerror(errno));
911 return (1);
912 }
913
914 /* Add the pathnames array as a single name-value pair */
915 error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
916 pathnames, npaths);
917 if (error) {
918 fprintf(stderr, "nvlist_add_string_array failed: %s\n",
919 strerror(errno));
920 nvlist_free(nvl);
921 return (1);
922 }
923
924 /*
925 * Pack list into contiguous memory, for passing to kernel.
926 * nvlist_pack() will allocate the memory for the buffer,
927 * which we should free() when no longer needed.
928 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
929 */
930 bufp = NULL;
931 error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
932 if (error) {
933 fprintf(stderr, "nvlist_pack failed: %s\n", strerror(errno));
934 nvlist_free(nvl);
935 return (1);
936 }
937
938 /* Now we have the packed buffer, we no longer need the list */
939 nvlist_free(nvl);
940
941 /*
942 * Let the kernel know in advance how big the buffer is.
943 * NOTE: we cannot just pass buflen, since size_t is a long, and
944 * thus a different size between ILP32 userland and LP64 kernel.
945 * Use an int for the transfer, since that should be big enough;
946 * this is a no-op at the moment, here, since nfsd is 32-bit, but
947 * that could change.
948 */
949 bufsize = (uint32_t)buflen;
950 error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
951 if (error) {
952 fprintf(stderr,
953 "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s\n",
954 strerror(errno));
955 free(bufp);
956 return (1);
957 }
958
959 /* Pass the packed buffer to the kernel */
960 error = _nfssys(NFS4_DSS_SETPATHS, bufp);
961 if (error) {
962 fprintf(stderr,
963 "_nfssys(NFS4_DSS_SETPATHS) failed: %s\n", strerror(errno));
964 free(bufp);
965 return (1);
966 }
967
968 /*
969 * The kernel has now unpacked the buffer and extracted the
970 * pathnames array, we no longer need the buffer.
971 */
972 free(bufp);
973
974 return (0);
975 }
976
/*
 * Quick sort string compare routine, for qsort.
 * Needed to make arg types correct.
 *
 * p1, p2:	pointers to the array elements being compared; each element
 *		is itself a pointer to a NUL-terminated string
 *
 * Returns the strcmp() ordering of the two strings.  The elements are
 * only read, so the const qualification is preserved.
 */
int
qstrcmp(const void *p1, const void *p2)
{
	const char *s1 = *(char *const *)p1;
	const char *s2 = *(char *const *)p2;

	return (strcmp(s1, s2));
}
989